/*
 * Copyright © 2024, VideoLAN and dav1d authors
 * Copyright © 2024, Luca Barbato
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
27*c0909341SAndroid Build Coastguard Worker
28*c0909341SAndroid Build Coastguard Worker #include "src/ppc/dav1d_types.h"
29*c0909341SAndroid Build Coastguard Worker #include "src/ppc/itx.h"
30*c0909341SAndroid Build Coastguard Worker #include "src/ppc/utils.h"
31*c0909341SAndroid Build Coastguard Worker
32*c0909341SAndroid Build Coastguard Worker #if BITDEPTH == 8
33*c0909341SAndroid Build Coastguard Worker
/*
 * Load four consecutive rows into the already-declared vectors a, b, c, d.
 *
 * The first row is read at src; each following row is read `stride` bytes
 * further on. Every row is fetched with vec_xl, i.e. a full vector load,
 * independent of how many of the loaded bytes the caller goes on to use.
 *
 * src and stride are evaluated once and three times respectively; a..d must
 * be assignable lvalues. The expansion is a braced block, so no trailing
 * semicolon is required at the call site.
 */
#define LOAD_4(src, stride, a, b, c, d) \
{ \
    uint8_t *s = (src); \
    a = vec_xl(0, s); \
    s += (stride); \
    b = vec_xl(0, s); \
    s += (stride); \
    c = vec_xl(0, s); \
    s += (stride); \
    d = vec_xl(0, s); \
}
45*c0909341SAndroid Build Coastguard Worker
/*
 * Declare two i16x8 vectors, `a` loaded from src and `b` loaded from the
 * eight int16_t elements that follow (src + 8).
 *
 * This expands to two declarations, so it introduces the names a and b into
 * the enclosing scope and cannot be used as the body of an unbraced if/for.
 * src is parenthesised so that an expression argument keeps its meaning
 * when the "+ 8" offset is applied.
 */
#define LOAD_DECLARE_2_I16(src, a, b) \
    i16x8 a = vec_xl(0, (src)); \
    i16x8 b = vec_xl(0, (src) + 8);
49*c0909341SAndroid Build Coastguard Worker
/*
 * Declare four i32x4 vectors from two i16x8 inputs:
 *   a = i16h_to_i32(sa), b = i16l_to_i32(sa),
 *   c = i16h_to_i32(sb), d = i16l_to_i32(sb).
 *
 * NOTE(review): i16h_to_i32 / i16l_to_i32 are defined outside this chunk;
 * judging by their names they widen the high and the low four 16-bit lanes
 * to 32 bits -- confirm against src/ppc/utils.h.
 *
 * Expands to declarations: a, b, c and d become new names in the enclosing
 * scope. sa and sb are each evaluated twice.
 */
#define UNPACK_DECLARE_4_I16_I32(sa, sb, a, b, c, d) \
    i32x4 a = i16h_to_i32(sa); \
    i32x4 b = i16l_to_i32(sa); \
    i32x4 c = i16h_to_i32(sb); \
    i32x4 d = i16l_to_i32(sb);
55*c0909341SAndroid Build Coastguard Worker
/*
 * Load 16 int16_t coefficients starting at coeff and make them available
 * both packed and widened.
 *
 * Declares, with fixed names, in the enclosing scope:
 *   c01, c23        - i16x8, the two raw 8-element loads
 *   c0, c1, c2, c3  - i32x4, the halves of c01 and c23 widened by
 *                     UNPACK_DECLARE_4_I16_I32
 */
#define LOAD_COEFF_4(coeff) \
    LOAD_DECLARE_2_I16(coeff, c01, c23) \
    UNPACK_DECLARE_4_I16_I32(c01, c23, c0, c1, c2, c3)
59*c0909341SAndroid Build Coastguard Worker
/*
 * Load 32 int16_t coefficients, regroup them by 64-bit halves, scale them
 * and widen them to 32 bits.
 *
 * Steps:
 *   1. Four 8-element loads: c04, c15 (coeff + 0 / + 8) and c26, c37
 *      (coeff + 16 / + 24).
 *   2. vec_mergeh / vec_mergel on the vectors viewed as i64x2 pair up the
 *      first doublewords (c01, c23) and the second doublewords (c45, c67).
 *   3. Every lane is multiplied by `scale` with vec_mradds and a zero
 *      addend, i.e. a rounded, saturating (x * scale) >> 15.
 *   4. The scaled vectors are widened into i32x4 c0..c7.
 *
 * All of the names above are declared in the enclosing scope. `scale` is
 * evaluated four times and is expected to be an i16x8 vector.
 */
#define LOAD_SCALE_COEFF_4x8(coeff, scale) \
    LOAD_DECLARE_2_I16((coeff), c04, c15) \
    LOAD_DECLARE_2_I16((coeff) + 16, c26, c37) \
    i16x8 c01 = (i16x8)vec_mergeh((i64x2)c04, (i64x2)c15); \
    i16x8 c23 = (i16x8)vec_mergeh((i64x2)c26, (i64x2)c37); \
    i16x8 c45 = (i16x8)vec_mergel((i64x2)c04, (i64x2)c15); \
    i16x8 c67 = (i16x8)vec_mergel((i64x2)c26, (i64x2)c37); \
    c01 = vec_mradds(c01, scale, vec_splat_s16(0)); \
    c23 = vec_mradds(c23, scale, vec_splat_s16(0)); \
    UNPACK_DECLARE_4_I16_I32(c01, c23, c0, c1, c2, c3) \
    c45 = vec_mradds(c45, scale, vec_splat_s16(0)); \
    c67 = vec_mradds(c67, scale, vec_splat_s16(0)); \
    UNPACK_DECLARE_4_I16_I32(c45, c67, c4, c5, c6, c7)
73*c0909341SAndroid Build Coastguard Worker
/*
 * Load 32 int16_t coefficients in memory order, scale them and widen them
 * to 32 bits.
 *
 * Declares in the enclosing scope:
 *   c01, c23, c45, c67 - i16x8 loads from coeff + 0, + 8, + 16, + 24, each
 *                        replaced by its vec_mradds(x, scale, 0) result
 *                        (rounded, saturating (x * scale) >> 15)
 *   c0 .. c7           - i32x4 halves of the scaled vectors
 *
 * Unlike LOAD_SCALE_COEFF_4x8 no doubleword regrouping is done.
 * `scale` is evaluated four times and is expected to be an i16x8 vector.
 */
#define LOAD_SCALE_COEFF_8x4(coeff, scale) \
    LOAD_DECLARE_2_I16((coeff), c01, c23) \
    LOAD_DECLARE_2_I16((coeff) + 16, c45, c67) \
    c01 = vec_mradds(c01, scale, vec_splat_s16(0)); \
    c23 = vec_mradds(c23, scale, vec_splat_s16(0)); \
    UNPACK_DECLARE_4_I16_I32(c01, c23, c0, c1, c2, c3) \
    c45 = vec_mradds(c45, scale, vec_splat_s16(0)); \
    c67 = vec_mradds(c67, scale, vec_splat_s16(0)); \
    UNPACK_DECLARE_4_I16_I32(c45, c67, c4, c5, c6, c7)
83*c0909341SAndroid Build Coastguard Worker
/*
 * Load 64 int16_t coefficients (eight vectors of eight) and widen each one.
 *
 * Declares in the enclosing scope:
 *   c0 .. c7             - i16x8, loaded from coeff + 0, + 8, ... + 56
 *   cNh, cNl (N = 0..7)  - i32x4, the two halves of cN as produced by
 *                          UNPACK_DECLARE_4_I16_I32
 *
 * The last line deliberately has no line continuation, so the macro ends
 * here and cannot absorb whatever line follows it.
 */
#define LOAD_COEFF_8x8(coeff) \
    LOAD_DECLARE_2_I16((coeff), c0, c1) \
    LOAD_DECLARE_2_I16((coeff) + 16, c2, c3) \
    LOAD_DECLARE_2_I16((coeff) + 32, c4, c5) \
    LOAD_DECLARE_2_I16((coeff) + 48, c6, c7) \
    UNPACK_DECLARE_4_I16_I32(c0, c1, c0h, c0l, c1h, c1l) \
    UNPACK_DECLARE_4_I16_I32(c2, c3, c2h, c2l, c3h, c3l) \
    UNPACK_DECLARE_4_I16_I32(c4, c5, c4h, c4l, c5h, c5l) \
    UNPACK_DECLARE_4_I16_I32(c6, c7, c6h, c6l, c7h, c7l)

/*
 * Load 64 int16_t coefficients as four groups of 16 and widen each group
 * into four i32x4 vectors.
 *
 * For group N (N = 0..3), read from coeff + 16 * N, this declares in the
 * enclosing scope:
 *   aNbN, cNdN          - i16x8, the two raw 8-element loads
 *   cAN, cBN, cCN, cDN  - i32x4, the halves of aNbN and cNdN
 */
#define LOAD_COEFF_4x16(coeff) \
    LOAD_DECLARE_2_I16((coeff), a0b0, c0d0) \
    LOAD_DECLARE_2_I16((coeff) + 16, a1b1, c1d1) \
    LOAD_DECLARE_2_I16((coeff) + 32, a2b2, c2d2) \
    LOAD_DECLARE_2_I16((coeff) + 48, a3b3, c3d3) \
    UNPACK_DECLARE_4_I16_I32(a0b0, c0d0, cA0, cB0, cC0, cD0) \
    UNPACK_DECLARE_4_I16_I32(a1b1, c1d1, cA1, cB1, cC1, cD1) \
    UNPACK_DECLARE_4_I16_I32(a2b2, c2d2, cA2, cB2, cC2, cD2) \
    UNPACK_DECLARE_4_I16_I32(a3b3, c3d3, cA3, cB3, cC3, cD3)
103*c0909341SAndroid Build Coastguard Worker
/*
 * Declare four u8x16 vectors a, b, c, d in the enclosing scope and fill them
 * with four consecutive rows via LOAD_4 (row r is read at src + r * stride).
 */
#define LOAD_DECLARE_4(src, stride, a, b, c, d) \
    u8x16 a, b, c, d; \
    LOAD_4(src, stride, a, b, c, d)
107*c0909341SAndroid Build Coastguard Worker
/*
 * Store the leading `l` bytes of each of four vectors to four consecutive
 * rows: a goes to dst, b to dst + stride, c to dst + 2 * stride and d to
 * dst + 3 * stride. vec_xst_len limits every store to `l` bytes, so bytes
 * past the row width are left untouched.
 *
 * dst is evaluated once; l is evaluated four times and stride three times.
 * The caller's pointer is not modified (a private copy is advanced).
 */
#define STORE_LEN(l, dst, stride, a, b, c, d) \
{ \
    uint8_t *dst2 = (dst); \
    vec_xst_len(a, dst2, l); \
    dst2 += (stride); \
    vec_xst_len(b, dst2, l); \
    dst2 += (stride); \
    vec_xst_len(c, dst2, l); \
    dst2 += (stride); \
    vec_xst_len(d, dst2, l); \
}
119*c0909341SAndroid Build Coastguard Worker
/* Store the first 4 bytes of a, b, c, d to four consecutive rows at dst. */
#define STORE_4(dst, stride, a, b, c, d) \
    STORE_LEN(4, dst, stride, a, b, c, d)
122*c0909341SAndroid Build Coastguard Worker
/* Store the first 8 bytes of ab, cd, ef, gh to four consecutive rows at dst. */
#define STORE_8(dst, stride, ab, cd, ef, gh) \
    STORE_LEN(8, dst, stride, ab, cd, ef, gh)
125*c0909341SAndroid Build Coastguard Worker
/*
 * Store four full vectors to four consecutive rows: l0 at dst, l1 at
 * dst + stride, l2 at dst + 2 * stride, l3 at dst + 3 * stride.
 *
 * The row cursor is an ordinary local rather than a name pasted together
 * from the dst argument, so dst may be any pointer expression (for example
 * "dst + 16"), not just a bare identifier. dst is evaluated once and the
 * caller's pointer is left unchanged.
 */
#define STORE_16(dst, stride, l0, l1, l2, l3) \
{ \
    uint8_t *store16_row = (dst); \
    vec_xst(l0, 0, store16_row); \
    store16_row += (stride); \
    vec_xst(l1, 0, store16_row); \
    store16_row += (stride); \
    vec_xst(l2, 0, store16_row); \
    store16_row += (stride); \
    vec_xst(l3, 0, store16_row); \
}
137*c0909341SAndroid Build Coastguard Worker
/*
 * Add a 4x4 residual to four rows of pixels, in place.
 *
 * a, b, c, d - u8x16 rows; only the first 32-bit word (4 pixels) of each
 *              takes part.
 * c01, c23   - i16x8 residuals; they are overwritten with their rounded,
 *              down-shifted values.
 *
 * Steps:
 *   1. vec_mergeh on u32x4 views puts word 0 of a and word 0 of b next to
 *      each other (ab), likewise c and d (cd), so two rows share a vector.
 *   2. The residual is rounded to (x + 8) >> 4 with a saturating add and an
 *      arithmetic shift.
 *   3. The pixels are widened to 16 bits (u8h_to_i16, defined elsewhere),
 *      the residual is added with saturation and the sum is packed back to
 *      bytes with unsigned saturation.
 *   4. a and c receive the packed row pairs; vec_mergeo copies word 1 of
 *      each into word 0 of b and d, so every output row again has its four
 *      pixels in the first word.
 */
#define APPLY_COEFF_4(a, b, c, d, c01, c23) \
{ \
    u8x16 ab = (u8x16)vec_mergeh((u32x4)a, (u32x4)b); \
    u8x16 cd = (u8x16)vec_mergeh((u32x4)c, (u32x4)d); \
 \
    c01 = vec_adds(c01, vec_splat_s16(8)); \
    c23 = vec_adds(c23, vec_splat_s16(8)); \
    c01 = vec_sra(c01, vec_splat_u16(4)); \
    c23 = vec_sra(c23, vec_splat_u16(4)); \
 \
    i16x8 abs = u8h_to_i16(ab); \
    i16x8 cds = u8h_to_i16(cd); \
 \
    abs = vec_adds(abs, c01); \
    cds = vec_adds(cds, c23); \
 \
    a = vec_packsu(abs, abs); \
    c = vec_packsu(cds, cds); \
 \
    b = (u8x16)vec_mergeo((u32x4)a, (u32x4)a); \
    d = (u8x16)vec_mergeo((u32x4)c, (u32x4)c); \
}
160*c0909341SAndroid Build Coastguard Worker
/*
 * Add an 8-wide residual to two rows of pixels, in place.
 *
 * ab, cd   - u8x16 pixel rows; the half selected by u8h_to_i16 (defined
 *            elsewhere) is widened to 16 bits and updated.
 * c01, c23 - i16x8 residuals; overwritten with (x + 8) >> 4 (saturating
 *            add, arithmetic shift).
 *
 * The sums are formed with saturating 16-bit adds and packed back to bytes
 * with unsigned saturation; each result vector holds the same eight pixels
 * in both of its halves because it is packed with itself.
 */
#define APPLY_COEFF_8x4(ab, cd, c01, c23) \
{ \
    i16x8 abs = u8h_to_i16(ab); \
    i16x8 cds = u8h_to_i16(cd); \
    c01 = vec_adds(c01, vec_splat_s16(8)); \
    c23 = vec_adds(c23, vec_splat_s16(8)); \
    c01 = vec_sra(c01, vec_splat_u16(4)); \
    c23 = vec_sra(c23, vec_splat_u16(4)); \
 \
    abs = vec_adds(abs, c01); \
    cds = vec_adds(cds, c23); \
 \
    ab = vec_packsu(abs, abs); \
    cd = vec_packsu(cds, cds); \
}
176*c0909341SAndroid Build Coastguard Worker
/*
 * Add a 16x4 residual to four full rows of pixels, in place.
 *
 * a, b, c, d        - u8x16 pixel rows, each split into two i16x8 halves
 *                     with u8h_to_i16 / u8l_to_i16 (defined elsewhere).
 * c00c01 .. c14c15  - eight i16x8 residual vectors, two per row, in row
 *                     order; they are modified by SCALE_ROUND_4.
 *
 * NOTE(review): SCALE_ROUND_4 is defined outside this chunk. From the
 * arguments passed here (addend 8, shift 4) it presumably applies
 * (x + 8) >> 4 to each of its first four arguments, matching the explicit
 * rounding in APPLY_COEFF_4 -- confirm.
 *
 * Each half row is summed with its residual using a saturating add, then
 * the two halves are packed back into one u8x16 with unsigned saturation.
 */
#define APPLY_COEFF_16x4(a, b, c, d, \
                         c00c01, c02c03, c04c05, c06c07, \
                         c08c09, c10c11, c12c13, c14c15) \
{ \
    i16x8 ah = u8h_to_i16(a); \
    i16x8 al = u8l_to_i16(a); \
    i16x8 bh = u8h_to_i16(b); \
    i16x8 bl = u8l_to_i16(b); \
    i16x8 ch = u8h_to_i16(c); \
    i16x8 cl = u8l_to_i16(c); \
    i16x8 dh = u8h_to_i16(d); \
    i16x8 dl = u8l_to_i16(d); \
    SCALE_ROUND_4(c00c01, c02c03, c04c05, c06c07, vec_splat_s16(8), vec_splat_u16(4)) \
    SCALE_ROUND_4(c08c09, c10c11, c12c13, c14c15, vec_splat_s16(8), vec_splat_u16(4)) \
 \
    ah = vec_adds(ah, c00c01); \
    al = vec_adds(al, c02c03); \
    bh = vec_adds(bh, c04c05); \
    bl = vec_adds(bl, c06c07); \
    ch = vec_adds(ch, c08c09); \
    cl = vec_adds(cl, c10c11); \
    dh = vec_adds(dh, c12c13); \
    dl = vec_adds(dl, c14c15); \
 \
    a = vec_packsu(ah, al); \
    b = vec_packsu(bh, bl); \
    c = vec_packsu(ch, cl); \
    d = vec_packsu(dh, dl); \
}
206*c0909341SAndroid Build Coastguard Worker
/*
 * 4-point inverse DCT butterfly on four i32x4 vectors, computed in place
 * with 12-bit fixed-point constants:
 *
 *   o0 = ((c0 + c2) * 2896 + 2048) >> 12
 *   o1 = ((c0 - c2) * 2896 + 2048) >> 12
 *   o2 = (c1 * 1567 - c3 * 3784 + 2048) >> 12
 *   o3 = (c1 * 3784 + c3 * 1567 + 2048) >> 12
 *
 *   c0 = o0 + o3,  c1 = o1 + o2,  c2 = o1 - o2,  c3 = o0 - o3
 *
 * 2048 is half of 1 << 12, i.e. the rounding term for the shift. All four
 * arguments must be assignable i32x4 lvalues; they are read several times
 * and then overwritten. The arguments must not be named o0..o3, o2a/o2b,
 * o3a/o3b, v12 or v<constant>, since those names are used for locals.
 */
#define IDCT_4_INNER(c0, c1, c2, c3) \
{ \
    i32x4 o0 = vec_add(c0, c2); \
    i32x4 o1 = vec_sub(c0, c2); \
 \
    i32x4 v2896 = vec_splats(2896); \
    i32x4 v1567 = vec_splats(1567); \
    i32x4 v3784 = vec_splats(3784); \
    i32x4 v2048 = vec_splats(2048); \
 \
    o0 = vec_mul(o0, v2896); \
    o1 = vec_mul(o1, v2896); \
 \
    i32x4 o2a = vec_mul(c1, v1567); \
    i32x4 o2b = vec_mul(c3, v3784); \
    i32x4 o3a = vec_mul(c1, v3784); \
    i32x4 o3b = vec_mul(c3, v1567); \
 \
    i32x4 o2 = vec_sub(o2a, o2b); \
    i32x4 o3 = vec_add(o3a, o3b); \
 \
    u32x4 v12 = vec_splat_u32(12); \
 \
    o0 = vec_add(o0, v2048); \
    o1 = vec_add(o1, v2048); \
    o2 = vec_add(o2, v2048); \
    o3 = vec_add(o3, v2048); \
 \
    o0 = vec_sra(o0, v12); \
    o1 = vec_sra(o1, v12); \
    o2 = vec_sra(o2, v12); \
    o3 = vec_sra(o3, v12); \
 \
    c0 = vec_add(o0, o3); \
    c1 = vec_add(o1, o2); \
    c2 = vec_sub(o1, o2); \
    c3 = vec_sub(o0, o3); \
}
246*c0909341SAndroid Build Coastguard Worker
/*
 * Run IDCT_4_INNER on c0..c3 in place, then pack the results to 16 bits with
 * signed saturation in the pairing used by the 8-point transform:
 *   c03 = pack(c0, c3),  c12 = pack(c1, c2).
 *
 * c03 and c12 must already be declared by the caller. The expansion is a
 * single braced block, so it behaves as one statement, and the macro ends
 * on its last line without a dangling line continuation.
 */
#define dct4_for_dct8(c0, c1, c2, c3, c03, c12) \
{ \
    IDCT_4_INNER(c0, c1, c2, c3) \
    c03 = vec_packs(c0, c3); \
    c12 = vec_packs(c1, c2); \
}
/*
 * First-pass 4-point inverse DCT.
 *
 * Runs IDCT_4_INNER on c0..c3, packs the results to 16 bits with signed
 * saturation (c01 = pack(c0, c1), c23 = pack(c2, c3)) and widens them back
 * into c0..c3. The pack/unpack round trip therefore clamps every
 * intermediate value to the int16_t range while leaving both the packed and
 * the 32-bit forms up to date for the caller.
 */
#define dct_4_in(c0, c1, c2, c3, c01, c23) \
{ \
    IDCT_4_INNER(c0, c1, c2, c3) \
    c01 = vec_packs(c0, c1); \
    c23 = vec_packs(c2, c3); \
    c0 = i16h_to_i32(c01); \
    c1 = i16l_to_i32(c01); \
    c2 = i16h_to_i32(c23); \
    c3 = i16l_to_i32(c23); \
}
262*c0909341SAndroid Build Coastguard Worker
/*
 * Second-pass 4-point inverse DCT.
 *
 * Runs IDCT_4_INNER on c0..c3 in place and packs the results to 16 bits with
 * signed saturation: c01 = pack(c0, c1), c23 = pack(c2, c3). Unlike
 * dct_4_in, the 32-bit vectors are not refreshed from the packed values.
 *
 * c01 and c23 must already be declared by the caller. The expansion is a
 * single braced block, so it behaves as one statement, and the macro ends
 * on its last line without a dangling line continuation.
 */
#define dct_4_out(c0, c1, c2, c3, c01, c23) \
{ \
    IDCT_4_INNER(c0, c1, c2, c3) \
    c01 = vec_packs(c0, c1); \
    c23 = vec_packs(c2, c3); \
}

268*c0909341SAndroid Build Coastguard Worker
/*
 * 4-point identity transform on packed 16-bit coefficients, in place.
 *
 * vec_mradds(x, 1697 * 8, 0) yields the rounded, saturating
 * (x * 1697 * 8) >> 15, which equals (x * 1697 + 2048) >> 12. Adding that
 * back to x with saturation scales every lane by 1 + 1697/4096 (about
 * 1.4143).
 *
 * c01 and c23 are i16x8 lvalues; both are read and then overwritten.
 */
#define IDENTITY_4(c01, c23) \
{ \
    i16x8 v1697 = vec_splats((int16_t)(1697*8)); \
    i16x8 o01 = vec_mradds(c01, v1697, vec_splat_s16(0)); \
    i16x8 o23 = vec_mradds(c23, v1697, vec_splat_s16(0)); \
    c01 = vec_adds(c01, o01); \
    c23 = vec_adds(c23, o23); \
}
277*c0909341SAndroid Build Coastguard Worker
/*
 * First-pass 4-point identity transform.
 *
 * The input is taken from the packed vectors c01 / c23 (the incoming values
 * of c0..c3 are ignored). IDENTITY_4 scales them in place, after which
 * c0..c3 are overwritten with the widened halves of the scaled result.
 */
#define identity_4_in(c0, c1, c2, c3, c01, c23) \
{ \
    IDENTITY_4(c01, c23) \
    c0 = i16h_to_i32(c01); \
    c1 = i16l_to_i32(c01); \
    c2 = i16h_to_i32(c23); \
    c3 = i16l_to_i32(c23); \
}
286*c0909341SAndroid Build Coastguard Worker
/*
 * Second-pass 4-point identity transform.
 *
 * The input is taken from the 32-bit vectors c0..c3: they are packed to
 * 16 bits with signed saturation into c01 / c23, which IDENTITY_4 then
 * scales in place. c0..c3 themselves are left unchanged.
 */
#define identity_4_out(c0, c1, c2, c3, c01, c23) \
{ \
    c01 = vec_packs(c0, c1); \
    c23 = vec_packs(c2, c3); \
    IDENTITY_4(c01, c23) \
}
293*c0909341SAndroid Build Coastguard Worker
/*
 * 4-point inverse ADST on four i32x4 vectors, 12-bit fixed point:
 *
 *   oc0 = (1321*c0 + 3344*c1 + 3803*c2 + 2482*c3 + 2048) >> 12
 *   oc1 = (2482*c0 + 3344*c1 - 1321*c2 - 3803*c3 + 2048) >> 12
 *   oc2 = (3344 * (c0 - c2 + c3)                 + 2048) >> 12
 *   oc3 = (3803*c0 - 3344*c1 + 2482*c2 - 1321*c3 + 2048) >> 12
 *
 * The rounding term 2048 is folded into the c1 product: i1 is
 * 3344*c1 + 2048 and n1 is 3344*c1 - 2048, so subtracting n1 gives
 * -3344*c1 + 2048 for oc3.
 *
 * Every read of c0..c3 happens before the first write to oc0..oc3, so the
 * outputs may alias the inputs in any order; the adst/flipadst wrappers
 * rely on this to write results straight or reversed.
 */
#define ADST_INNER_4(c0, c1, c2, c3, oc0, oc1, oc2, oc3) \
{ \
    i32x4 v1321 = vec_splats(1321); \
    i32x4 v3803 = vec_splats(3803); \
    i32x4 v2482 = vec_splats(2482); \
    i32x4 v3344 = vec_splats(3344); \
    i32x4 v2048 = vec_splats(2048); \
    i32x4 i0_v1321 = vec_mul(c0, v1321); \
    i32x4 i0_v2482 = vec_mul(c0, v2482); \
    i32x4 i0_v3803 = vec_mul(c0, v3803); \
    i32x4 i1 = vec_mul(c1, v3344); \
    i32x4 i2_v1321 = vec_mul(c2, v1321); \
    i32x4 i2_v2482 = vec_mul(c2, v2482); \
    i32x4 i2_v3803 = vec_mul(c2, v3803); \
    i32x4 i3_v1321 = vec_mul(c3, v1321); \
    i32x4 i3_v2482 = vec_mul(c3, v2482); \
    i32x4 i3_v3803 = vec_mul(c3, v3803); \
 \
    i32x4 n1 = vec_sub(i1, v2048); \
    i1 = vec_add(i1, v2048); \
 \
    i32x4 o0 = vec_add(i0_v1321, i2_v3803); \
    i32x4 o1 = vec_sub(i0_v2482, i2_v1321); \
    i32x4 o2 = vec_sub(c0, c2); \
    i32x4 o3 = vec_add(i0_v3803, i2_v2482); \
 \
    o0 = vec_add(o0, i3_v2482); \
    o1 = vec_sub(o1, i3_v3803); \
    o2 = vec_add(o2, c3); \
    o3 = vec_sub(o3, i3_v1321); \
 \
    o0 = vec_add(o0, i1); \
    o1 = vec_add(o1, i1); \
    o2 = vec_mul(o2, v3344); \
    o3 = vec_sub(o3, n1); \
 \
    o2 = vec_add(o2, v2048); \
 \
    oc0 = vec_sra(o0, vec_splat_u32(12)); \
    oc1 = vec_sra(o1, vec_splat_u32(12)); \
    oc2 = vec_sra(o2, vec_splat_u32(12)); \
    oc3 = vec_sra(o3, vec_splat_u32(12)); \
}
338*c0909341SAndroid Build Coastguard Worker
/*
 * First-pass 4-point inverse ADST: transforms c0..c3 in place.
 * c01 and c23 are accepted only so that the signature matches the other
 * *_4_in macros; they are neither read nor written.
 */
#define adst_4_in(c0, c1, c2, c3, c01, c23) \
{ \
    ADST_INNER_4(c0, c1, c2, c3, c0, c1, c2, c3) \
}
343*c0909341SAndroid Build Coastguard Worker
/*
 * First-pass 4-point inverse flipped ADST: same computation as adst_4_in,
 * but the four results are written back in reverse order (output 0 lands
 * in c3, output 3 in c0).
 * c01 and c23 are accepted only so that the signature matches the other
 * *_4_in macros; they are neither read nor written.
 */
#define flipadst_4_in(c0, c1, c2, c3, c01, c23) \
{ \
    ADST_INNER_4(c0, c1, c2, c3, c3, c2, c1, c0) \
}
348*c0909341SAndroid Build Coastguard Worker
/*
 * Second-pass 4-point inverse ADST: transforms c0..c3 in place, then packs
 * them to 16 bits with signed saturation into c01 = pack(c0, c1) and
 * c23 = pack(c2, c3).
 */
#define adst_4_out(c0, c1, c2, c3, c01, c23) \
{ \
    ADST_INNER_4(c0, c1, c2, c3, c0, c1, c2, c3) \
    c01 = vec_packs(c0, c1); \
    c23 = vec_packs(c2, c3); \
}
355*c0909341SAndroid Build Coastguard Worker
/*
 * Second-pass 4-point inverse flipped ADST: same computation as adst_4_out,
 * but the four results are written back to c0..c3 in reverse order before
 * being packed into c01 = pack(c0, c1) and c23 = pack(c2, c3).
 */
#define flipadst_4_out(c0, c1, c2, c3, c01, c23) \
{ \
    ADST_INNER_4(c0, c1, c2, c3, c3, c2, c1, c0) \
    c01 = vec_packs(c0, c1); \
    c23 = vec_packs(c2, c3); \
}
362*c0909341SAndroid Build Coastguard Worker
dc_only_4xN(uint8_t * dst,const ptrdiff_t stride,int16_t * const coeff,int n,int is_rect2,int shift)363*c0909341SAndroid Build Coastguard Worker static void dc_only_4xN(uint8_t *dst, const ptrdiff_t stride, int16_t *const coeff, int n, int is_rect2, int shift)
364*c0909341SAndroid Build Coastguard Worker {
365*c0909341SAndroid Build Coastguard Worker int dc = coeff[0];
366*c0909341SAndroid Build Coastguard Worker const int rnd = (1 << shift) >> 1;
367*c0909341SAndroid Build Coastguard Worker if (is_rect2)
368*c0909341SAndroid Build Coastguard Worker dc = (dc * 181 + 128) >> 8;
369*c0909341SAndroid Build Coastguard Worker dc = (dc * 181 + 128) >> 8;
370*c0909341SAndroid Build Coastguard Worker dc = (dc + rnd) >> shift;
371*c0909341SAndroid Build Coastguard Worker dc = (dc * 181 + 128 + 2048) >> 12;
372*c0909341SAndroid Build Coastguard Worker
373*c0909341SAndroid Build Coastguard Worker i16x8 vdc = vec_splats((int16_t)dc);
374*c0909341SAndroid Build Coastguard Worker coeff[0] = 0;
375*c0909341SAndroid Build Coastguard Worker for (int i = 0; i < n; i++, dst += 4 * stride) {
376*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_4(dst, stride, a, b, c, d)
377*c0909341SAndroid Build Coastguard Worker
378*c0909341SAndroid Build Coastguard Worker i16x8 as = u8h_to_i16(a);
379*c0909341SAndroid Build Coastguard Worker i16x8 bs = u8h_to_i16(b);
380*c0909341SAndroid Build Coastguard Worker i16x8 cs = u8h_to_i16(c);
381*c0909341SAndroid Build Coastguard Worker i16x8 ds = u8h_to_i16(d);
382*c0909341SAndroid Build Coastguard Worker
383*c0909341SAndroid Build Coastguard Worker as = vec_adds(as, vdc);
384*c0909341SAndroid Build Coastguard Worker bs = vec_adds(bs, vdc);
385*c0909341SAndroid Build Coastguard Worker cs = vec_adds(cs, vdc);
386*c0909341SAndroid Build Coastguard Worker ds = vec_adds(ds, vdc);
387*c0909341SAndroid Build Coastguard Worker
388*c0909341SAndroid Build Coastguard Worker a = vec_packsu(as, as);
389*c0909341SAndroid Build Coastguard Worker b = vec_packsu(bs, bs);
390*c0909341SAndroid Build Coastguard Worker c = vec_packsu(cs, cs);
391*c0909341SAndroid Build Coastguard Worker d = vec_packsu(ds, ds);
392*c0909341SAndroid Build Coastguard Worker
393*c0909341SAndroid Build Coastguard Worker STORE_4(dst, stride, a, b, c, d)
394*c0909341SAndroid Build Coastguard Worker }
395*c0909341SAndroid Build Coastguard Worker }
396*c0909341SAndroid Build Coastguard Worker
dc_only_8xN(uint8_t * dst,const ptrdiff_t stride,int16_t * const coeff,int n,int is_rect2,int shift)397*c0909341SAndroid Build Coastguard Worker static void dc_only_8xN(uint8_t *dst, const ptrdiff_t stride, int16_t *const coeff, int n, int is_rect2, int shift)
398*c0909341SAndroid Build Coastguard Worker {
399*c0909341SAndroid Build Coastguard Worker int dc = coeff[0];
400*c0909341SAndroid Build Coastguard Worker const int rnd = (1 << shift) >> 1;
401*c0909341SAndroid Build Coastguard Worker if (is_rect2)
402*c0909341SAndroid Build Coastguard Worker dc = (dc * 181 + 128) >> 8;
403*c0909341SAndroid Build Coastguard Worker dc = (dc * 181 + 128) >> 8;
404*c0909341SAndroid Build Coastguard Worker dc = (dc + rnd) >> shift;
405*c0909341SAndroid Build Coastguard Worker dc = (dc * 181 + 128 + 2048) >> 12;
406*c0909341SAndroid Build Coastguard Worker
407*c0909341SAndroid Build Coastguard Worker i16x8 vdc = vec_splats((int16_t)dc);
408*c0909341SAndroid Build Coastguard Worker coeff[0] = 0;
409*c0909341SAndroid Build Coastguard Worker
410*c0909341SAndroid Build Coastguard Worker for (int i = 0; i < n; i++, dst += 4 * stride) {
411*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_4(dst, stride, a, b, c, d)
412*c0909341SAndroid Build Coastguard Worker
413*c0909341SAndroid Build Coastguard Worker i16x8 as = u8h_to_i16(a);
414*c0909341SAndroid Build Coastguard Worker i16x8 bs = u8h_to_i16(b);
415*c0909341SAndroid Build Coastguard Worker i16x8 cs = u8h_to_i16(c);
416*c0909341SAndroid Build Coastguard Worker i16x8 ds = u8h_to_i16(d);
417*c0909341SAndroid Build Coastguard Worker
418*c0909341SAndroid Build Coastguard Worker as = vec_adds(as, vdc);
419*c0909341SAndroid Build Coastguard Worker bs = vec_adds(bs, vdc);
420*c0909341SAndroid Build Coastguard Worker cs = vec_adds(cs, vdc);
421*c0909341SAndroid Build Coastguard Worker ds = vec_adds(ds, vdc);
422*c0909341SAndroid Build Coastguard Worker
423*c0909341SAndroid Build Coastguard Worker a = vec_packsu(as, as);
424*c0909341SAndroid Build Coastguard Worker b = vec_packsu(bs, bs);
425*c0909341SAndroid Build Coastguard Worker c = vec_packsu(cs, cs);
426*c0909341SAndroid Build Coastguard Worker d = vec_packsu(ds, ds);
427*c0909341SAndroid Build Coastguard Worker
428*c0909341SAndroid Build Coastguard Worker STORE_8(dst, stride, a, b, c, d)
429*c0909341SAndroid Build Coastguard Worker }
430*c0909341SAndroid Build Coastguard Worker }
431*c0909341SAndroid Build Coastguard Worker
dc_only_16xN(uint8_t * dst,const ptrdiff_t stride,int16_t * const coeff,int n,int is_rect2,int shift)432*c0909341SAndroid Build Coastguard Worker static void dc_only_16xN(uint8_t *dst, const ptrdiff_t stride, int16_t *const coeff, int n, int is_rect2, int shift)
433*c0909341SAndroid Build Coastguard Worker {
434*c0909341SAndroid Build Coastguard Worker int dc = coeff[0];
435*c0909341SAndroid Build Coastguard Worker const int rnd = (1 << shift) >> 1;
436*c0909341SAndroid Build Coastguard Worker if (is_rect2)
437*c0909341SAndroid Build Coastguard Worker dc = (dc * 181 + 128) >> 8;
438*c0909341SAndroid Build Coastguard Worker dc = (dc * 181 + 128) >> 8;
439*c0909341SAndroid Build Coastguard Worker dc = (dc + rnd) >> shift;
440*c0909341SAndroid Build Coastguard Worker dc = (dc * 181 + 128 + 2048) >> 12;
441*c0909341SAndroid Build Coastguard Worker
442*c0909341SAndroid Build Coastguard Worker i16x8 vdc = vec_splats((int16_t)dc);
443*c0909341SAndroid Build Coastguard Worker coeff[0] = 0;
444*c0909341SAndroid Build Coastguard Worker
445*c0909341SAndroid Build Coastguard Worker for (int i = 0; i < n; i++, dst += 4 * stride) {
446*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_4(dst, stride, a, b, c, d)
447*c0909341SAndroid Build Coastguard Worker
448*c0909341SAndroid Build Coastguard Worker i16x8 ah = u8h_to_i16(a);
449*c0909341SAndroid Build Coastguard Worker i16x8 bh = u8h_to_i16(b);
450*c0909341SAndroid Build Coastguard Worker i16x8 ch = u8h_to_i16(c);
451*c0909341SAndroid Build Coastguard Worker i16x8 dh = u8h_to_i16(d);
452*c0909341SAndroid Build Coastguard Worker i16x8 al = u8l_to_i16(a);
453*c0909341SAndroid Build Coastguard Worker i16x8 bl = u8l_to_i16(b);
454*c0909341SAndroid Build Coastguard Worker i16x8 cl = u8l_to_i16(c);
455*c0909341SAndroid Build Coastguard Worker i16x8 dl = u8l_to_i16(d);
456*c0909341SAndroid Build Coastguard Worker
457*c0909341SAndroid Build Coastguard Worker ah = vec_adds(ah, vdc);
458*c0909341SAndroid Build Coastguard Worker bh = vec_adds(bh, vdc);
459*c0909341SAndroid Build Coastguard Worker ch = vec_adds(ch, vdc);
460*c0909341SAndroid Build Coastguard Worker dh = vec_adds(dh, vdc);
461*c0909341SAndroid Build Coastguard Worker al = vec_adds(al, vdc);
462*c0909341SAndroid Build Coastguard Worker bl = vec_adds(bl, vdc);
463*c0909341SAndroid Build Coastguard Worker cl = vec_adds(cl, vdc);
464*c0909341SAndroid Build Coastguard Worker dl = vec_adds(dl, vdc);
465*c0909341SAndroid Build Coastguard Worker
466*c0909341SAndroid Build Coastguard Worker a = vec_packsu(ah, al);
467*c0909341SAndroid Build Coastguard Worker b = vec_packsu(bh, bl);
468*c0909341SAndroid Build Coastguard Worker c = vec_packsu(ch, cl);
469*c0909341SAndroid Build Coastguard Worker d = vec_packsu(dh, dl);
470*c0909341SAndroid Build Coastguard Worker
471*c0909341SAndroid Build Coastguard Worker STORE_16(dst, stride, a, b, c, d)
472*c0909341SAndroid Build Coastguard Worker }
473*c0909341SAndroid Build Coastguard Worker }
474*c0909341SAndroid Build Coastguard Worker
dav1d_inv_txfm_add_dct_dct_4x4_8bpc_pwr9(uint8_t * dst,const ptrdiff_t stride,int16_t * const coeff,const int eob)475*c0909341SAndroid Build Coastguard Worker void dav1d_inv_txfm_add_dct_dct_4x4_8bpc_pwr9(uint8_t *dst, const ptrdiff_t stride,
476*c0909341SAndroid Build Coastguard Worker int16_t *const coeff, const int eob)
477*c0909341SAndroid Build Coastguard Worker {
478*c0909341SAndroid Build Coastguard Worker assert(eob >= 0);
479*c0909341SAndroid Build Coastguard Worker
480*c0909341SAndroid Build Coastguard Worker if (eob < 1) {
481*c0909341SAndroid Build Coastguard Worker return dc_only_4xN(dst, stride, coeff, 1, 0, 0);
482*c0909341SAndroid Build Coastguard Worker }
483*c0909341SAndroid Build Coastguard Worker
484*c0909341SAndroid Build Coastguard Worker LOAD_COEFF_4(coeff)
485*c0909341SAndroid Build Coastguard Worker
486*c0909341SAndroid Build Coastguard Worker dct_4_in(c0, c1, c2, c3, c01, c23)
487*c0909341SAndroid Build Coastguard Worker
488*c0909341SAndroid Build Coastguard Worker TRANSPOSE4_I32(c0, c1, c2, c3)
489*c0909341SAndroid Build Coastguard Worker
490*c0909341SAndroid Build Coastguard Worker memset(coeff, 0, sizeof(*coeff) * 4 * 4);
491*c0909341SAndroid Build Coastguard Worker
492*c0909341SAndroid Build Coastguard Worker dct_4_out(c0, c1, c2, c3, c01, c23)
493*c0909341SAndroid Build Coastguard Worker
494*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_4(dst, stride, a, b, c, d)
495*c0909341SAndroid Build Coastguard Worker
496*c0909341SAndroid Build Coastguard Worker APPLY_COEFF_4(a, b, c, d, c01, c23)
497*c0909341SAndroid Build Coastguard Worker
498*c0909341SAndroid Build Coastguard Worker STORE_4(dst, stride, a, b, c, d)
499*c0909341SAndroid Build Coastguard Worker }
500*c0909341SAndroid Build Coastguard Worker
/*
 * 4x4 inverse WHT in both directions, added to the destination (8 bpc).
 *
 * The coefficients are shifted right by 2, run through the same butterfly
 * twice with a transpose in between, packed to 16 bit and added with
 * saturation to the four destination rows.  The coefficient buffer is
 * zeroed.  `eob` is not referenced by this function body.
 */
void dav1d_inv_txfm_add_wht_wht_4x4_8bpc_pwr9(pixel *dst, const ptrdiff_t stride,
                                              coef *const coeff, const int eob)
{
    LOAD_COEFF_4(coeff)

    u32x4 in_shift = vec_splat_u32(2);
    u32x4 half_shift = vec_splat_u32(1);

    c0 = vec_sra(c0, in_shift);
    c1 = vec_sra(c1, in_shift);
    c2 = vec_sra(c2, in_shift);
    c3 = vec_sra(c3, in_shift);

    /* First pass. */
    i32x4 wht_sum = vec_add(c0, c1);
    i32x4 wht_diff = vec_sub(c2, c3);
    i32x4 wht_half = vec_sra(vec_sub(wht_sum, wht_diff), half_shift);
    i32x4 wht_out1 = vec_sub(wht_half, c3);
    i32x4 wht_out2 = vec_sub(wht_half, c1);
    c0 = vec_sub(wht_sum, wht_out1);
    c1 = wht_out1;
    c2 = wht_out2;
    c3 = vec_add(wht_diff, wht_out2);

    /* All coefficients live in vectors now; clear the buffer. */
    memset(coeff, 0, sizeof(*coeff) * 4 * 4);

    TRANSPOSE4_I32(c0, c1, c2, c3)

    /* Second pass: same butterfly on the transposed data. */
    wht_sum = vec_add(c0, c1);
    wht_diff = vec_sub(c2, c3);
    wht_half = vec_sra(vec_sub(wht_sum, wht_diff), half_shift);
    wht_out1 = vec_sub(wht_half, c3);
    wht_out2 = vec_sub(wht_half, c1);
    c0 = vec_sub(wht_sum, wht_out1);
    c1 = wht_out1;
    c2 = wht_out2;
    c3 = vec_add(wht_diff, wht_out2);

    c01 = vec_packs(c0, c1);
    c23 = vec_packs(c2, c3);

    LOAD_DECLARE_4(dst, stride, a, b, c, d)

    /* Interleave 32-bit words so rows a/b and rows c/d each share a vector. */
    u8x16 rows_ab = (u8x16)vec_mergeh((u32x4)a, (u32x4)b);
    u8x16 rows_cd = (u8x16)vec_mergeh((u32x4)c, (u32x4)d);

    /* Widen to 16 bit and add the residual with saturation. */
    i16x8 px_ab = vec_adds(u8h_to_i16(rows_ab), c01);
    i16x8 px_cd = vec_adds(u8h_to_i16(rows_cd), c23);

    a = vec_packsu(px_ab, px_ab);
    c = vec_packsu(px_cd, px_cd);

    /* Rows b and d sit in the odd 32-bit words of a and c. */
    b = (u8x16)vec_mergeo((u32x4)a, (u32x4)a);
    d = (u8x16)vec_mergeo((u32x4)c, (u32x4)c);

    STORE_4(dst, stride, a, b, c, d)
}
559*c0909341SAndroid Build Coastguard Worker
/*
 * Defines dav1d_inv_txfm_add_<type1>_<type2>_4x4_8bpc_pwr9().
 *
 * The generated function loads the coefficients, runs <type1>_4_in on them,
 * zeroes the 4x4 coefficient buffer, transposes, runs <type2>_4_out, and
 * then applies the result (c01/c23) to four destination rows.
 * `eob` is part of the signature but is not referenced directly by the
 * statements below.
 */
#define inv_txfm_fn4x4(type1, type2) \
void dav1d_inv_txfm_add_##type1##_##type2##_4x4_8bpc_pwr9(uint8_t *dst, const ptrdiff_t stride, \
                                                          int16_t *const coeff, const int eob) \
{ \
    LOAD_COEFF_4(coeff) \
    /* first pass */ \
    type1##_4_in(c0, c1, c2, c3, c01, c23) \
    memset(coeff, 0, sizeof(*coeff) * 4 * 4); \
    TRANSPOSE4_I32(c0, c1, c2, c3) \
    /* second pass */ \
    type2##_4_out(c0, c1, c2, c3, c01, c23) \
    LOAD_DECLARE_4(dst, stride, a, b, c, d) \
    APPLY_COEFF_4(a, b, c, d, c01, c23) \
    STORE_4(dst, stride, a, b, c, d) \
}
573*c0909341SAndroid Build Coastguard Worker
/*
 * Instantiate the generic 4x4 functions for every pairing of dct, adst,
 * flipadst and identity except dct/dct, which is written out by hand
 * as a separate function.
 */
inv_txfm_fn4x4(adst,     dct     )
inv_txfm_fn4x4(dct,      adst    )
inv_txfm_fn4x4(dct,      flipadst)
inv_txfm_fn4x4(flipadst, dct     )
inv_txfm_fn4x4(adst,     flipadst)
inv_txfm_fn4x4(flipadst, adst    )
inv_txfm_fn4x4(identity, dct     )
inv_txfm_fn4x4(dct,      identity)
inv_txfm_fn4x4(identity, flipadst)
inv_txfm_fn4x4(flipadst, identity)
inv_txfm_fn4x4(identity, adst    )
inv_txfm_fn4x4(adst,     identity)
inv_txfm_fn4x4(identity, identity)
inv_txfm_fn4x4(adst,     adst    )
inv_txfm_fn4x4(flipadst, flipadst)
589*c0909341SAndroid Build Coastguard Worker
590*c0909341SAndroid Build Coastguard Worker
/*
 * Core of the 8-point inverse DCT.
 *
 * c0..c7 are i32x4 inputs.  The even inputs (c0, c2, c4, c6) are handed to
 * dct4_for_dct8 together with c03 and c12.  The odd inputs (c1, c3, c5, c7)
 * are multiplied by the constant pairs 799/4017 and 3406/2276, rounded with
 * +2048 and shifted right by 12, combined with saturating 16-bit
 * arithmetic, and the middle pair is scaled by 181/256 with rounding.
 *
 * c03, c12, c74 and c65 are i16x8 variables supplied by the caller; on exit
 * they hold c03 + t7t4, c12 + t6t5, c03 - t7t4 and c12 - t6t5 respectively
 * (saturating).  The names suggest each one carries two outputs, one per
 * 64-bit half.
 *
 * The expansion declares local variables (v799, t4a, ...), so it must be
 * placed inside its own block and used at most once per block.
 */
#define IDCT_8_INNER(c0, c1, c2, c3, c4, c5, c6, c7, c03, c12, c74, c65) \
    dct4_for_dct8(c0, c2, c4, c6, c03, c12) \
    \
    i32x4 v799 = vec_splats(799); \
    i32x4 v4017 = vec_splats(4017); \
    i32x4 v3406 = vec_splats(3406); \
    i32x4 v2276 = vec_splats(2276); \
    i32x4 v2048 = vec_splats(2048); \
    u32x4 v12 = vec_splat_u32(12); \
    \
    i32x4 c1v799 = vec_mul(c1, v799); \
    i32x4 c7v4017 = vec_mul(c7, v4017); \
    i32x4 c5v3406 = vec_mul(c5, v3406); \
    i32x4 c3v2276 = vec_mul(c3, v2276); \
    i32x4 c5v2276 = vec_mul(c5, v2276); \
    i32x4 c3v3406 = vec_mul(c3, v3406); \
    i32x4 c1v4017 = vec_mul(c1, v4017); \
    i32x4 c7v799 = vec_mul(c7, v799); \
    \
    i32x4 t4a = vec_subs(c1v799, c7v4017); \
    i32x4 t5a = vec_subs(c5v3406, c3v2276); \
    i32x4 t6a = vec_adds(c5v2276, c3v3406); \
    i32x4 t7a = vec_adds(c1v4017, c7v799); \
    \
    /* round ... */ \
    t4a = vec_adds(t4a, v2048); \
    t5a = vec_adds(t5a, v2048); \
    t6a = vec_adds(t6a, v2048); \
    t7a = vec_adds(t7a, v2048); \
    \
    /* ... and scale down by 4096 */ \
    t4a = vec_sra(t4a, v12); \
    t7a = vec_sra(t7a, v12); \
    t5a = vec_sra(t5a, v12); \
    t6a = vec_sra(t6a, v12); \
    \
    i16x8 t7at4a = vec_packs(t7a, t4a); \
    i16x8 t6at5a = vec_packs(t6a, t5a); \
    \
    i16x8 t7t4 = vec_adds(t7at4a, t6at5a); \
    t6at5a = vec_subs(t7at4a, t6at5a); \
    \
    t6a = i16h_to_i32(t6at5a); \
    t5a = i16l_to_i32(t6at5a); \
    \
    i32x4 t6 = vec_add(t6a, t5a); \
    i32x4 t5 = vec_sub(t6a, t5a); \
    \
    /* (x * 181 + 128) >> 8 */ \
    t6 = vec_mul(t6, vec_splats(181)); \
    t5 = vec_mul(t5, vec_splats(181)); \
    t6 = vec_add(t6, vec_splats(128)); \
    t5 = vec_add(t5, vec_splats(128)); \
    \
    t6 = vec_sra(t6, vec_splat_u32(8)); \
    t5 = vec_sra(t5, vec_splat_u32(8)); \
    \
    i16x8 t6t5 = vec_packs(t6, t5); \
    \
    c74 = vec_subs(c03, t7t4); \
    c65 = vec_subs(c12, t6t5); \
    c03 = vec_adds(c03, t7t4); \
    c12 = vec_adds(c12, t6t5);

/*
 * Unpacks two packed i16x8 vectors into four i32x4 vectors.
 *
 * The packed sources are not passed explicitly: their names are formed by
 * pasting the destination names, so UNPACK_4_I16_I32(t0, t1, t2, t3) reads
 * the variables t0t1 and t2t3.  For each source, the first destination
 * receives i16h_to_i32() of it and the second i16l_to_i32().
 */
#define UNPACK_4_I16_I32(t0, t1, t2, t3) \
    UNPACK_PAIR_I16_I32(t0, t1, t0##t1) \
    UNPACK_PAIR_I16_I32(t2, t3, t2##t3)
657*c0909341SAndroid Build Coastguard Worker
/*
 * Splits the packed i16x8 vector `v` into two i32x4 vectors:
 * `hi` receives i16h_to_i32(v) and `lo` receives i16l_to_i32(v).
 * Both destinations must already be declared.
 *
 * The last line deliberately has no continuation backslash, so the macro
 * ends here and cannot absorb whatever line follows it.
 */
#define UNPACK_PAIR_I16_I32(hi, lo, v) \
    hi = i16h_to_i32(v); \
    lo = i16l_to_i32(v);

662*c0909341SAndroid Build Coastguard Worker
/*
 * First-pass 8-point inverse DCT, in place on eight i32x4 vectors.
 *
 * Runs IDCT_8_INNER into four i16x8 temporaries whose names are pasted from
 * the argument names (e.g. c0##c3), then unpacks them back into c0..c7.
 * Any arguments after c7 are accepted and ignored, which lets this macro be
 * called with the same argument list as the other *_8_in variants.
 */
#define dct_8_in(c0, c1, c2, c3, c4, c5, c6, c7, ...) \
{ \
    i16x8 c0##c3, c1##c2, c7##c4, c6##c5; \
    IDCT_8_INNER(c0, c1, c2, c3, c4, c5, c6, c7, c0##c3, c1##c2, c7##c4, c6##c5) \
    UNPACK_4_I16_I32(c0, c3, c1, c2) \
    UNPACK_4_I16_I32(c7, c4, c6, c5) \
}
670*c0909341SAndroid Build Coastguard Worker
/*
 * Second-pass 8-point inverse DCT.
 *
 * Inputs are the i32x4 vectors c0..c7; the results are written to the
 * i16x8 vectors c01, c23, c45 and c67, which the caller must have declared.
 * IDCT_8_INNER leaves its results paired as c03/c12/c74/c65; the 64-bit
 * merges below regroup those halves into the c01/c23/c45/c67 pairing.
 */
#define dct_8_out(c0, c1, c2, c3, c4, c5, c6, c7, c01, c23, c45, c67) \
{ \
    i16x8 c03, c12, c74, c65; \
    IDCT_8_INNER(c0, c1, c2, c3, c4, c5, c6, c7, c03, c12, c74, c65) \
    c01 = (i16x8)vec_mergeh((u64x2)c03, (u64x2)c12); \
    c23 = (i16x8)vec_mergel((u64x2)c12, (u64x2)c03); \
    c45 = (i16x8)vec_mergel((u64x2)c74, (u64x2)c65); \
    c67 = (i16x8)vec_mergeh((u64x2)c65, (u64x2)c74); \
}
680*c0909341SAndroid Build Coastguard Worker
/*
 * First-pass 8-point inverse DCT on two independent sets of eight i32x4
 * vectors (the "h" set and the "l" set), each transformed in place by
 * dct_8_in.  The trailing c0..c7 parameters are not used here; they exist
 * so the argument list matches the other *_8x2_in macros.
 */
#define dct_8x2_in(c0h, c1h, c2h, c3h, c4h, c5h, c6h, c7h, \
                   c0l, c1l, c2l, c3l, c4l, c5l, c6l, c7l, \
                   c0, c1, c2, c3, c4, c5, c6, c7) \
{ \
    dct_8_in(c0h, c1h, c2h, c3h, c4h, c5h, c6h, c7h,) \
    dct_8_in(c0l, c1l, c2l, c3l, c4l, c5l, c6l, c7l,) \
}
688*c0909341SAndroid Build Coastguard Worker
/*
 * Second-pass 8-point inverse DCT on two sets of i32x4 inputs.
 *
 * IDCT_8_INNER is run once on the "h" inputs and once on the "l" inputs,
 * each in its own block because it declares locals.  Every output c0..c7
 * (i16x8, declared by the caller) is then built from one 64-bit half of an
 * "h" result and the matching half of the "l" result.
 */
#define dct_8x2_out(c0h, c1h, c2h, c3h, c4h, c5h, c6h, c7h, \
                    c0l, c1l, c2l, c3l, c4l, c5l, c6l, c7l, \
                    c0, c1, c2, c3, c4, c5, c6, c7) \
{ \
    i16x8 c03h, c12h, c74h, c65h, c03l, c12l, c74l, c65l; \
    { \
        IDCT_8_INNER(c0h, c1h, c2h, c3h, c4h, c5h, c6h, c7h, c03h, c12h, c74h, c65h) \
    } \
    { \
        IDCT_8_INNER(c0l, c1l, c2l, c3l, c4l, c5l, c6l, c7l, c03l, c12l, c74l, c65l) \
    } \
    /* outputs listed in index order; the statements are independent */ \
    c0 = (i16x8)vec_mergeh((u64x2)c03h, (u64x2)c03l); \
    c1 = (i16x8)vec_mergeh((u64x2)c12h, (u64x2)c12l); \
    c2 = (i16x8)vec_mergel((u64x2)c12h, (u64x2)c12l); \
    c3 = (i16x8)vec_mergel((u64x2)c03h, (u64x2)c03l); \
    c4 = (i16x8)vec_mergel((u64x2)c74h, (u64x2)c74l); \
    c5 = (i16x8)vec_mergel((u64x2)c65h, (u64x2)c65l); \
    c6 = (i16x8)vec_mergeh((u64x2)c65h, (u64x2)c65l); \
    c7 = (i16x8)vec_mergeh((u64x2)c74h, (u64x2)c74l); \
}
710*c0909341SAndroid Build Coastguard Worker
/*
 * 8-point identity transform on four vectors, in place: each vector is
 * added to itself with saturation, i.e. every element is doubled.
 */
#define IDENTITY_8(c01, c23, c45, c67) \
{ \
    c01 = vec_adds(c01, c01); \
    c23 = vec_adds(c23, c23); \
    c45 = vec_adds(c45, c45); \
    c67 = vec_adds(c67, c67); \
}
718*c0909341SAndroid Build Coastguard Worker
/*
 * First-pass 8-point identity transform.
 *
 * Works on the packed i16x8 vectors c01, c23, c45 and c67 (doubling them in
 * place via IDENTITY_8) and then unpacks each of them into a pair of i32x4
 * vectors, overwriting c0..c7.  The incoming values of c0..c7 are not read.
 */
#define identity_8_in(c0, c1, c2, c3, c4, c5, c6, c7, c01, c23, c45, c67) \
{ \
    IDENTITY_8(c01, c23, c45, c67) \
    UNPACK_PAIR_I16_I32(c0, c1, c01) \
    UNPACK_PAIR_I16_I32(c2, c3, c23) \
    UNPACK_PAIR_I16_I32(c4, c5, c45) \
    UNPACK_PAIR_I16_I32(c6, c7, c67) \
}
727*c0909341SAndroid Build Coastguard Worker
/*
 * Second-pass 8-point identity transform.
 *
 * Packs the i32x4 inputs c0..c7 pairwise into the i16x8 outputs c01, c23,
 * c45 and c67 with saturation and then doubles them via IDENTITY_8.
 * Unlike its siblings this macro is not wrapped in braces; it expands to
 * four statements followed by a block.
 */
#define identity_8_out(c0, c1, c2, c3, c4, c5, c6, c7, c01, c23, c45, c67) \
    c01 = vec_packs(c0, c1); \
    c23 = vec_packs(c2, c3); \
    c45 = vec_packs(c4, c5); \
    c67 = vec_packs(c6, c7); \
    IDENTITY_8(c01, c23, c45, c67)
734*c0909341SAndroid Build Coastguard Worker
/*
 * First-pass 8-point identity transform for the two-set layout.
 *
 * The i16x8 vectors c0..c7 are doubled in place (two IDENTITY_8 calls) and
 * each is then unpacked into a pair of i32x4 vectors: c<N>h receives
 * i16h_to_i32(c<N>) and c<N>l receives i16l_to_i32(c<N>).
 */
#define identity_8x2_in(c0h, c1h, c2h, c3h, c4h, c5h, c6h, c7h, \
                        c0l, c1l, c2l, c3l, c4l, c5l, c6l, c7l, \
                        c0, c1, c2, c3, c4, c5, c6, c7) \
{ \
    IDENTITY_8(c0, c1, c2, c3) \
    IDENTITY_8(c4, c5, c6, c7) \
    UNPACK_PAIR_I16_I32(c0h, c0l, c0) \
    UNPACK_PAIR_I16_I32(c1h, c1l, c1) \
    UNPACK_PAIR_I16_I32(c2h, c2l, c2) \
    UNPACK_PAIR_I16_I32(c3h, c3l, c3) \
    UNPACK_PAIR_I16_I32(c4h, c4l, c4) \
    UNPACK_PAIR_I16_I32(c5h, c5l, c5) \
    UNPACK_PAIR_I16_I32(c6h, c6l, c6) \
    UNPACK_PAIR_I16_I32(c7h, c7l, c7) \
}
750*c0909341SAndroid Build Coastguard Worker
/*
 * Packs four h/l pairs into four already-declared outputs:
 * c<N> = vec_packs(c<N>h, c<N>l) for N = 0..3 (saturating pack).
 */
#define PACK_4(c0, c1, c2, c3, \
               c0h, c1h, c2h, c3h, \
               c0l, c1l, c2l, c3l) \
{ \
    c0 = vec_packs(c0h, c0l); \
    c1 = vec_packs(c1h, c1l); \
    c2 = vec_packs(c2h, c2l); \
    c3 = vec_packs(c3h, c3l); \
}
760*c0909341SAndroid Build Coastguard Worker
/*
 * Same as PACK_4, but also declares the four i16x8 outputs c0..c3 in the
 * enclosing scope.  Because it introduces declarations it cannot be used
 * twice with the same output names in one scope.
 */
#define DECLARE_PACK_4(c0, c1, c2, c3, \
                       c0h, c1h, c2h, c3h, \
                       c0l, c1l, c2l, c3l) \
    i16x8 c0, c1, c2, c3; \
    PACK_4(c0, c1, c2, c3, c0h, c1h, c2h, c3h, c0l, c1l, c2l, c3l);
766*c0909341SAndroid Build Coastguard Worker
/*
 * Packs eight h/l pairs into eight already-declared outputs:
 * c<N> = vec_packs(c<N>h, c<N>l) for N = 0..7.  Implemented as two PACK_4
 * expansions, one for outputs 0..3 and one for outputs 4..7.
 */
#define PACK_8(c0, c1, c2, c3, c4, c5, c6, c7, \
               c0h, c1h, c2h, c3h, c4h, c5h, c6h, c7h, \
               c0l, c1l, c2l, c3l, c4l, c5l, c6l, c7l) \
{ \
    PACK_4(c0, c1, c2, c3, c0h, c1h, c2h, c3h, c0l, c1l, c2l, c3l) \
    PACK_4(c4, c5, c6, c7, c4h, c5h, c6h, c7h, c4l, c5l, c6l, c7l) \
}
780*c0909341SAndroid Build Coastguard Worker
/*
 * Second-pass 8-point identity transform for the two-set layout.
 *
 * Packs each c<N>h/c<N>l pair into the i16x8 output c<N> (PACK_8) and then
 * doubles all eight outputs with saturation (two IDENTITY_8 calls).
 */
#define identity_8x2_out(c0h, c1h, c2h, c3h, c4h, c5h, c6h, c7h, \
                         c0l, c1l, c2l, c3l, c4l, c5l, c6l, c7l, \
                         c0, c1, c2, c3, c4, c5, c6, c7) \
{ \
    PACK_8(c0, c1, c2, c3, c4, c5, c6, c7, \
           c0h, c1h, c2h, c3h, c4h, c5h, c6h, c7h, \
           c0l, c1l, c2l, c3l, c4l, c5l, c6l, c7l) \
    IDENTITY_8(c0, c1, c2, c3) \
    IDENTITY_8(c4, c5, c6, c7) \
}
791*c0909341SAndroid Build Coastguard Worker
/*
 * Declares an i32x4 named v<val> with every lane set to <val>,
 * e.g. DECLARE_SPLAT_I32(2048) declares v2048.  `val` must be a token that
 * can be pasted after `v` to form an identifier.
 */
#define DECLARE_SPLAT_I32(val) \
    i32x4 v##val = vec_splats(val);
794*c0909341SAndroid Build Coastguard Worker
/*
 * Declares the four cross products of two vectors and two constants.
 * The result names are pasted from the argument names: with
 * (ca, cb, va, vb) the variables ca##va, cb##vb, ca##vb and cb##va are
 * declared.  ADD_SUB_PAIR consumes variables with exactly these names.
 */
#define DECLARE_MUL_PAIR_I32(ca, cb, va, vb) \
    i32x4 ca##va = vec_mul(ca, va); \
    i32x4 cb##vb = vec_mul(cb, vb); \
    i32x4 ca##vb = vec_mul(ca, vb); \
    i32x4 cb##va = vec_mul(cb, va);
800*c0909341SAndroid Build Coastguard Worker
/*
 * Combines products that were declared by DECLARE_MUL_PAIR_I32:
 *   r0 = ca*va + cb*vb
 *   r1 = ca*vb - cb*va
 * using saturating add/subtract.  The operands are looked up by pasted
 * name (ca##va etc.), so the arguments must be spelled the same way as in
 * the matching DECLARE_MUL_PAIR_I32 call; swapping va and vb selects the
 * other combination.  r0 and r1 must already be declared.
 */
#define ADD_SUB_PAIR(r0, r1, ca, cb, va, vb) \
    r0 = vec_adds(ca##va, cb##vb); \
    r1 = vec_subs(ca##vb, cb##va);
804*c0909341SAndroid Build Coastguard Worker
/*
 * Same as ADD_SUB_PAIR, but first declares r0 and r1 as i32x4 in the
 * enclosing scope.
 */
#define DECLARE_ADD_SUB_PAIR(r0, r1, ca, cb, va, vb) \
    i32x4 r0, r1; \
    ADD_SUB_PAIR(r0, r1, ca, cb, va, vb)
808*c0909341SAndroid Build Coastguard Worker
/*
 * Rounds and scales four vectors in place: each of a, b, c and d becomes
 * (x + rnd) >> shift, using a saturating add and an arithmetic right shift.
 * `rnd` and `shift` are vectors and must be distinct from a, b, c and d.
 */
#define SCALE_ROUND_4(a, b, c, d, rnd, shift) \
    a = vec_sra(vec_adds(a, rnd), shift); \
    b = vec_sra(vec_adds(b, rnd), shift); \
    c = vec_sra(vec_adds(c, rnd), shift); \
    d = vec_sra(vec_adds(d, rnd), shift);
818*c0909341SAndroid Build Coastguard Worker
/*
 * 8-point inverse ADST on i32x4 vectors.
 *
 * c0..c7 are the inputs; o0..o7 are already-declared i32x4 outputs that are
 * assigned here.  The computation runs in three stages:
 *   1. four rotations of input pairs (c7/c0, c5/c2, c3/c4, c1/c6), each
 *      rounded with +2048 and shifted right by 12;
 *   2. a butterfly, then a second set of rotations by 3784/1567 on t4..t7;
 *   3. a final butterfly, with o2..o5 additionally scaled by 181/256.
 * Between stages the 32-bit values are packed to 16 bit with vec_packs and
 * unpacked again.  NOTE(review): presumably this is how the intermediate
 * values are clamped to the int16 range - confirm against the C reference.
 *
 * The macro relies on token pasting: the output arguments must be plain
 * identifiers, because o7##o1 and o0##o6 are declared as variables, and
 * UNPACK_4_I16_I32 reads variables named t4t5, t6t7, t0t1 and t2t3.
 */
#define ADST_INNER_8(c0, c1, c2, c3, c4, c5, c6, c7, \
                     o0, o1, o2, o3, o4, o5, o6, o7) \
{ \
    DECLARE_SPLAT_I32(4076) \
    DECLARE_SPLAT_I32(401) \
    \
    DECLARE_SPLAT_I32(3612) \
    DECLARE_SPLAT_I32(1931) \
    \
    DECLARE_SPLAT_I32(2598) \
    DECLARE_SPLAT_I32(3166) \
    \
    DECLARE_SPLAT_I32(1189) \
    DECLARE_SPLAT_I32(3920) \
    \
    DECLARE_SPLAT_I32(3784) \
    DECLARE_SPLAT_I32(1567) \
    \
    DECLARE_SPLAT_I32(2048) \
    u32x4 v12 = vec_splat_u32(12); \
    \
    /* stage 1: rotate the four input pairs */ \
    DECLARE_MUL_PAIR_I32(c7, c0, v4076, v401) \
    DECLARE_MUL_PAIR_I32(c5, c2, v3612, v1931) \
    DECLARE_MUL_PAIR_I32(c3, c4, v2598, v3166) \
    DECLARE_MUL_PAIR_I32(c1, c6, v1189, v3920) \
    \
    DECLARE_ADD_SUB_PAIR(t0a, t1a, c7, c0, v4076, v401) \
    DECLARE_ADD_SUB_PAIR(t2a, t3a, c5, c2, v3612, v1931) \
    DECLARE_ADD_SUB_PAIR(t4a, t5a, c3, c4, v2598, v3166) \
    DECLARE_ADD_SUB_PAIR(t6a, t7a, c1, c6, v1189, v3920) \
    \
    SCALE_ROUND_4(t0a, t1a, t2a, t3a, v2048, v12) \
    SCALE_ROUND_4(t4a, t5a, t6a, t7a, v2048, v12) \
    \
    /* stage 2: butterfly */ \
    i32x4 t0 = vec_add(t0a, t4a); \
    i32x4 t1 = vec_add(t1a, t5a); \
    i32x4 t2 = vec_add(t2a, t6a); \
    i32x4 t3 = vec_add(t3a, t7a); \
    i32x4 t4 = vec_sub(t0a, t4a); \
    i32x4 t5 = vec_sub(t1a, t5a); \
    i32x4 t6 = vec_sub(t2a, t6a); \
    i32x4 t7 = vec_sub(t3a, t7a); \
    \
    /* pack to 16 bit with saturation, then widen back into t0..t7 */ \
    i16x8 t0t1 = vec_packs(t0, t1); \
    i16x8 t2t3 = vec_packs(t2, t3); \
    i16x8 t4t5 = vec_packs(t4, t5); \
    i16x8 t6t7 = vec_packs(t6, t7); \
    \
    UNPACK_4_I16_I32(t4, t5, t6, t7) \
    UNPACK_4_I16_I32(t0, t1, t2, t3) \
    \
    DECLARE_MUL_PAIR_I32(t4, t5, v3784, v1567) \
    DECLARE_MUL_PAIR_I32(t7, t6, v3784, v1567) \
    \
    /* the second call swaps the constants: */ \
    /* t7a = t7*1567 + t6*3784, t6a = t7*3784 - t6*1567 */ \
    ADD_SUB_PAIR(t4a, t5a, t4, t5, v3784, v1567) \
    ADD_SUB_PAIR(t7a, t6a, t7, t6, v1567, v3784) \
    \
    SCALE_ROUND_4(t4a, t5a, t6a, t7a, v2048, v12) \
    \
    /* stage 3: final butterfly */ \
    o0 = vec_add(t0, t2); \
    o1 = vec_add(t4a, t6a); \
    o7 = vec_add(t1, t3); \
    o6 = vec_add(t5a, t7a); \
    t2 = vec_sub(t0, t2); \
    t3 = vec_sub(t1, t3); \
    t6 = vec_sub(t4a, t6a); \
    t7 = vec_sub(t5a, t7a); \
    \
    i16x8 o7##o1 = vec_packs(o7, o1); \
    i16x8 o0##o6 = vec_packs(o0, o6); \
    t2t3 = vec_packs(t2, t3); \
    t6t7 = vec_packs(t6, t7); \
    \
    UNPACK_4_I16_I32(t2, t3, t6, t7) \
    UNPACK_4_I16_I32(o7, o1, o0, o6) \
    \
    /* outputs 7 and 1 are negated after the pack/unpack round trip */ \
    o7 = -o7; \
    o1 = -o1; \
    \
    o3 = vec_add(t2, t3); \
    o4 = vec_sub(t2, t3); \
    o5 = vec_sub(t6, t7); \
    o2 = vec_add(t6, t7); \
    \
    i32x4 v181 = vec_splats(181); \
    i32x4 v128 = vec_splats(128); \
    u32x4 v8 = vec_splat_u32(8); \
    \
    /* (x * 181 + 128) >> 8 for the four middle outputs */ \
    o2 = vec_mul(o2, v181); \
    o3 = vec_mul(o3, v181); \
    o4 = vec_mul(o4, v181); \
    o5 = vec_mul(o5, v181); \
    \
    SCALE_ROUND_4(o2, o3, o4, o5, v128, v8) \
    \
    o3 = -o3; \
    o5 = -o5; \
}
917*c0909341SAndroid Build Coastguard Worker
/*
 * First-pass 8-point ADST.
 *
 * Runs ADST_INNER_8 with x0..x7 as both inputs and outputs, then narrows each
 * pair with vec_packs into x01/x23/x45/x67 and widens the packed values back
 * into x0..x7 through UNPACK_PAIR_I16_I32.
 * NOTE(review): vec_packs is the saturating pack, so the pack/unpack round
 * trip presumably clips the results to the int16 range -- confirm.
 */
#define adst_8_in(x0, x1, x2, x3, x4, x5, x6, x7, x01, x23, x45, x67) \
{ \
    ADST_INNER_8(x0, x1, x2, x3, x4, x5, x6, x7, \
                 x0, x1, x2, x3, x4, x5, x6, x7) \
    x01 = vec_packs(x0, x1); \
    x23 = vec_packs(x2, x3); \
    x45 = vec_packs(x4, x5); \
    x67 = vec_packs(x6, x7); \
    UNPACK_PAIR_I16_I32(x0, x1, x01) \
    UNPACK_PAIR_I16_I32(x2, x3, x23) \
    UNPACK_PAIR_I16_I32(x4, x5, x45) \
    UNPACK_PAIR_I16_I32(x6, x7, x67) \
}
931*c0909341SAndroid Build Coastguard Worker
/*
 * Final-pass 8-point ADST.
 *
 * Runs ADST_INNER_8 with x0..x7 as both inputs and outputs and leaves the
 * result packed pairwise (vec_packs) in x01, x23, x45 and x67. Unlike the
 * _in variant, the packed values are not widened back.
 */
#define adst_8_out(x0, x1, x2, x3, x4, x5, x6, x7, x01, x23, x45, x67) \
{ \
    ADST_INNER_8(x0, x1, x2, x3, x4, x5, x6, x7, \
                 x0, x1, x2, x3, x4, x5, x6, x7) \
    x01 = vec_packs(x0, x1); \
    x23 = vec_packs(x2, x3); \
    x45 = vec_packs(x4, x5); \
    x67 = vec_packs(x6, x7); \
}
941*c0909341SAndroid Build Coastguard Worker
/*
 * First-pass 8-point ADST over two independent register sets.
 *
 * ADST_INNER_8 is run in place on h0..h7 and then on l0..l7. The trailing
 * p0..p7 parameters are accepted but not used by this macro.
 */
#define adst_8x2_in(h0, h1, h2, h3, h4, h5, h6, h7, \
                    l0, l1, l2, l3, l4, l5, l6, l7, \
                    p0, p1, p2, p3, p4, p5, p6, p7) \
{ \
    ADST_INNER_8(h0, h1, h2, h3, h4, h5, h6, h7, \
                 h0, h1, h2, h3, h4, h5, h6, h7) \
    ADST_INNER_8(l0, l1, l2, l3, l4, l5, l6, l7, \
                 l0, l1, l2, l3, l4, l5, l6, l7) \
}
951*c0909341SAndroid Build Coastguard Worker
/*
 * Final-pass 8-point ADST over two independent register sets.
 *
 * ADST_INNER_8 is run in place on h0..h7 and then on l0..l7; the two result
 * sets are then handed to PACK_8 together with p0..p7.
 * NOTE(review): PACK_8 presumably packs each (hN, lN) pair into pN --
 * confirm against its definition.
 */
#define adst_8x2_out(h0, h1, h2, h3, h4, h5, h6, h7, \
                     l0, l1, l2, l3, l4, l5, l6, l7, \
                     p0, p1, p2, p3, p4, p5, p6, p7) \
{ \
    ADST_INNER_8(h0, h1, h2, h3, h4, h5, h6, h7, \
                 h0, h1, h2, h3, h4, h5, h6, h7) \
    ADST_INNER_8(l0, l1, l2, l3, l4, l5, l6, l7, \
                 l0, l1, l2, l3, l4, l5, l6, l7) \
    PACK_8(p0, p1, p2, p3, p4, p5, p6, p7, \
           h0, h1, h2, h3, h4, h5, h6, h7, \
           l0, l1, l2, l3, l4, l5, l6, l7) \
}
964*c0909341SAndroid Build Coastguard Worker
/*
 * First-pass 8-point flipped ADST.
 *
 * Same as the plain ADST first pass, except that ADST_INNER_8 receives its
 * output registers in reverse order (x7..x0), so the results land flipped.
 * Each pair is then narrowed with vec_packs into x01/x23/x45/x67 and widened
 * back into x0..x7 through UNPACK_PAIR_I16_I32.
 */
#define flipadst_8_in(x0, x1, x2, x3, x4, x5, x6, x7, x01, x23, x45, x67) \
{ \
    ADST_INNER_8(x0, x1, x2, x3, x4, x5, x6, x7, \
                 x7, x6, x5, x4, x3, x2, x1, x0) \
    x01 = vec_packs(x0, x1); \
    x23 = vec_packs(x2, x3); \
    x45 = vec_packs(x4, x5); \
    x67 = vec_packs(x6, x7); \
    UNPACK_PAIR_I16_I32(x0, x1, x01) \
    UNPACK_PAIR_I16_I32(x2, x3, x23) \
    UNPACK_PAIR_I16_I32(x4, x5, x45) \
    UNPACK_PAIR_I16_I32(x6, x7, x67) \
}
978*c0909341SAndroid Build Coastguard Worker
/*
 * Final-pass 8-point flipped ADST.
 *
 * ADST_INNER_8 receives its output registers in reverse order (x7..x0); the
 * flipped results are left packed pairwise (vec_packs) in x01, x23, x45 and
 * x67.
 */
#define flipadst_8_out(x0, x1, x2, x3, x4, x5, x6, x7, x01, x23, x45, x67) \
{ \
    ADST_INNER_8(x0, x1, x2, x3, x4, x5, x6, x7, \
                 x7, x6, x5, x4, x3, x2, x1, x0) \
    x01 = vec_packs(x0, x1); \
    x23 = vec_packs(x2, x3); \
    x45 = vec_packs(x4, x5); \
    x67 = vec_packs(x6, x7); \
}
988*c0909341SAndroid Build Coastguard Worker
/*
 * First-pass 8-point flipped ADST over two independent register sets.
 *
 * ADST_INNER_8 is run on h0..h7 and then on l0..l7, each time with the output
 * registers listed in reverse order so the results land flipped. The trailing
 * p0..p7 parameters are accepted but not used by this macro.
 */
#define flipadst_8x2_in(h0, h1, h2, h3, h4, h5, h6, h7, \
                        l0, l1, l2, l3, l4, l5, l6, l7, \
                        p0, p1, p2, p3, p4, p5, p6, p7) \
{ \
    ADST_INNER_8(h0, h1, h2, h3, h4, h5, h6, h7, \
                 h7, h6, h5, h4, h3, h2, h1, h0) \
    ADST_INNER_8(l0, l1, l2, l3, l4, l5, l6, l7, \
                 l7, l6, l5, l4, l3, l2, l1, l0) \
}
998*c0909341SAndroid Build Coastguard Worker
/*
 * Final-pass 8-point flipped ADST over two independent register sets.
 *
 * ADST_INNER_8 is run on h0..h7 and then on l0..l7 with the output registers
 * listed in reverse order; both result sets are then handed to PACK_8
 * together with p0..p7.
 * NOTE(review): PACK_8 presumably packs each (hN, lN) pair into pN --
 * confirm against its definition.
 */
#define flipadst_8x2_out(h0, h1, h2, h3, h4, h5, h6, h7, \
                         l0, l1, l2, l3, l4, l5, l6, l7, \
                         p0, p1, p2, p3, p4, p5, p6, p7) \
{ \
    ADST_INNER_8(h0, h1, h2, h3, h4, h5, h6, h7, \
                 h7, h6, h5, h4, h3, h2, h1, h0) \
    ADST_INNER_8(l0, l1, l2, l3, l4, l5, l6, l7, \
                 l7, l6, l5, l4, l3, l2, l1, l0) \
    PACK_8(p0, p1, p2, p3, p4, p5, p6, p7, \
           h0, h1, h2, h3, h4, h5, h6, h7, \
           l0, l1, l2, l3, l4, l5, l6, l7) \
}
1011*c0909341SAndroid Build Coastguard Worker
1012*c0909341SAndroid Build Coastguard Worker void dav1d_inv_txfm_add_dct_dct_4x8_8bpc_pwr9(uint8_t *dst, const ptrdiff_t stride,
1013*c0909341SAndroid Build Coastguard Worker int16_t *const coeff, const int eob)
1014*c0909341SAndroid Build Coastguard Worker {
1015*c0909341SAndroid Build Coastguard Worker i16x8 v = vec_splats((int16_t)(2896*8));
1016*c0909341SAndroid Build Coastguard Worker
1017*c0909341SAndroid Build Coastguard Worker if (eob < 1) {
1018*c0909341SAndroid Build Coastguard Worker return dc_only_4xN(dst, stride, coeff, 2, 1, 0);
1019*c0909341SAndroid Build Coastguard Worker }
1020*c0909341SAndroid Build Coastguard Worker
1021*c0909341SAndroid Build Coastguard Worker LOAD_SCALE_COEFF_4x8(coeff, v)
1022*c0909341SAndroid Build Coastguard Worker
1023*c0909341SAndroid Build Coastguard Worker dct_4_in(c0, c1, c2, c3, c01, c23)
1024*c0909341SAndroid Build Coastguard Worker dct_4_in(c4, c5, c6, c7, c45, c67)
1025*c0909341SAndroid Build Coastguard Worker
1026*c0909341SAndroid Build Coastguard Worker
1027*c0909341SAndroid Build Coastguard Worker memset(coeff, 0, sizeof(*coeff) * 4 * 8);
1028*c0909341SAndroid Build Coastguard Worker
1029*c0909341SAndroid Build Coastguard Worker TRANSPOSE4_I32(c0, c1, c2, c3);
1030*c0909341SAndroid Build Coastguard Worker TRANSPOSE4_I32(c4, c5, c6, c7);
1031*c0909341SAndroid Build Coastguard Worker
1032*c0909341SAndroid Build Coastguard Worker dct_8_out(c0, c1, c2, c3, c4, c5, c6, c7, c01, c23, c45, c67)
1033*c0909341SAndroid Build Coastguard Worker
1034*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_4(dst, stride, a, b, cc, d)
1035*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_4(dst + 4 * stride, stride, e, f, g, hh)
1036*c0909341SAndroid Build Coastguard Worker
1037*c0909341SAndroid Build Coastguard Worker APPLY_COEFF_4(a, b, cc, d, c01, c23)
1038*c0909341SAndroid Build Coastguard Worker APPLY_COEFF_4(e, f, g, hh, c45, c67)
1039*c0909341SAndroid Build Coastguard Worker
1040*c0909341SAndroid Build Coastguard Worker STORE_4(dst, stride, a, b, cc, d)
1041*c0909341SAndroid Build Coastguard Worker STORE_4(dst + 4 * stride, stride, e, f, g, hh)
1042*c0909341SAndroid Build Coastguard Worker }
1043*c0909341SAndroid Build Coastguard Worker
1044*c0909341SAndroid Build Coastguard Worker
/*
 * Generates dav1d_inv_txfm_add_<type1>_<type2>_4x8_8bpc_pwr9().
 *
 * The generated function loads and scales the 4x8 coefficients, runs
 * <type1>_4_in on each group of four vectors, clears coeff, transposes both
 * groups, runs <type2>_8_out, and applies the packed result to the eight
 * destination rows. eob is part of the signature but is not read: these
 * variants have no DC-only shortcut.
 */
#define inv_txfm_fn4x8(type1, type2) \
void dav1d_inv_txfm_add_##type1##_##type2##_4x8_8bpc_pwr9(uint8_t *dst, const ptrdiff_t stride, \
                                                          int16_t *const coeff, const int eob) \
{ \
    i16x8 v = vec_splats((int16_t)(2896*8)); \
    LOAD_SCALE_COEFF_4x8(coeff, v) \
    type1##_4_in(c0, c1, c2, c3, c01, c23) \
    type1##_4_in(c4, c5, c6, c7, c45, c67) \
    memset(coeff, 0, sizeof(*coeff) * 4 * 8); \
    TRANSPOSE4_I32(c0, c1, c2, c3); \
    TRANSPOSE4_I32(c4, c5, c6, c7); \
    type2##_8_out(c0, c1, c2, c3, c4, c5, c6, c7, c01, c23, c45, c67) \
    LOAD_DECLARE_4(dst, stride, a, b, c, d) \
    LOAD_DECLARE_4(dst + 4 * stride, stride, e, f, g, h) \
    APPLY_COEFF_4(a, b, c, d, c01, c23) \
    APPLY_COEFF_4(e, f, g, h, c45, c67) \
    STORE_4(dst, stride, a, b, c, d) \
    STORE_4(dst + 4 * stride, stride, e, f, g, h) \
}
1064*c0909341SAndroid Build Coastguard Worker
inv_txfm_fn4x8(adst,dct)1065*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x8(adst, dct )
1066*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x8(dct, adst )
1067*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x8(dct, flipadst)
1068*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x8(flipadst, dct )
1069*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x8(adst, flipadst)
1070*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x8(flipadst, adst )
1071*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x8(identity, dct )
1072*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x8(dct, identity)
1073*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x8(identity, flipadst)
1074*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x8(flipadst, identity)
1075*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x8(identity, adst )
1076*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x8(adst, identity)
1077*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x8(identity, identity)
1078*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x8(adst, adst )
1079*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x8(flipadst, flipadst)
1080*c0909341SAndroid Build Coastguard Worker
1081*c0909341SAndroid Build Coastguard Worker
1082*c0909341SAndroid Build Coastguard Worker void dav1d_inv_txfm_add_dct_dct_8x4_8bpc_pwr9(uint8_t *dst, const ptrdiff_t stride,
1083*c0909341SAndroid Build Coastguard Worker int16_t *const coeff, const int eob)
1084*c0909341SAndroid Build Coastguard Worker {
1085*c0909341SAndroid Build Coastguard Worker i16x8 v = vec_splats((int16_t)(2896*8));
1086*c0909341SAndroid Build Coastguard Worker
1087*c0909341SAndroid Build Coastguard Worker if (eob < 1) {
1088*c0909341SAndroid Build Coastguard Worker return dc_only_8xN(dst, stride, coeff, 1, 1, 0);
1089*c0909341SAndroid Build Coastguard Worker }
1090*c0909341SAndroid Build Coastguard Worker
1091*c0909341SAndroid Build Coastguard Worker LOAD_SCALE_COEFF_8x4(coeff, v)
1092*c0909341SAndroid Build Coastguard Worker
1093*c0909341SAndroid Build Coastguard Worker dct_8_in(c0, c1, c2, c3, c4, c5, c6, c7, c01, c23, c45, c67)
1094*c0909341SAndroid Build Coastguard Worker
1095*c0909341SAndroid Build Coastguard Worker memset(coeff, 0, sizeof(*coeff) * 8 * 4);
1096*c0909341SAndroid Build Coastguard Worker
1097*c0909341SAndroid Build Coastguard Worker TRANSPOSE4_I32(c0, c1, c2, c3)
1098*c0909341SAndroid Build Coastguard Worker TRANSPOSE4_I32(c4, c5, c6, c7)
1099*c0909341SAndroid Build Coastguard Worker
1100*c0909341SAndroid Build Coastguard Worker dct_4_out(c0, c1, c2, c3, c01, c23)
1101*c0909341SAndroid Build Coastguard Worker dct_4_out(c4, c5, c6, c7, c45, c67)
1102*c0909341SAndroid Build Coastguard Worker
1103*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_4(dst, stride, ae, bf, cg, dh)
1104*c0909341SAndroid Build Coastguard Worker
1105*c0909341SAndroid Build Coastguard Worker i16x8 c04 = (i16x8)vec_mergeh((u64x2)c01, (u64x2)c45);
1106*c0909341SAndroid Build Coastguard Worker i16x8 c15 = (i16x8)vec_mergel((u64x2)c01, (u64x2)c45);
1107*c0909341SAndroid Build Coastguard Worker i16x8 c26 = (i16x8)vec_mergeh((u64x2)c23, (u64x2)c67);
1108*c0909341SAndroid Build Coastguard Worker i16x8 c37 = (i16x8)vec_mergel((u64x2)c23, (u64x2)c67);
1109*c0909341SAndroid Build Coastguard Worker
1110*c0909341SAndroid Build Coastguard Worker APPLY_COEFF_8x4(ae, bf, c04, c15)
1111*c0909341SAndroid Build Coastguard Worker APPLY_COEFF_8x4(cg, dh, c26, c37)
1112*c0909341SAndroid Build Coastguard Worker
1113*c0909341SAndroid Build Coastguard Worker STORE_8(dst, stride, ae, bf, cg, dh)
1114*c0909341SAndroid Build Coastguard Worker }
1115*c0909341SAndroid Build Coastguard Worker
1116*c0909341SAndroid Build Coastguard Worker
/*
 * Generates dav1d_inv_txfm_add_<type1>_<type2>_8x4_8bpc_pwr9().
 *
 * The generated function loads and scales the 8x4 coefficients, runs
 * <type1>_8_in, clears coeff, transposes both groups of four vectors, runs
 * <type2>_4_out on each group, merges the matching 64-bit halves of the
 * packed results and applies them to the four destination rows. eob is part
 * of the signature but is not read: these variants have no DC-only shortcut.
 */
#define inv_txfm_fn8x4(type1, type2) \
void dav1d_inv_txfm_add_##type1##_##type2##_8x4_8bpc_pwr9(uint8_t *dst, const ptrdiff_t stride, \
                                                          int16_t *const coeff, const int eob) \
{ \
    i16x8 v = vec_splats((int16_t)(2896*8)); \
    LOAD_SCALE_COEFF_8x4(coeff, v) \
    type1##_8_in(c0, c1, c2, c3, c4, c5, c6, c7, c01, c23, c45, c67) \
    memset(coeff, 0, sizeof(*coeff) * 8 * 4); \
    TRANSPOSE4_I32(c0, c1, c2, c3) \
    TRANSPOSE4_I32(c4, c5, c6, c7) \
    type2##_4_out(c0, c1, c2, c3, c01, c23) \
    type2##_4_out(c4, c5, c6, c7, c45, c67) \
    LOAD_DECLARE_4(dst, stride, ae, bf, cg, dh) \
    i16x8 c04 = (i16x8)vec_mergeh((u64x2)c01, (u64x2)c45); \
    i16x8 c15 = (i16x8)vec_mergel((u64x2)c01, (u64x2)c45); \
    i16x8 c26 = (i16x8)vec_mergeh((u64x2)c23, (u64x2)c67); \
    i16x8 c37 = (i16x8)vec_mergel((u64x2)c23, (u64x2)c67); \
    APPLY_COEFF_8x4(ae, bf, c04, c15) \
    APPLY_COEFF_8x4(cg, dh, c26, c37) \
    STORE_8(dst, stride, ae, bf, cg, dh) \
}
inv_txfm_fn8x4(adst,dct)1138*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x4(adst, dct )
1139*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x4(dct, adst )
1140*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x4(dct, flipadst)
1141*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x4(flipadst, dct )
1142*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x4(adst, flipadst)
1143*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x4(flipadst, adst )
1144*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x4(identity, dct )
1145*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x4(dct, identity)
1146*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x4(identity, flipadst)
1147*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x4(flipadst, identity)
1148*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x4(identity, adst )
1149*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x4(adst, identity)
1150*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x4(identity, identity)
1151*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x4(adst, adst )
1152*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x4(flipadst, flipadst)
1153*c0909341SAndroid Build Coastguard Worker
1154*c0909341SAndroid Build Coastguard Worker void dav1d_inv_txfm_add_dct_dct_8x8_8bpc_pwr9(uint8_t *dst, const ptrdiff_t stride,
1155*c0909341SAndroid Build Coastguard Worker int16_t *const coeff, const int eob)
1156*c0909341SAndroid Build Coastguard Worker {
1157*c0909341SAndroid Build Coastguard Worker if (eob < 1) {
1158*c0909341SAndroid Build Coastguard Worker return dc_only_8xN(dst, stride, coeff, 2, 0, 1);
1159*c0909341SAndroid Build Coastguard Worker }
1160*c0909341SAndroid Build Coastguard Worker
1161*c0909341SAndroid Build Coastguard Worker LOAD_COEFF_8x8(coeff)
1162*c0909341SAndroid Build Coastguard Worker
1163*c0909341SAndroid Build Coastguard Worker dct_8x2_in(c0h, c1h, c2h, c3h, c4h, c5h, c6h, c7h,
1164*c0909341SAndroid Build Coastguard Worker c0l, c1l, c2l, c3l, c4l, c5l, c6l, c7l,
1165*c0909341SAndroid Build Coastguard Worker c0, c1, c2, c3, c4, c5, c6, c7)
1166*c0909341SAndroid Build Coastguard Worker
1167*c0909341SAndroid Build Coastguard Worker memset(coeff, 0, sizeof(*coeff) * 8 * 8);
1168*c0909341SAndroid Build Coastguard Worker
1169*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(c0h, c1h, c2h, c3h, vec_splat_s32(1), vec_splat_u32(1))
1170*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(c4h, c5h, c6h, c7h, vec_splat_s32(1), vec_splat_u32(1))
1171*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(c0l, c1l, c2l, c3l, vec_splat_s32(1), vec_splat_u32(1))
1172*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(c4l, c5l, c6l, c7l, vec_splat_s32(1), vec_splat_u32(1))
1173*c0909341SAndroid Build Coastguard Worker
1174*c0909341SAndroid Build Coastguard Worker TRANSPOSE8_I32(c0h, c1h, c2h, c3h, c4h, c5h, c6h, c7h,
1175*c0909341SAndroid Build Coastguard Worker c0l, c1l, c2l, c3l, c4l, c5l, c6l, c7l)
1176*c0909341SAndroid Build Coastguard Worker
1177*c0909341SAndroid Build Coastguard Worker dct_8x2_out(c0h, c1h, c2h, c3h, c4h, c5h, c6h, c7h,
1178*c0909341SAndroid Build Coastguard Worker c0l, c1l, c2l, c3l, c4l, c5l, c6l, c7l,
1179*c0909341SAndroid Build Coastguard Worker c0, c1, c2, c3, c4, c5, c6, c7)
1180*c0909341SAndroid Build Coastguard Worker
1181*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_4(dst, stride, a, b, cc, d)
1182*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_4(dst + 4 * stride, stride, e, f, g, hh)
1183*c0909341SAndroid Build Coastguard Worker
1184*c0909341SAndroid Build Coastguard Worker APPLY_COEFF_8x4(a, b, c0, c1)
1185*c0909341SAndroid Build Coastguard Worker APPLY_COEFF_8x4(cc, d, c2, c3)
1186*c0909341SAndroid Build Coastguard Worker APPLY_COEFF_8x4(e, f, c4, c5)
1187*c0909341SAndroid Build Coastguard Worker APPLY_COEFF_8x4(g, hh, c6, c7)
1188*c0909341SAndroid Build Coastguard Worker
1189*c0909341SAndroid Build Coastguard Worker STORE_8(dst, stride, a, b, cc, d)
1190*c0909341SAndroid Build Coastguard Worker STORE_8(dst + 4 * stride, stride, e, f, g, hh)
1191*c0909341SAndroid Build Coastguard Worker }
1192*c0909341SAndroid Build Coastguard Worker
/*
 * Generates dav1d_inv_txfm_add_<type1>_<type2>_8x8_8bpc_pwr9().
 *
 * The generated function loads the 8x8 coefficients, runs <type1>_8x2_in,
 * applies the SCALE_ROUND_4 step to all sixteen vectors, clears coeff,
 * transposes, runs <type2>_8x2_out and applies c0..c7 to the eight
 * destination rows. eob is part of the signature but is not read: these
 * variants have no DC-only shortcut.
 */
#define inv_txfm_fn8x8(type1, type2) \
void dav1d_inv_txfm_add_##type1##_##type2##_8x8_8bpc_pwr9(uint8_t *dst, const ptrdiff_t stride, \
                                                          int16_t *const coeff, const int eob) \
{ \
    LOAD_COEFF_8x8(coeff) \
    type1##_8x2_in(c0h, c1h, c2h, c3h, c4h, c5h, c6h, c7h, \
                   c0l, c1l, c2l, c3l, c4l, c5l, c6l, c7l, \
                   c0, c1, c2, c3, c4, c5, c6, c7) \
    SCALE_ROUND_4(c0h, c1h, c2h, c3h, vec_splat_s32(1), vec_splat_u32(1)) \
    SCALE_ROUND_4(c4h, c5h, c6h, c7h, vec_splat_s32(1), vec_splat_u32(1)) \
    SCALE_ROUND_4(c0l, c1l, c2l, c3l, vec_splat_s32(1), vec_splat_u32(1)) \
    SCALE_ROUND_4(c4l, c5l, c6l, c7l, vec_splat_s32(1), vec_splat_u32(1)) \
    memset(coeff, 0, sizeof(*coeff) * 8 * 8); \
    TRANSPOSE8_I32(c0h, c1h, c2h, c3h, c4h, c5h, c6h, c7h, \
                   c0l, c1l, c2l, c3l, c4l, c5l, c6l, c7l) \
    type2##_8x2_out(c0h, c1h, c2h, c3h, c4h, c5h, c6h, c7h, \
                    c0l, c1l, c2l, c3l, c4l, c5l, c6l, c7l, \
                    c0, c1, c2, c3, c4, c5, c6, c7) \
    LOAD_DECLARE_4(dst, stride, a, b, c, d) \
    LOAD_DECLARE_4(dst + 4 * stride, stride, e, f, g, h) \
    APPLY_COEFF_8x4(a, b, c0, c1) \
    APPLY_COEFF_8x4(c, d, c2, c3) \
    APPLY_COEFF_8x4(e, f, c4, c5) \
    APPLY_COEFF_8x4(g, h, c6, c7) \
    STORE_8(dst, stride, a, b, c, d) \
    STORE_8(dst + 4 * stride, stride, e, f, g, h) \
}
inv_txfm_fn8x8(adst,dct)1220*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x8(adst, dct )
1221*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x8(dct, adst )
1222*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x8(dct, flipadst)
1223*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x8(flipadst, dct )
1224*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x8(adst, flipadst)
1225*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x8(flipadst, adst )
1226*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x8(dct, identity)
1227*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x8(flipadst, identity)
1228*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x8(adst, identity)
1229*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x8(adst, adst )
1230*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x8(flipadst, flipadst)
1231*c0909341SAndroid Build Coastguard Worker
/*
 * Generates dav1d_inv_txfm_add_identity_<type2>_8x8_8bpc_pwr9().
 *
 * An identity first pass followed by the inter-pass scaling is a no-op, so
 * the generated function goes straight from loading the coefficients to the
 * transpose: it runs neither a first-pass transform nor SCALE_ROUND_4. It
 * then runs <type2>_8x2_out and applies c0..c7 to the eight destination
 * rows. eob is part of the signature but is not read.
 */
#define inv_txfm_fn8x8_identity(type2) \
void dav1d_inv_txfm_add_identity_##type2##_8x8_8bpc_pwr9(uint8_t *dst, const ptrdiff_t stride, \
                                                         int16_t *const coeff, const int eob) \
{ \
    LOAD_COEFF_8x8(coeff) \
    memset(coeff, 0, sizeof(*coeff) * 8 * 8); \
    TRANSPOSE8_I32(c0h, c1h, c2h, c3h, c4h, c5h, c6h, c7h, \
                   c0l, c1l, c2l, c3l, c4l, c5l, c6l, c7l) \
    type2##_8x2_out(c0h, c1h, c2h, c3h, c4h, c5h, c6h, c7h, \
                    c0l, c1l, c2l, c3l, c4l, c5l, c6l, c7l, \
                    c0, c1, c2, c3, c4, c5, c6, c7) \
    LOAD_DECLARE_4(dst, stride, a, b, c, d) \
    LOAD_DECLARE_4(dst + 4 * stride, stride, e, f, g, h) \
    APPLY_COEFF_8x4(a, b, c0, c1) \
    APPLY_COEFF_8x4(c, d, c2, c3) \
    APPLY_COEFF_8x4(e, f, c4, c5) \
    APPLY_COEFF_8x4(g, h, c6, c7) \
    STORE_8(dst, stride, a, b, c, d) \
    STORE_8(dst + 4 * stride, stride, e, f, g, h) \
}
1253*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x8_identity(dct )
1254*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x8_identity(flipadst)
1255*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x8_identity(adst )
1256*c0909341SAndroid Build Coastguard Worker inv_txfm_fn8x8_identity(identity)
1257*c0909341SAndroid Build Coastguard Worker
/*
 * Round-trips eight vectors through 16 bits.
 *
 * Each pair (v0,v1), (v2,v3), (v4,v5), (v6,v7) is narrowed with vec_packs
 * into v01/v23/v45/v67 and then widened back into the same eight vectors
 * with UNPACK_PAIR_I16_I32. The packed vectors remain available to the
 * caller.
 * NOTE(review): vec_packs is the saturating pack, so this presumably clips
 * every lane to the int16 range, as the macro name suggests -- confirm.
 */
#define CLIP16_I32_8(v0, v1, v2, v3, v4, v5, v6, v7, \
                     v01, v23, v45, v67) \
{ \
    v01 = vec_packs(v0, v1); \
    v23 = vec_packs(v2, v3); \
    v45 = vec_packs(v4, v5); \
    v67 = vec_packs(v6, v7); \
    UNPACK_PAIR_I16_I32(v0, v1, v01) \
    UNPACK_PAIR_I16_I32(v2, v3, v23) \
    UNPACK_PAIR_I16_I32(v4, v5, v45) \
    UNPACK_PAIR_I16_I32(v6, v7, v67) \
}
1270*c0909341SAndroid Build Coastguard Worker
/*
 * Multiplies four vectors by v in place: x = vec_mul(x, v) for each of
 * a, b, c and d.
 *
 * The statements are wrapped in a block, like the other statement macros in
 * this file, so the expansion always behaves as a single statement. The
 * definition deliberately ends without a trailing line continuation, which
 * would splice whatever source line follows into the macro body.
 */
#define MUL_4_INPLACE(a, b, c, d, v) \
{ \
    a = vec_mul(a, v); \
    b = vec_mul(b, v); \
    c = vec_mul(c, v); \
    d = vec_mul(d, v); \
}
1276*c0909341SAndroid Build Coastguard Worker
/*
 * 16-point identity scaling of one i16x8 vector, in place.
 *
 * Computes x = vec_mradds(x, v1697_16, vec_adds(x, x)), i.e. the saturating
 * double of x plus the rounded high-half product of x and v1697_16.
 * v1697_16 is not a parameter: it must already be in scope at the point of
 * expansion.
 */
#define IDENTITY_16_V(x) \
{ \
    i16x8 x_twice_ = vec_adds(x, x); \
    x = vec_mradds(x, v1697_16, x_twice_); \
}
1282*c0909341SAndroid Build Coastguard Worker
/*
 * Applies IDENTITY_16_V in place to eight packed i16x8 vectors.
 *
 * Declares the v1697_16 constant (1697 * 16 splatted as int16_t) that
 * IDENTITY_16_V reads from the enclosing scope.
 */
#define IDENTITY_16_INNER(p0, p1, p2, p3, \
                          p4, p5, p6, p7) \
{ \
    i16x8 v1697_16 = vec_splats((int16_t)(1697*16)); \
    IDENTITY_16_V(p0) \
    IDENTITY_16_V(p1) \
    IDENTITY_16_V(p2) \
    IDENTITY_16_V(p3) \
    IDENTITY_16_V(p4) \
    IDENTITY_16_V(p5) \
    IDENTITY_16_V(p6) \
    IDENTITY_16_V(p7) \
}
1296*c0909341SAndroid Build Coastguard Worker
/*
 * 16-point identity scaling of four i32x4 vectors, in place.
 *
 * For each vector x: saves 2 * x, multiplies x by v1697, passes it through
 * SCALE_ROUND_4 with v1024 and a shift of 11, and adds the saved double back.
 * v1697 and v1024 are not parameters: they must already be in scope at the
 * point of expansion.
 * NOTE(review): SCALE_ROUND_4 is presumably (x + v1024) >> 11, giving
 * x = 2 * x + round(x * 1697 / 2048) -- confirm against its definition.
 */
#define IDENTITY_16_4_I32(w0, w1, w2, w3) \
{ \
    i32x4 twice0 = vec_add(w0, w0); \
    i32x4 twice1 = vec_add(w1, w1); \
    i32x4 twice2 = vec_add(w2, w2); \
    i32x4 twice3 = vec_add(w3, w3); \
    MUL_4_INPLACE(w0, w1, w2, w3, v1697) \
    SCALE_ROUND_4(w0, w1, w2, w3, v1024, vec_splat_u32(11)); \
    w0 = vec_add(twice0, w0); \
    w1 = vec_add(twice1, w1); \
    w2 = vec_add(twice2, w2); \
    w3 = vec_add(twice3, w3); \
}
1310*c0909341SAndroid Build Coastguard Worker
1311*c0909341SAndroid Build Coastguard Worker
/*
 * First-pass identity-16 transform on sixteen 32-bit vectors, in place.
 *
 * Declares the 1697 and 1024 splat constants via DECLARE_SPLAT_I32
 * (presumably as v1697 / v1024, the names IDENTITY_16_4_I32 reads -- confirm)
 * and then applies IDENTITY_16_4_I32 to c00..c15 in groups of four.
 *
 * The packed parameters (c00c01 .. c14c15) are accepted but not used in this
 * body.
 */
1312*c0909341SAndroid Build Coastguard Worker #define identity_16_in(c00, c01, c02, c03, c04, c05, c06, c07, \
1313*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, c12, c13, c14, c15, \
1314*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07, \
1315*c0909341SAndroid Build Coastguard Worker c08c09, c10c11, c12c13, c14c15) \
1316*c0909341SAndroid Build Coastguard Worker { \
1317*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(1697) \
1318*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(1024) \
1319*c0909341SAndroid Build Coastguard Worker IDENTITY_16_4_I32(c00, c01, c02, c03) \
1320*c0909341SAndroid Build Coastguard Worker IDENTITY_16_4_I32(c04, c05, c06, c07) \
1321*c0909341SAndroid Build Coastguard Worker IDENTITY_16_4_I32(c08, c09, c10, c11) \
1322*c0909341SAndroid Build Coastguard Worker IDENTITY_16_4_I32(c12, c13, c14, c15) \
1323*c0909341SAndroid Build Coastguard Worker }
1324*c0909341SAndroid Build Coastguard Worker
/*
 * Second-pass identity-16 transform producing packed 16-bit output.
 *
 * Calls PACK_8 with the eight packed destinations, then the even-indexed
 * inputs (c00, c02, ..., c14), then the odd-indexed inputs (c01, c03, ...,
 * c15); PACK_8 is defined elsewhere and presumably narrows each even/odd
 * pair into one i16x8 -- confirm. IDENTITY_16_INNER is then applied to the
 * eight packed vectors in place.
 */
1325*c0909341SAndroid Build Coastguard Worker #define identity_16_out(c00, c01, c02, c03, c04, c05, c06, c07, \
1326*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, c12, c13, c14, c15, \
1327*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07, \
1328*c0909341SAndroid Build Coastguard Worker c08c09, c10c11, c12c13, c14c15) \
1329*c0909341SAndroid Build Coastguard Worker { \
1330*c0909341SAndroid Build Coastguard Worker PACK_8(c00c01, c02c03, c04c05, c06c07, c08c09, c10c11, c12c13, c14c15, \
1331*c0909341SAndroid Build Coastguard Worker c00, c02, c04, c06, c08, c10, c12, c14, \
1332*c0909341SAndroid Build Coastguard Worker c01, c03, c05, c07, c09, c11, c13, c15) \
1333*c0909341SAndroid Build Coastguard Worker IDENTITY_16_INNER(c00c01, c02c03, c04c05, c06c07, \
1334*c0909341SAndroid Build Coastguard Worker c08c09, c10c11, c12c13, c14c15) \
1335*c0909341SAndroid Build Coastguard Worker }
1336*c0909341SAndroid Build Coastguard Worker
/*
 * Core of the 16-point inverse DCT (going by the macro name).
 *
 * Stages, in the order they appear below:
 *   - IDCT_8_INNER is expanded on the even-indexed inputs (c00, c02, ...,
 *     c14) with c00c03, c01c02, c07c04, c06c05 as its packed arguments.
 *   - The odd-indexed inputs go through multiply-pair / add-sub stages with
 *     the constant pairs (401, 4076), (3166, 2598), (1931, 3612),
 *     (3920, 1189), then (1567, 3784), each followed by SCALE_ROUND_4 with
 *     v2048 / v12, and finally a multiply by 181 with SCALE_ROUND_4 using
 *     v128 and a shift count of 8.
 *   - CLIP16_I32_8 is applied after each add/sub stage.
 *   - DECLARE_PACK_4 builds t15at12, t14t13a, t08at11, t09t10a, and the last
 *     eight statements write the saturating differences (vec_subs) to
 *     c15c12, c14c13, c08c11, c09c10 and the saturating sums (vec_adds) back
 *     to c00c03, c01c02, c07c04, c06c05.
 *
 * Notes for callers:
 *   - The body is NOT wrapped in braces, so every name introduced by the
 *     DECLARE_* helpers lands in the scope of the expansion site.
 *   - v2048 and v12 are used but not declared in this body; presumably
 *     IDCT_8_INNER declares them -- confirm.
 *   - c08c11, c09c10, c14c13, c15c12 are also handed to CLIP16_I32_8 before
 *     their final assignment, presumably as scratch -- confirm against
 *     CLIP16_I32_8.
 */
1337*c0909341SAndroid Build Coastguard Worker #define IDCT_16_INNER(c00, c01, c02, c03, c04, c05, c06, c07, \
1338*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, c12, c13, c14, c15, \
1339*c0909341SAndroid Build Coastguard Worker c00c03, c01c02, c07c04, c06c05, \
1340*c0909341SAndroid Build Coastguard Worker c08c11, c09c10, c14c13, c15c12) \
1341*c0909341SAndroid Build Coastguard Worker IDCT_8_INNER(c00, c02, c04, c06, c08, c10, c12, c14, \
1342*c0909341SAndroid Build Coastguard Worker c00c03, c01c02, c07c04, c06c05) \
1343*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(128) \
1344*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(181) \
1345*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(401) \
1346*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(4076) \
1347*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(3166) \
1348*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(2598) \
1349*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(1931) \
1350*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(3612) \
1351*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(3920) \
1352*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(1189) \
1353*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(1567) \
1354*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(3784) \
1355*c0909341SAndroid Build Coastguard Worker \
1356*c0909341SAndroid Build Coastguard Worker DECLARE_MUL_PAIR_I32(c01, c15, v401, v4076) \
1357*c0909341SAndroid Build Coastguard Worker DECLARE_MUL_PAIR_I32(c09, c07, v3166, v2598) \
1358*c0909341SAndroid Build Coastguard Worker DECLARE_MUL_PAIR_I32(c05, c11, v1931, v3612) \
1359*c0909341SAndroid Build Coastguard Worker DECLARE_MUL_PAIR_I32(c13, c03, v3920, v1189) \
1360*c0909341SAndroid Build Coastguard Worker \
1361*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t15a, t08a, c01, c15, v4076, v401) \
1362*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t14a, t09a, c09, c07, v2598, v3166) \
1363*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t13a, t10a, c05, c11, v3612, v1931) \
1364*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t12a, t11a, c13, c03, v1189, v3920) \
1365*c0909341SAndroid Build Coastguard Worker \
1366*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(t15a, t08a, t14a, t09a, v2048, v12) \
1367*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(t13a, t10a, t12a, t11a, v2048, v12) \
1368*c0909341SAndroid Build Coastguard Worker \
1369*c0909341SAndroid Build Coastguard Worker CLIP16_I32_8(t15a, t08a, t14a, t09a, \
1370*c0909341SAndroid Build Coastguard Worker t13a, t10a, t12a, t11a, \
1371*c0909341SAndroid Build Coastguard Worker c08c11, c09c10, c14c13, c15c12) \
1372*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t08, t09, t08a, t09a,,) \
1373*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t11, t10, t11a, t10a,,) \
1374*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t12, t13, t12a, t13a,,) \
1375*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t15, t14, t15a, t14a,,) \
1376*c0909341SAndroid Build Coastguard Worker \
1377*c0909341SAndroid Build Coastguard Worker CLIP16_I32_8(t08, t09, t11, t10, \
1378*c0909341SAndroid Build Coastguard Worker t12, t13, t15, t14, \
1379*c0909341SAndroid Build Coastguard Worker c08c11, c09c10, c14c13, c15c12) \
1380*c0909341SAndroid Build Coastguard Worker \
1381*c0909341SAndroid Build Coastguard Worker DECLARE_MUL_PAIR_I32(t14, t09, v1567, v3784) \
1382*c0909341SAndroid Build Coastguard Worker DECLARE_MUL_PAIR_I32(t13, t10, v1567, v3784) \
1383*c0909341SAndroid Build Coastguard Worker \
1384*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t14a, t09a, t14, t09, v3784, v1567) \
1385*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t10a, t13a, t13, t10, v3784, v1567) \
1386*c0909341SAndroid Build Coastguard Worker t10a = -t10a; \
1387*c0909341SAndroid Build Coastguard Worker \
1388*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(t14a, t09a, t13a, t10a, v2048, v12) \
1389*c0909341SAndroid Build Coastguard Worker \
1390*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t08a, t11a, t08, t11,,) \
1391*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t09, t10, t09a, t10a,,) \
1392*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t15a, t12a, t15, t12,,) \
1393*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t14, t13, t14a, t13a,,) \
1394*c0909341SAndroid Build Coastguard Worker \
1395*c0909341SAndroid Build Coastguard Worker CLIP16_I32_8(t08a, t11a, t09, t10, \
1396*c0909341SAndroid Build Coastguard Worker t15a, t12a, t14, t13, \
1397*c0909341SAndroid Build Coastguard Worker c08c11, c09c10, c14c13, c15c12) \
1398*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t13a, t10a, t13, t10,,); \
1399*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t12, t11, t12a, t11a,,); \
1400*c0909341SAndroid Build Coastguard Worker \
1401*c0909341SAndroid Build Coastguard Worker MUL_4_INPLACE(t13a, t10a, t12, t11, v181); \
1402*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(t13a, t10a, t12, t11, v128, vec_splat_u32(8)) \
1403*c0909341SAndroid Build Coastguard Worker \
1404*c0909341SAndroid Build Coastguard Worker DECLARE_PACK_4(t15at12, t14t13a, t08at11, t09t10a, \
1405*c0909341SAndroid Build Coastguard Worker t15a, t14, t08a, t09, \
1406*c0909341SAndroid Build Coastguard Worker t12, t13a, t11, t10a) \
1407*c0909341SAndroid Build Coastguard Worker \
1408*c0909341SAndroid Build Coastguard Worker c15c12 = vec_subs(c00c03, t15at12); \
1409*c0909341SAndroid Build Coastguard Worker c14c13 = vec_subs(c01c02, t14t13a); \
1410*c0909341SAndroid Build Coastguard Worker c08c11 = vec_subs(c07c04, t08at11); \
1411*c0909341SAndroid Build Coastguard Worker c09c10 = vec_subs(c06c05, t09t10a); \
1412*c0909341SAndroid Build Coastguard Worker c00c03 = vec_adds(c00c03, t15at12); \
1413*c0909341SAndroid Build Coastguard Worker c01c02 = vec_adds(c01c02, t14t13a); \
1414*c0909341SAndroid Build Coastguard Worker c07c04 = vec_adds(c07c04, t08at11); \
1415*c0909341SAndroid Build Coastguard Worker c06c05 = vec_adds(c06c05, t09t10a); \
1416*c0909341SAndroid Build Coastguard Worker
/*
 * Second-pass 16-point inverse DCT producing packed 16-bit output.
 *
 * Declares eight i16x8 temporaries (c00c03, c01c02, c07c04, c06c05, c08c11,
 * c09c10, c14c13, c15c12), runs IDCT_16_INNER into them, and then regroups
 * their 64-bit halves with vec_mergeh / vec_mergel (operating on the vectors
 * viewed as u64x2) into the caller's c00c01 .. c14c15 so that each output
 * holds the pair named by its identifier.
 *
 * The body is not wrapped in braces: the temporaries, and everything
 * IDCT_16_INNER declares, are introduced directly into the scope of the
 * expansion site.
 */
1417*c0909341SAndroid Build Coastguard Worker #define dct_16_out(c00, c01, c02, c03, c04, c05, c06, c07, \
1418*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, c12, c13, c14, c15, \
1419*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07, c08c09, c10c11, c12c13, c14c15) \
1420*c0909341SAndroid Build Coastguard Worker \
1421*c0909341SAndroid Build Coastguard Worker i16x8 c00c03, c01c02, c07c04, c06c05, c08c11, c09c10, c14c13, c15c12; \
1422*c0909341SAndroid Build Coastguard Worker IDCT_16_INNER(c00, c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14, c15, \
1423*c0909341SAndroid Build Coastguard Worker c00c03, c01c02, c07c04, c06c05, c08c11, c09c10, c14c13, c15c12) \
1424*c0909341SAndroid Build Coastguard Worker c00c01 = (i16x8)vec_mergeh((u64x2)c00c03, (u64x2)c01c02); \
1425*c0909341SAndroid Build Coastguard Worker c02c03 = (i16x8)vec_mergel((u64x2)c01c02, (u64x2)c00c03); \
1426*c0909341SAndroid Build Coastguard Worker c04c05 = (i16x8)vec_mergel((u64x2)c07c04, (u64x2)c06c05); \
1427*c0909341SAndroid Build Coastguard Worker c06c07 = (i16x8)vec_mergeh((u64x2)c06c05, (u64x2)c07c04); \
1428*c0909341SAndroid Build Coastguard Worker c08c09 = (i16x8)vec_mergeh((u64x2)c08c11, (u64x2)c09c10); \
1429*c0909341SAndroid Build Coastguard Worker c10c11 = (i16x8)vec_mergel((u64x2)c09c10, (u64x2)c08c11); \
1430*c0909341SAndroid Build Coastguard Worker c12c13 = (i16x8)vec_mergel((u64x2)c15c12, (u64x2)c14c13); \
1431*c0909341SAndroid Build Coastguard Worker c14c15 = (i16x8)vec_mergeh((u64x2)c14c13, (u64x2)c15c12); \
1432*c0909341SAndroid Build Coastguard Worker
/*
 * First-pass 16-point inverse DCT that leaves its result in the sixteen
 * 32-bit vectors c00..c15.
 *
 * Runs IDCT_16_INNER using the caller-supplied packed vectors (c00c03,
 * c01c02, c07c04, c06c05, c08c11, c09c10, c14c13, c15c12), then expands each
 * packed vector back into the two 32-bit vectors named in its identifier
 * via UNPACK_PAIR_I16_I32 (defined elsewhere; presumably a widening unpack
 * -- confirm).
 *
 * Unlike dct_16_out, the packed vectors are macro parameters here and must
 * already be declared by the caller. The body is not wrapped in braces.
 */
1433*c0909341SAndroid Build Coastguard Worker #define dct_16_in(c00, c01, c02, c03, c04, c05, c06, c07, \
1434*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, c12, c13, c14, c15, \
1435*c0909341SAndroid Build Coastguard Worker c00c03, c01c02, c07c04, c06c05, c08c11, c09c10, c14c13, c15c12) \
1436*c0909341SAndroid Build Coastguard Worker \
1437*c0909341SAndroid Build Coastguard Worker IDCT_16_INNER(c00, c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14, c15, \
1438*c0909341SAndroid Build Coastguard Worker c00c03, c01c02, c07c04, c06c05, c08c11, c09c10, c14c13, c15c12) \
1439*c0909341SAndroid Build Coastguard Worker UNPACK_PAIR_I16_I32(c00, c03, c00c03) \
1440*c0909341SAndroid Build Coastguard Worker UNPACK_PAIR_I16_I32(c01, c02, c01c02) \
1441*c0909341SAndroid Build Coastguard Worker UNPACK_PAIR_I16_I32(c07, c04, c07c04) \
1442*c0909341SAndroid Build Coastguard Worker UNPACK_PAIR_I16_I32(c06, c05, c06c05) \
1443*c0909341SAndroid Build Coastguard Worker UNPACK_PAIR_I16_I32(c08, c11, c08c11) \
1444*c0909341SAndroid Build Coastguard Worker UNPACK_PAIR_I16_I32(c09, c10, c09c10) \
1445*c0909341SAndroid Build Coastguard Worker UNPACK_PAIR_I16_I32(c14, c13, c14c13) \
1446*c0909341SAndroid Build Coastguard Worker UNPACK_PAIR_I16_I32(c15, c12, c15c12) \
1447*c0909341SAndroid Build Coastguard Worker
1448*c0909341SAndroid Build Coastguard Worker
/*
 * Apply dct_4_in to four independent groups of four vectors (A, B, C, D).
 *
 * Each group is given its own pair of packed arguments: (a0b0, c0d0) for A,
 * (a1b1, c1d1) for B, (a2b2, c2d2) for C and (a3b3, c3d3) for D. The
 * expansion is not wrapped in braces.
 */
1449*c0909341SAndroid Build Coastguard Worker #define dct_4x4_in(cA0, cA1, cA2, cA3, cB0, cB1, cB2, cB3, \
1450*c0909341SAndroid Build Coastguard Worker cC0, cC1, cC2, cC3, cD0, cD1, cD2, cD3, \
1451*c0909341SAndroid Build Coastguard Worker a0b0, c0d0, a1b1, c1d1, a2b2, c2d2, a3b3, c3d3) \
1452*c0909341SAndroid Build Coastguard Worker dct_4_in(cA0, cA1, cA2, cA3, a0b0, c0d0) \
1453*c0909341SAndroid Build Coastguard Worker dct_4_in(cB0, cB1, cB2, cB3, a1b1, c1d1) \
1454*c0909341SAndroid Build Coastguard Worker dct_4_in(cC0, cC1, cC2, cC3, a2b2, c2d2) \
1455*c0909341SAndroid Build Coastguard Worker dct_4_in(cD0, cD1, cD2, cD3, a3b3, c3d3)
1456*c0909341SAndroid Build Coastguard Worker
1457*c0909341SAndroid Build Coastguard Worker
/*
 * Narrow sixteen 32-bit vectors into eight packed vectors with vec_packs.
 *
 * The inputs are paired across the four groups of four rather than within
 * them, so an output's identifier does not name the inputs it holds:
 *   c00c01 = (c00, c04)   c02c03 = (c08, c12)
 *   c04c05 = (c01, c05)   c06c07 = (c09, c13)
 *   c08c09 = (c02, c06)   c10c11 = (c10, c14)
 *   c12c13 = (c03, c07)   c14c15 = (c11, c15)
 */
1458*c0909341SAndroid Build Coastguard Worker #define PACK_4x4(c00, c01, c02, c03, \
1459*c0909341SAndroid Build Coastguard Worker c04, c05, c06, c07, \
1460*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, \
1461*c0909341SAndroid Build Coastguard Worker c12, c13, c14, c15, \
1462*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07, \
1463*c0909341SAndroid Build Coastguard Worker c08c09, c10c11, c12c13, c14c15) \
1464*c0909341SAndroid Build Coastguard Worker { \
1465*c0909341SAndroid Build Coastguard Worker c00c01 = vec_packs(c00, c04); c02c03 = vec_packs(c08, c12); \
1466*c0909341SAndroid Build Coastguard Worker c04c05 = vec_packs(c01, c05); c06c07 = vec_packs(c09, c13); \
1467*c0909341SAndroid Build Coastguard Worker c08c09 = vec_packs(c02, c06); c10c11 = vec_packs(c10, c14); \
1468*c0909341SAndroid Build Coastguard Worker c12c13 = vec_packs(c03, c07); c14c15 = vec_packs(c11, c15); \
1469*c0909341SAndroid Build Coastguard Worker }
1470*c0909341SAndroid Build Coastguard Worker
1471*c0909341SAndroid Build Coastguard Worker
1472*c0909341SAndroid Build Coastguard Worker
/*
 * Second-pass 4-point inverse DCT over four groups, with packed output.
 *
 * Expands IDCT_4_INNER on (c00..c03), (c04..c07), (c08..c11) and (c12..c15),
 * then narrows the sixteen results into c00c01 .. c14c15 with PACK_4x4.
 */
1473*c0909341SAndroid Build Coastguard Worker #define dct_4x4_out(c00, c01, c02, c03, \
1474*c0909341SAndroid Build Coastguard Worker c04, c05, c06, c07, \
1475*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, \
1476*c0909341SAndroid Build Coastguard Worker c12, c13, c14, c15, \
1477*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07, \
1478*c0909341SAndroid Build Coastguard Worker c08c09, c10c11, c12c13, c14c15) \
1479*c0909341SAndroid Build Coastguard Worker { \
1480*c0909341SAndroid Build Coastguard Worker IDCT_4_INNER(c00, c01, c02, c03) \
1481*c0909341SAndroid Build Coastguard Worker IDCT_4_INNER(c04, c05, c06, c07) \
1482*c0909341SAndroid Build Coastguard Worker IDCT_4_INNER(c08, c09, c10, c11) \
1483*c0909341SAndroid Build Coastguard Worker IDCT_4_INNER(c12, c13, c14, c15) \
1484*c0909341SAndroid Build Coastguard Worker \
1485*c0909341SAndroid Build Coastguard Worker PACK_4x4(c00, c01, c02, c03, \
1486*c0909341SAndroid Build Coastguard Worker c04, c05, c06, c07, \
1487*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, \
1488*c0909341SAndroid Build Coastguard Worker c12, c13, c14, c15, \
1489*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07, \
1490*c0909341SAndroid Build Coastguard Worker c08c09, c10c11, c12c13, c14c15) \
1491*c0909341SAndroid Build Coastguard Worker }
1492*c0909341SAndroid Build Coastguard Worker
/*
 * 32-bit identity-4 scaling of four vectors, in place.
 *
 * Multiplies a, b, c and d by the 5793 splat (MUL_4_INPLACE) and then passes
 * them through SCALE_ROUND_4 with the 2048 splat and a shift count of 12
 * (presumably (x * 5793 + 2048) >> 12 -- SCALE_ROUND_4 is defined elsewhere,
 * confirm). The splat constants are declared inside the braces, so they do
 * not leak into the caller's scope.
 */
1493*c0909341SAndroid Build Coastguard Worker #define IDENTITY_4_I32(a, b, c, d) \
1494*c0909341SAndroid Build Coastguard Worker { \
1495*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(5793) \
1496*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(2048) \
1497*c0909341SAndroid Build Coastguard Worker MUL_4_INPLACE(a, b, c, d, v5793) \
1498*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(a, b, c, d, v2048, vec_splat_u32(12)) \
1499*c0909341SAndroid Build Coastguard Worker }
1500*c0909341SAndroid Build Coastguard Worker
/*
 * First-pass identity-4 transform over four groups (A, B, C, D), in place.
 *
 * Applies IDENTITY_4_I32 to each group of four 32-bit vectors. The packed
 * parameters (a0b0 .. c3d3) are accepted but not used in this body.
 */
1501*c0909341SAndroid Build Coastguard Worker #define identity_4x4_in(cA0, cA1, cA2, cA3, cB0, cB1, cB2, cB3, \
1502*c0909341SAndroid Build Coastguard Worker cC0, cC1, cC2, cC3, cD0, cD1, cD2, cD3, \
1503*c0909341SAndroid Build Coastguard Worker a0b0, c0d0, a1b1, c1d1, a2b2, c2d2, a3b3, c3d3) \
1504*c0909341SAndroid Build Coastguard Worker { \
1505*c0909341SAndroid Build Coastguard Worker IDENTITY_4_I32(cA0, cA1, cA2, cA3) \
1506*c0909341SAndroid Build Coastguard Worker IDENTITY_4_I32(cB0, cB1, cB2, cB3) \
1507*c0909341SAndroid Build Coastguard Worker IDENTITY_4_I32(cC0, cC1, cC2, cC3) \
1508*c0909341SAndroid Build Coastguard Worker IDENTITY_4_I32(cD0, cD1, cD2, cD3) \
1509*c0909341SAndroid Build Coastguard Worker }
1510*c0909341SAndroid Build Coastguard Worker
/*
 * Second-pass identity-4 transform over four groups, with packed output.
 *
 * The sixteen 32-bit inputs are first narrowed into c00c01 .. c14c15 with
 * PACK_4x4, and IDENTITY_4 (defined elsewhere) is then applied to the packed
 * vectors two at a time.
 */
1511*c0909341SAndroid Build Coastguard Worker #define identity_4x4_out(c00, c01, c02, c03, \
1512*c0909341SAndroid Build Coastguard Worker c04, c05, c06, c07, \
1513*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, \
1514*c0909341SAndroid Build Coastguard Worker c12, c13, c14, c15, \
1515*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07, \
1516*c0909341SAndroid Build Coastguard Worker c08c09, c10c11, c12c13, c14c15) \
1517*c0909341SAndroid Build Coastguard Worker { \
1518*c0909341SAndroid Build Coastguard Worker PACK_4x4(c00, c01, c02, c03, \
1519*c0909341SAndroid Build Coastguard Worker c04, c05, c06, c07, \
1520*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, \
1521*c0909341SAndroid Build Coastguard Worker c12, c13, c14, c15, \
1522*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07, \
1523*c0909341SAndroid Build Coastguard Worker c08c09, c10c11, c12c13, c14c15) \
1524*c0909341SAndroid Build Coastguard Worker IDENTITY_4(c00c01, c02c03) \
1525*c0909341SAndroid Build Coastguard Worker IDENTITY_4(c04c05, c06c07) \
1526*c0909341SAndroid Build Coastguard Worker IDENTITY_4(c08c09, c10c11) \
1527*c0909341SAndroid Build Coastguard Worker IDENTITY_4(c12c13, c14c15) \
1528*c0909341SAndroid Build Coastguard Worker }
1529*c0909341SAndroid Build Coastguard Worker
/*
 * Apply adst_4_in to four independent groups of four vectors (A, B, C, D).
 *
 * Each group is given its own pair of packed arguments: (a0b0, c0d0) for A,
 * (a1b1, c1d1) for B, (a2b2, c2d2) for C and (a3b3, c3d3) for D. The
 * expansion is not wrapped in braces.
 */
1530*c0909341SAndroid Build Coastguard Worker #define adst_4x4_in(cA0, cA1, cA2, cA3, cB0, cB1, cB2, cB3, \
1531*c0909341SAndroid Build Coastguard Worker cC0, cC1, cC2, cC3, cD0, cD1, cD2, cD3, \
1532*c0909341SAndroid Build Coastguard Worker a0b0, c0d0, a1b1, c1d1, a2b2, c2d2, a3b3, c3d3) \
1533*c0909341SAndroid Build Coastguard Worker adst_4_in(cA0, cA1, cA2, cA3, a0b0, c0d0) \
1534*c0909341SAndroid Build Coastguard Worker adst_4_in(cB0, cB1, cB2, cB3, a1b1, c1d1) \
1535*c0909341SAndroid Build Coastguard Worker adst_4_in(cC0, cC1, cC2, cC3, a2b2, c2d2) \
1536*c0909341SAndroid Build Coastguard Worker adst_4_in(cD0, cD1, cD2, cD3, a3b3, c3d3)
1537*c0909341SAndroid Build Coastguard Worker
/*
 * Second-pass 4-point ADST over four groups, with packed output.
 *
 * Expands ADST_INNER_4 on each group of four, passing the same four names
 * again as the second set of arguments (in the same order, in contrast to
 * flipadst_4x4_out which reverses them), then narrows the sixteen results
 * into c00c01 .. c14c15 with PACK_4x4.
 */
1538*c0909341SAndroid Build Coastguard Worker #define adst_4x4_out(c00, c01, c02, c03, \
1539*c0909341SAndroid Build Coastguard Worker c04, c05, c06, c07, \
1540*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, \
1541*c0909341SAndroid Build Coastguard Worker c12, c13, c14, c15, \
1542*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07, \
1543*c0909341SAndroid Build Coastguard Worker c08c09, c10c11, c12c13, c14c15) \
1544*c0909341SAndroid Build Coastguard Worker { \
1545*c0909341SAndroid Build Coastguard Worker ADST_INNER_4(c00, c01, c02, c03, c00, c01, c02, c03) \
1546*c0909341SAndroid Build Coastguard Worker ADST_INNER_4(c04, c05, c06, c07, c04, c05, c06, c07) \
1547*c0909341SAndroid Build Coastguard Worker ADST_INNER_4(c08, c09, c10, c11, c08, c09, c10, c11) \
1548*c0909341SAndroid Build Coastguard Worker ADST_INNER_4(c12, c13, c14, c15, c12, c13, c14, c15) \
1549*c0909341SAndroid Build Coastguard Worker \
1550*c0909341SAndroid Build Coastguard Worker PACK_4x4(c00, c01, c02, c03, \
1551*c0909341SAndroid Build Coastguard Worker c04, c05, c06, c07, \
1552*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, \
1553*c0909341SAndroid Build Coastguard Worker c12, c13, c14, c15, \
1554*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07, \
1555*c0909341SAndroid Build Coastguard Worker c08c09, c10c11, c12c13, c14c15) \
1556*c0909341SAndroid Build Coastguard Worker }
1557*c0909341SAndroid Build Coastguard Worker
/*
 * Apply flipadst_4_in to four independent groups of four vectors
 * (A, B, C, D).
 *
 * Each group is given its own pair of packed arguments: (a0b0, c0d0) for A,
 * (a1b1, c1d1) for B, (a2b2, c2d2) for C and (a3b3, c3d3) for D. The
 * expansion is not wrapped in braces.
 */
1558*c0909341SAndroid Build Coastguard Worker #define flipadst_4x4_in(cA0, cA1, cA2, cA3, cB0, cB1, cB2, cB3, \
1559*c0909341SAndroid Build Coastguard Worker cC0, cC1, cC2, cC3, cD0, cD1, cD2, cD3, \
1560*c0909341SAndroid Build Coastguard Worker a0b0, c0d0, a1b1, c1d1, a2b2, c2d2, a3b3, c3d3) \
1561*c0909341SAndroid Build Coastguard Worker flipadst_4_in(cA0, cA1, cA2, cA3, a0b0, c0d0) \
1562*c0909341SAndroid Build Coastguard Worker flipadst_4_in(cB0, cB1, cB2, cB3, a1b1, c1d1) \
1563*c0909341SAndroid Build Coastguard Worker flipadst_4_in(cC0, cC1, cC2, cC3, a2b2, c2d2) \
1564*c0909341SAndroid Build Coastguard Worker flipadst_4_in(cD0, cD1, cD2, cD3, a3b3, c3d3)
1565*c0909341SAndroid Build Coastguard Worker
/*
 * Second-pass flipped 4-point ADST over four groups, with packed output.
 *
 * Identical in shape to adst_4x4_out, except that the second set of
 * arguments handed to ADST_INNER_4 lists each group's names in reverse
 * order (e.g. c03, c02, c01, c00) -- this reversal is what implements the
 * "flip", assuming the second set names the outputs (ADST_INNER_4 is
 * defined elsewhere; confirm). The sixteen results are then narrowed into
 * c00c01 .. c14c15 with PACK_4x4.
 */
1566*c0909341SAndroid Build Coastguard Worker #define flipadst_4x4_out(c00, c01, c02, c03, \
1567*c0909341SAndroid Build Coastguard Worker c04, c05, c06, c07, \
1568*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, \
1569*c0909341SAndroid Build Coastguard Worker c12, c13, c14, c15, \
1570*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07, \
1571*c0909341SAndroid Build Coastguard Worker c08c09, c10c11, c12c13, c14c15) \
1572*c0909341SAndroid Build Coastguard Worker { \
1573*c0909341SAndroid Build Coastguard Worker ADST_INNER_4(c00, c01, c02, c03, c03, c02, c01, c00) \
1574*c0909341SAndroid Build Coastguard Worker ADST_INNER_4(c04, c05, c06, c07, c07, c06, c05, c04) \
1575*c0909341SAndroid Build Coastguard Worker ADST_INNER_4(c08, c09, c10, c11, c11, c10, c09, c08) \
1576*c0909341SAndroid Build Coastguard Worker ADST_INNER_4(c12, c13, c14, c15, c15, c14, c13, c12) \
1577*c0909341SAndroid Build Coastguard Worker \
1578*c0909341SAndroid Build Coastguard Worker PACK_4x4(c00, c01, c02, c03, \
1579*c0909341SAndroid Build Coastguard Worker c04, c05, c06, c07, \
1580*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, \
1581*c0909341SAndroid Build Coastguard Worker c12, c13, c14, c15, \
1582*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07, \
1583*c0909341SAndroid Build Coastguard Worker c08c09, c10c11, c12c13, c14c15) \
1584*c0909341SAndroid Build Coastguard Worker }
1585*c0909341SAndroid Build Coastguard Worker
/*
 * Core of the 16-point ADST (going by the macro name).
 *
 * Parameters:
 *   c00..c15       input 32-bit vectors; read only in the first stage.
 *   o00..o15       names that receive the results. They first appear as the
 *                  leading arguments of ADD_SUB_PAIR in the last two add/sub
 *                  stages, i.e. after the last read of c00..c15.
 *   c00c01..c06c07 handed to every CLIP16_I32_8 call, presumably as scratch
 *                  -- confirm against CLIP16_I32_8.
 *
 * Stages, in the order they appear below:
 *   1. Multiply-pair / add-sub of (c15,c00), (c13,c02), ... (c01,c14) with
 *      the constant pairs (4091,201) .. (601,4052), then SCALE_ROUND_4 with
 *      v2048 / v12.
 *   2. Add/sub of t00..t07 against t08..t15, clipped by CLIP16_I32_8.
 *   3. Multiply-pair / add-sub of t08a..t15a with (4017,799) and (2276,3406),
 *      scaled, followed by another add/sub stage and clip.
 *   4. Multiply-pair / add-sub with (3784,1567), scaled, followed by the
 *      add/sub stage that produces o00, o15, o03, o12, o01, o14, o02, o13,
 *      and a clip.
 *   5. Add/sub producing o04..o11, multiplied by 181 and passed through
 *      SCALE_ROUND_4 with v128 and a shift count of 8.
 *   6. Negation of every odd-indexed output (o01, o03, ..., o15).
 *
 * The body is not wrapped in braces: v2048, v12, v8, the other splat
 * constants and all t* temporaries are declared directly in the scope of the
 * expansion site, so the caller is expected to provide the enclosing block.
 */
1586*c0909341SAndroid Build Coastguard Worker #define ADST_INNER_16(c00, c01, c02, c03, c04, c05, c06, c07, \
1587*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, c12, c13, c14, c15, \
1588*c0909341SAndroid Build Coastguard Worker o00, o01, o02, o03, o04, o05, o06, o07, \
1589*c0909341SAndroid Build Coastguard Worker o08, o09, o10, o11, o12, o13, o14, o15, \
1590*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07) \
1591*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(2048); \
1592*c0909341SAndroid Build Coastguard Worker u32x4 v12 = vec_splat_u32(12); \
1593*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(4091) \
1594*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(201) \
1595*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(3973) \
1596*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(995) \
1597*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(3703) \
1598*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(1751) \
1599*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(3290) \
1600*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(2440) \
1601*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(2751) \
1602*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(3035) \
1603*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(2106) \
1604*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(3513) \
1605*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(1380) \
1606*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(3857) \
1607*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(601) \
1608*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(4052) \
1609*c0909341SAndroid Build Coastguard Worker \
1610*c0909341SAndroid Build Coastguard Worker DECLARE_MUL_PAIR_I32(c15, c00, v4091, v201) \
1611*c0909341SAndroid Build Coastguard Worker DECLARE_MUL_PAIR_I32(c13, c02, v3973, v995) \
1612*c0909341SAndroid Build Coastguard Worker DECLARE_MUL_PAIR_I32(c11, c04, v3703, v1751) \
1613*c0909341SAndroid Build Coastguard Worker DECLARE_MUL_PAIR_I32(c09, c06, v3290, v2440) \
1614*c0909341SAndroid Build Coastguard Worker DECLARE_MUL_PAIR_I32(c07, c08, v2751, v3035) \
1615*c0909341SAndroid Build Coastguard Worker DECLARE_MUL_PAIR_I32(c05, c10, v2106, v3513) \
1616*c0909341SAndroid Build Coastguard Worker DECLARE_MUL_PAIR_I32(c03, c12, v1380, v3857) \
1617*c0909341SAndroid Build Coastguard Worker DECLARE_MUL_PAIR_I32(c01, c14, v601, v4052) \
1618*c0909341SAndroid Build Coastguard Worker \
1619*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t00, t01, c15, c00, v4091, v201);\
1620*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t02, t03, c13, c02, v3973, v995) \
1621*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t04, t05, c11, c04, v3703, v1751) \
1622*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t06, t07, c09, c06, v3290, v2440) \
1623*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t08, t09, c07, c08, v2751, v3035) \
1624*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t10, t11, c05, c10, v2106, v3513) \
1625*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t12, t13, c03, c12, v1380, v3857) \
1626*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t14, t15, c01, c14, v601, v4052) \
1627*c0909341SAndroid Build Coastguard Worker \
1628*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(t00, t01, t02, t03, v2048, v12) \
1629*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(t04, t05, t06, t07, v2048, v12) \
1630*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(t08, t09, t10, t11, v2048, v12) \
1631*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(t12, t13, t14, t15, v2048, v12) \
1632*c0909341SAndroid Build Coastguard Worker \
1633*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t00a, t08a, t00, t08,,) \
1634*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t01a, t09a, t01, t09,,) \
1635*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t02a, t10a, t02, t10,,) \
1636*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t03a, t11a, t03, t11,,) \
1637*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t04a, t12a, t04, t12,,) \
1638*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t05a, t13a, t05, t13,,) \
1639*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t06a, t14a, t06, t14,,) \
1640*c0909341SAndroid Build Coastguard Worker DECLARE_ADD_SUB_PAIR(t07a, t15a, t07, t15,,) \
1641*c0909341SAndroid Build Coastguard Worker \
1642*c0909341SAndroid Build Coastguard Worker CLIP16_I32_8(t00a, t08a, t01a, t09a, t02a, t10a, t03a, t11a, \
1643*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07); \
1644*c0909341SAndroid Build Coastguard Worker CLIP16_I32_8(t04a, t12a, t05a, t13a, t06a, t14a, t07a, t15a, \
1645*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07); \
1646*c0909341SAndroid Build Coastguard Worker \
1647*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(4017) \
1648*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(799) \
1649*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(2276) \
1650*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(3406) \
1651*c0909341SAndroid Build Coastguard Worker \
1652*c0909341SAndroid Build Coastguard Worker DECLARE_MUL_PAIR_I32(t08a, t09a, v4017, v799); \
1653*c0909341SAndroid Build Coastguard Worker DECLARE_MUL_PAIR_I32(t10a, t11a, v2276, v3406); \
1654*c0909341SAndroid Build Coastguard Worker DECLARE_MUL_PAIR_I32(t13a, t12a, v799, v4017); \
1655*c0909341SAndroid Build Coastguard Worker DECLARE_MUL_PAIR_I32(t15a, t14a, v3406, v2276); \
1656*c0909341SAndroid Build Coastguard Worker \
1657*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t08, t09, t08a, t09a, v4017, v799); \
1658*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t10, t11, t10a, t11a, v2276, v3406); \
1659*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t13, t12, t13a, t12a, v799, v4017); \
1660*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t15, t14, t15a, t14a, v3406, v2276); \
1661*c0909341SAndroid Build Coastguard Worker \
1662*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(t08, t09, t10, t11, v2048, v12) \
1663*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(t13, t12, t15, t14, v2048, v12) \
1664*c0909341SAndroid Build Coastguard Worker \
1665*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t00, t04, t00a, t04a,,); \
1666*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t01, t05, t01a, t05a,,); \
1667*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t02, t06, t02a, t06a,,); \
1668*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t03, t07, t03a, t07a,,); \
1669*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t08a, t12a, t08, t12,,); \
1670*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t09a, t13a, t09, t13,,); \
1671*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t10a, t14a, t10, t14,,); \
1672*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t11a, t15a, t11, t15,,); \
1673*c0909341SAndroid Build Coastguard Worker \
1674*c0909341SAndroid Build Coastguard Worker CLIP16_I32_8(t00, t04, t01, t05, t02, t06, t03, t07, \
1675*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07) \
1676*c0909341SAndroid Build Coastguard Worker CLIP16_I32_8(t08a, t12a, t09a, t13a, t10a, t14a, t11a, t15a, \
1677*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07) \
1678*c0909341SAndroid Build Coastguard Worker \
1679*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(3784) \
1680*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(1567) \
1681*c0909341SAndroid Build Coastguard Worker \
1682*c0909341SAndroid Build Coastguard Worker DECLARE_MUL_PAIR_I32(t04, t05, v3784, v1567) \
1683*c0909341SAndroid Build Coastguard Worker DECLARE_MUL_PAIR_I32(t07, t06, v1567, v3784) \
1684*c0909341SAndroid Build Coastguard Worker DECLARE_MUL_PAIR_I32(t12a, t13a, v3784, v1567) \
1685*c0909341SAndroid Build Coastguard Worker DECLARE_MUL_PAIR_I32(t15a, t14a, v1567, v3784) \
1686*c0909341SAndroid Build Coastguard Worker \
1687*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t04a, t05a, t04, t05, v3784, v1567) \
1688*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t07a, t06a, t07, t06, v1567, v3784) \
1689*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t12, t13, t12a, t13a, v3784, v1567) \
1690*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(t15, t14, t15a, t14a, v1567, v3784) \
1691*c0909341SAndroid Build Coastguard Worker \
1692*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(t04a, t05a, t07a, t06a, v2048, v12) \
1693*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(t12, t13, t15, t14, v2048, v12) \
1694*c0909341SAndroid Build Coastguard Worker \
1695*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(o00, t02a, t00, t02,,) \
1696*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(o15, t03a, t01, t03,,) \
1697*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(o03, t06, t04a, t06a,,) \
1698*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(o12, t07, t05a, t07a,,) \
1699*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(o01, t10, t08a, t10a,,) \
1700*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(o14, t11, t09a, t11a,,) \
1701*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(o02, t14a, t12, t14,,) \
1702*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(o13, t15a, t13, t15,,) \
1703*c0909341SAndroid Build Coastguard Worker \
1704*c0909341SAndroid Build Coastguard Worker CLIP16_I32_8(o00, t02a, o15, t03a, o03, t06, o12, t07, \
1705*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07) \
1706*c0909341SAndroid Build Coastguard Worker CLIP16_I32_8(o01, t10, o14, t11, o02, t14a, o13, t15a, \
1707*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07) \
1708*c0909341SAndroid Build Coastguard Worker \
1709*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(181) \
1710*c0909341SAndroid Build Coastguard Worker DECLARE_SPLAT_I32(128) \
1711*c0909341SAndroid Build Coastguard Worker u32x4 v8 = vec_splat_u32(8); \
1712*c0909341SAndroid Build Coastguard Worker \
1713*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(o07, o08, t02a, t03a,,) \
1714*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(o04, o11, t06, t07,,) \
1715*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(o06, o09, t10, t11,,) \
1716*c0909341SAndroid Build Coastguard Worker ADD_SUB_PAIR(o05, o10, t14a, t15a,,) \
1717*c0909341SAndroid Build Coastguard Worker \
1718*c0909341SAndroid Build Coastguard Worker MUL_4_INPLACE(o07, o08, o04, o11, v181) \
1719*c0909341SAndroid Build Coastguard Worker MUL_4_INPLACE(o06, o09, o05, o10, v181) \
1720*c0909341SAndroid Build Coastguard Worker \
1721*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(o07, o08, o04, o11, v128, v8) \
1722*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(o06, o09, o05, o10, v128, v8) \
1723*c0909341SAndroid Build Coastguard Worker \
1724*c0909341SAndroid Build Coastguard Worker o01 = -o01; \
1725*c0909341SAndroid Build Coastguard Worker o03 = -o03; \
1726*c0909341SAndroid Build Coastguard Worker o05 = -o05; \
1727*c0909341SAndroid Build Coastguard Worker o07 = -o07; \
1728*c0909341SAndroid Build Coastguard Worker o09 = -o09; \
1729*c0909341SAndroid Build Coastguard Worker o11 = -o11; \
1730*c0909341SAndroid Build Coastguard Worker o13 = -o13; \
1731*c0909341SAndroid Build Coastguard Worker o15 = -o15; \
1732*c0909341SAndroid Build Coastguard Worker
/*
 * adst_16_in: 16-point ADST stage selected by inv_txfm_fn16x4 through
 * type1##_16_in, i.e. the stage that runs before the transpose there.
 *
 * Expands ADST_INNER_16 inside its own brace scope. c00..c15 are passed twice
 * in the same order, so both 16-name groups of the inner macro refer to the
 * same variables (presumably inputs followed by the o00..o15 outputs, making
 * the transform in-place -- the parameter list of ADST_INNER_16 is defined
 * earlier in the file; confirm there).
 *
 * c00c01..c06c07 are forwarded to ADST_INNER_16. c08c09..c14c15 are accepted
 * but never referenced by this macro; presumably they exist only so that all
 * *_16_in / *_16_out macros share one argument list for token pasting.
 */
1733*c0909341SAndroid Build Coastguard Worker #define adst_16_in(c00, c01, c02, c03, c04, c05, c06, c07, \
1734*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, c12, c13, c14, c15, \
1735*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07, c08c09, c10c11, c12c13, c14c15) \
1736*c0909341SAndroid Build Coastguard Worker { \
1737*c0909341SAndroid Build Coastguard Worker ADST_INNER_16(c00, c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14, c15, \
1738*c0909341SAndroid Build Coastguard Worker c00, c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14, c15, \
1739*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07) \
1740*c0909341SAndroid Build Coastguard Worker }
1741*c0909341SAndroid Build Coastguard Worker
/*
 * adst_16_out: 16-point ADST stage selected by inv_txfm_fn4x16 through
 * type2##_16_out, i.e. the stage that runs after the transpose there.
 *
 * Same ADST_INNER_16 expansion as adst_16_in (c00..c15 passed twice in the
 * same order), followed by PACK_8. PACK_8 receives the eight pair names
 * c00c01..c14c15, then the even-numbered registers c00..c14, then the
 * odd-numbered registers c01..c15. Presumably it narrows each (even, odd)
 * pair of 32-bit vectors into the matching 16-bit pair vector -- PACK_8 is
 * defined elsewhere; confirm there.
 *
 * Both macro invocations live in one brace scope, so anything ADST_INNER_16
 * declares is still visible to PACK_8 and hidden from the caller.
 */
1742*c0909341SAndroid Build Coastguard Worker #define adst_16_out(c00, c01, c02, c03, c04, c05, c06, c07, \
1743*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, c12, c13, c14, c15, \
1744*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07, c08c09, c10c11, c12c13, c14c15) \
1745*c0909341SAndroid Build Coastguard Worker { \
1746*c0909341SAndroid Build Coastguard Worker ADST_INNER_16(c00, c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14, c15, \
1747*c0909341SAndroid Build Coastguard Worker c00, c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14, c15, \
1748*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07) \
1749*c0909341SAndroid Build Coastguard Worker PACK_8(c00c01, c02c03, c04c05, c06c07, c08c09, c10c11, c12c13, c14c15, \
1750*c0909341SAndroid Build Coastguard Worker c00, c02, c04, c06, c08, c10, c12, c14, \
1751*c0909341SAndroid Build Coastguard Worker c01, c03, c05, c07, c09, c11, c13, c15) \
1752*c0909341SAndroid Build Coastguard Worker }
1753*c0909341SAndroid Build Coastguard Worker
/*
 * flipadst_16_in: flipped variant of adst_16_in.
 *
 * The first 16-name group given to ADST_INNER_16 is c00..c15 in natural
 * order, the second group is the same registers in reverse order (c15..c00).
 * Assuming the second group names the outputs (o00..o15 in the inner macro --
 * confirm against its definition), the k-th ADST output is written to
 * c(15-k), which is what produces the flip.
 *
 * c08c09..c14c15 are accepted but never referenced by this macro.
 */
1754*c0909341SAndroid Build Coastguard Worker #define flipadst_16_in(c00, c01, c02, c03, c04, c05, c06, c07, \
1755*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, c12, c13, c14, c15, \
1756*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07, c08c09, c10c11, c12c13, c14c15) \
1757*c0909341SAndroid Build Coastguard Worker { \
1758*c0909341SAndroid Build Coastguard Worker ADST_INNER_16(c00, c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14, c15, \
1759*c0909341SAndroid Build Coastguard Worker c15, c14, c13, c12, c11, c10, c09, c08, c07, c06, c05, c04, c03, c02, c01, c00, \
1760*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07) \
1761*c0909341SAndroid Build Coastguard Worker }
1762*c0909341SAndroid Build Coastguard Worker
/*
 * flipadst_16_out: flipped variant of adst_16_out.
 *
 * ADST_INNER_16 is expanded with its second 16-name group reversed
 * (c15..c00), exactly as in flipadst_16_in. PACK_8 is then invoked with the
 * registers in natural order (pair names, even registers, odd registers), so
 * the flip comes entirely from the reversed group and PACK_8 is unaware of it.
 * What PACK_8 does with its arguments is defined elsewhere (presumably a
 * 32-bit -> 16-bit narrowing into c00c01..c14c15; confirm there).
 */
1763*c0909341SAndroid Build Coastguard Worker #define flipadst_16_out(c00, c01, c02, c03, c04, c05, c06, c07, \
1764*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, c12, c13, c14, c15, \
1765*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07, c08c09, c10c11, c12c13, c14c15) \
1766*c0909341SAndroid Build Coastguard Worker { \
1767*c0909341SAndroid Build Coastguard Worker ADST_INNER_16(c00, c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14, c15, \
1768*c0909341SAndroid Build Coastguard Worker c15, c14, c13, c12, c11, c10, c09, c08, c07, c06, c05, c04, c03, c02, c01, c00, \
1769*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07) \
1770*c0909341SAndroid Build Coastguard Worker PACK_8(c00c01, c02c03, c04c05, c06c07, c08c09, c10c11, c12c13, c14c15, \
1771*c0909341SAndroid Build Coastguard Worker c00, c02, c04, c06, c08, c10, c12, c14, \
1772*c0909341SAndroid Build Coastguard Worker c01, c03, c05, c07, c09, c11, c13, c15) \
1773*c0909341SAndroid Build Coastguard Worker }
1774*c0909341SAndroid Build Coastguard Worker
1775*c0909341SAndroid Build Coastguard Worker
/*
 * dct_dct entry point for 4x16 blocks.
 *
 * dst    - destination pixels; rows are addressed as dst + k * stride.
 * stride - distance between consecutive rows of dst.
 * coeff  - 4 * 16 int16_t coefficients; zeroed by this function on the full
 *          path (what dc_only_4xN does with coeff is not visible here).
 * eob    - only tested against 1 here: eob < 1 takes the dc_only_4xN path.
 *
 * NOTE(review): this is the only function in this part of the file whose
 * signature carries HIGHBD_DECL_SUFFIX; the 16x4 dct_dct function and the
 * macro-generated variants omit it. Presumably it expands to nothing for
 * 8 bpc -- confirm.
 */
1776*c0909341SAndroid Build Coastguard Worker void dav1d_inv_txfm_add_dct_dct_4x16_8bpc_pwr9(uint8_t *dst, const ptrdiff_t stride,
1777*c0909341SAndroid Build Coastguard Worker int16_t *const coeff, const int eob
1778*c0909341SAndroid Build Coastguard Worker HIGHBD_DECL_SUFFIX)
1779*c0909341SAndroid Build Coastguard Worker {
    /* Shortcut path, delegated entirely to dc_only_4xN.
     * NOTE(review): "return <expr>;" in a void function is only accepted as
     * an extension in C; presumably dc_only_4xN returns void -- confirm. */
1780*c0909341SAndroid Build Coastguard Worker if (eob < 1) {
1781*c0909341SAndroid Build Coastguard Worker return dc_only_4xN(dst, stride, coeff, 4, 0, 1);
1782*c0909341SAndroid Build Coastguard Worker }
1783*c0909341SAndroid Build Coastguard Worker
    /* None of cA0..cD3 or a0b0..c3d3 is declared in this function, so
     * LOAD_COEFF_4x16 presumably declares and loads them -- confirm. */
1784*c0909341SAndroid Build Coastguard Worker LOAD_COEFF_4x16(coeff)
1785*c0909341SAndroid Build Coastguard Worker
    /* First pass: 4-point DCT over the four groups A, B, C and D. */
1786*c0909341SAndroid Build Coastguard Worker dct_4x4_in(cA0, cA1, cA2, cA3, cB0, cB1, cB2, cB3,
1787*c0909341SAndroid Build Coastguard Worker cC0, cC1, cC2, cC3, cD0, cD1, cD2, cD3,
1788*c0909341SAndroid Build Coastguard Worker a0b0, c0d0, a1b1, c1d1, a2b2, c2d2, a3b3, c3d3)
1789*c0909341SAndroid Build Coastguard Worker
    /* Clear all 4 * 16 coefficients now that the first pass has run. */
1790*c0909341SAndroid Build Coastguard Worker memset(coeff, 0, sizeof(*coeff) * 4 * 16);
1791*c0909341SAndroid Build Coastguard Worker
    /* Intermediate rounding with constant 1 and shift 1 (presumably
     * (x + 1) >> 1 -- SCALE_ROUND_4 is defined elsewhere), followed by the
     * transpose that prepares the second pass. */
1792*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(cA0, cB0, cC0, cD0, vec_splat_s32(1), vec_splat_u32(1))
1793*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(cA1, cB1, cC1, cD1, vec_splat_s32(1), vec_splat_u32(1))
1794*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(cA2, cB2, cC2, cD2, vec_splat_s32(1), vec_splat_u32(1))
1795*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(cA3, cB3, cC3, cD3, vec_splat_s32(1), vec_splat_u32(1))
1796*c0909341SAndroid Build Coastguard Worker TRANSPOSE4x16_I32(cA0, cA1, cA2, cA3, cB0, cB1, cB2, cB3,
1797*c0909341SAndroid Build Coastguard Worker cC0, cC1, cC2, cC3, cD0, cD1, cD2, cD3)
1798*c0909341SAndroid Build Coastguard Worker
    /* Second pass: 16-point DCT; the "_out" form is also handed the pair
     * vectors a0b0..c3d3 that APPLY_COEFF_4 consumes below. */
1799*c0909341SAndroid Build Coastguard Worker dct_16_out(cA0, cA1, cA2, cA3, cB0, cB1, cB2, cB3,
1800*c0909341SAndroid Build Coastguard Worker cC0, cC1, cC2, cC3, cD0, cD1, cD2, cD3,
1801*c0909341SAndroid Build Coastguard Worker a0b0, c0d0, a1b1, c1d1, a2b2, c2d2, a3b3, c3d3)
1802*c0909341SAndroid Build Coastguard Worker
    /* Load the 16 destination rows, four at a time. */
1803*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_4(dst, stride, l00, l01, l02, l03)
1804*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_4(dst + 4 * stride, stride, l04, l05, l06, l07)
1805*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_4(dst + 8 * stride, stride, l08, l09, l10, l11)
1806*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_4(dst + 12 * stride, stride, l12, l13, l14, l15)
1807*c0909341SAndroid Build Coastguard Worker
    /* Combine each group of four rows with its two pair vectors. */
1808*c0909341SAndroid Build Coastguard Worker APPLY_COEFF_4(l00, l01, l02, l03, a0b0, c0d0);
1809*c0909341SAndroid Build Coastguard Worker APPLY_COEFF_4(l04, l05, l06, l07, a1b1, c1d1);
1810*c0909341SAndroid Build Coastguard Worker APPLY_COEFF_4(l08, l09, l10, l11, a2b2, c2d2);
1811*c0909341SAndroid Build Coastguard Worker APPLY_COEFF_4(l12, l13, l14, l15, a3b3, c3d3);
1812*c0909341SAndroid Build Coastguard Worker
    /* Write the rows back to the same addresses they were loaded from. */
1813*c0909341SAndroid Build Coastguard Worker STORE_4(dst, stride, l00, l01, l02, l03);
1814*c0909341SAndroid Build Coastguard Worker STORE_4(dst + 4 * stride, stride, l04, l05, l06, l07);
1815*c0909341SAndroid Build Coastguard Worker STORE_4(dst + 8 * stride, stride, l08, l09, l10, l11);
1816*c0909341SAndroid Build Coastguard Worker STORE_4(dst + 12 * stride, stride, l12, l13, l14, l15);
1817*c0909341SAndroid Build Coastguard Worker }
1818*c0909341SAndroid Build Coastguard Worker
/*
 * inv_txfm_fn4x16(type1, type2): defines
 *   void dav1d_inv_txfm_add_<type1>_<type2>_4x16_8bpc_pwr9(dst, stride, coeff, eob)
 *
 * The generated body follows the same sequence as the hand-written dct_dct
 * 4x16 function, with the transforms chosen by token pasting:
 *   1. LOAD_COEFF_4x16(coeff)
 *   2. type1##_4x4_in over the groups A..D            (first pass)
 *   3. memset of all 4 * 16 coefficients to zero
 *   4. SCALE_ROUND_4 with constant 1 / shift 1, then TRANSPOSE4x16_I32
 *   5. type2##_16_out                                 (second pass)
 *   6. load 16 rows of dst, APPLY_COEFF_4, STORE_4 back to the same rows
 *
 * Unlike the dct_dct function there is no eob < 1 shortcut: eob is part of
 * the signature but is never read by the generated body.
 */
1819*c0909341SAndroid Build Coastguard Worker #define inv_txfm_fn4x16(type1, type2) \
1820*c0909341SAndroid Build Coastguard Worker void dav1d_inv_txfm_add_##type1##_##type2##_4x16_8bpc_pwr9(uint8_t *dst, const ptrdiff_t stride, \
1821*c0909341SAndroid Build Coastguard Worker int16_t *const coeff, const int eob) \
1822*c0909341SAndroid Build Coastguard Worker { \
1823*c0909341SAndroid Build Coastguard Worker LOAD_COEFF_4x16(coeff) \
1824*c0909341SAndroid Build Coastguard Worker type1##_4x4_in(cA0, cA1, cA2, cA3, cB0, cB1, cB2, cB3, \
1825*c0909341SAndroid Build Coastguard Worker cC0, cC1, cC2, cC3, cD0, cD1, cD2, cD3, \
1826*c0909341SAndroid Build Coastguard Worker a0b0, c0d0, a1b1, c1d1, a2b2, c2d2, a3b3, c3d3) \
1827*c0909341SAndroid Build Coastguard Worker memset(coeff, 0, sizeof(*coeff) * 4 * 16); \
1828*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(cA0, cB0, cC0, cD0, vec_splat_s32(1), vec_splat_u32(1)) \
1829*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(cA1, cB1, cC1, cD1, vec_splat_s32(1), vec_splat_u32(1)) \
1830*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(cA2, cB2, cC2, cD2, vec_splat_s32(1), vec_splat_u32(1)) \
1831*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(cA3, cB3, cC3, cD3, vec_splat_s32(1), vec_splat_u32(1)) \
1832*c0909341SAndroid Build Coastguard Worker TRANSPOSE4x16_I32(cA0, cA1, cA2, cA3, cB0, cB1, cB2, cB3, \
1833*c0909341SAndroid Build Coastguard Worker cC0, cC1, cC2, cC3, cD0, cD1, cD2, cD3) \
1834*c0909341SAndroid Build Coastguard Worker type2##_16_out(cA0, cA1, cA2, cA3, cB0, cB1, cB2, cB3, \
1835*c0909341SAndroid Build Coastguard Worker cC0, cC1, cC2, cC3, cD0, cD1, cD2, cD3, \
1836*c0909341SAndroid Build Coastguard Worker a0b0, c0d0, a1b1, c1d1, a2b2, c2d2, a3b3, c3d3) \
1837*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_4(dst, stride, l00, l01, l02, l03) \
1838*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_4(dst + 4 * stride, stride, l04, l05, l06, l07) \
1839*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_4(dst + 8 * stride, stride, l08, l09, l10, l11) \
1840*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_4(dst + 12 * stride, stride, l12, l13, l14, l15) \
1841*c0909341SAndroid Build Coastguard Worker APPLY_COEFF_4(l00, l01, l02, l03, a0b0, c0d0); \
1842*c0909341SAndroid Build Coastguard Worker APPLY_COEFF_4(l04, l05, l06, l07, a1b1, c1d1); \
1843*c0909341SAndroid Build Coastguard Worker APPLY_COEFF_4(l08, l09, l10, l11, a2b2, c2d2); \
1844*c0909341SAndroid Build Coastguard Worker APPLY_COEFF_4(l12, l13, l14, l15, a3b3, c3d3); \
1845*c0909341SAndroid Build Coastguard Worker STORE_4(dst, stride, l00, l01, l02, l03); \
1846*c0909341SAndroid Build Coastguard Worker STORE_4(dst + 4 * stride, stride, l04, l05, l06, l07); \
1847*c0909341SAndroid Build Coastguard Worker STORE_4(dst + 8 * stride, stride, l08, l09, l10, l11); \
1848*c0909341SAndroid Build Coastguard Worker STORE_4(dst + 12 * stride, stride, l12, l13, l14, l15); \
1849*c0909341SAndroid Build Coastguard Worker }
inv_txfm_fn4x16(adst,dct)1850*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x16(adst, dct )
1851*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x16(dct, adst )
1852*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x16(dct, flipadst)
1853*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x16(flipadst, dct )
1854*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x16(adst, flipadst)
1855*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x16(flipadst, adst )
1856*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x16(identity, dct )
1857*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x16(dct, identity)
1858*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x16(identity, flipadst)
1859*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x16(flipadst, identity)
1860*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x16(identity, adst )
1861*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x16(adst, identity)
1862*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x16(identity, identity)
1863*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x16(adst, adst )
1864*c0909341SAndroid Build Coastguard Worker inv_txfm_fn4x16(flipadst, flipadst)
1865*c0909341SAndroid Build Coastguard Worker
/*
 * dct_dct entry point for 16x4 blocks.
 *
 * dst    - destination pixels; LOAD_DECLARE_4 / STORE_16 address four rows
 *          starting at dst with the given stride.
 * stride - distance between consecutive rows of dst.
 * coeff  - 16 * 4 int16_t coefficients; zeroed by this function on the full
 *          path (what dc_only_16xN does with coeff is not visible here).
 * eob    - only tested against 1 here: eob < 1 takes the dc_only_16xN path.
 */
1866*c0909341SAndroid Build Coastguard Worker void dav1d_inv_txfm_add_dct_dct_16x4_8bpc_pwr9(uint8_t *dst, const ptrdiff_t stride,
1867*c0909341SAndroid Build Coastguard Worker int16_t *const coeff, const int eob)
1868*c0909341SAndroid Build Coastguard Worker {
1869*c0909341SAndroid Build Coastguard Worker
    /* Shortcut path, delegated entirely to dc_only_16xN.
     * NOTE(review): "return <expr>;" in a void function is only accepted as
     * an extension in C; presumably dc_only_16xN returns void -- confirm. */
1870*c0909341SAndroid Build Coastguard Worker if (eob < 1) {
1871*c0909341SAndroid Build Coastguard Worker return dc_only_16xN(dst, stride, coeff, 1, 0, 1);
1872*c0909341SAndroid Build Coastguard Worker }
1873*c0909341SAndroid Build Coastguard Worker
    /* Load the coefficients in four steps of 16 int16_t (offsets 0, 16, 32,
     * 48), then unpack each pair of loaded vectors into four variables
     * (presumably a 16-bit -> 32-bit widening, going by the macro name).
     * The trailing backslashes on the four load lines are not needed outside
     * a #define (the same lines appear inside inv_txfm_fn16x4): they merely
     * splice the loads and the first unpack into one logical line, which is
     * harmless because this is not a preprocessor directive. */
1874*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_2_I16(coeff, c00c01, c02c03) \
1875*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_2_I16(coeff+16, c04c05, c06c07) \
1876*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_2_I16(coeff+32, c08c09, c10c11) \
1877*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_2_I16(coeff+48, c12c13, c14c15) \
1878*c0909341SAndroid Build Coastguard Worker UNPACK_DECLARE_4_I16_I32(c00c01, c02c03, c00, c01, c02, c03)
1879*c0909341SAndroid Build Coastguard Worker UNPACK_DECLARE_4_I16_I32(c04c05, c06c07, c04, c05, c06, c07)
1880*c0909341SAndroid Build Coastguard Worker UNPACK_DECLARE_4_I16_I32(c08c09, c10c11, c08, c09, c10, c11)
1881*c0909341SAndroid Build Coastguard Worker UNPACK_DECLARE_4_I16_I32(c12c13, c14c15, c12, c13, c14, c15)
1882*c0909341SAndroid Build Coastguard Worker
    /* First pass: 16-point DCT over c00..c15. */
1883*c0909341SAndroid Build Coastguard Worker dct_16_in(c00, c01, c02, c03, c04, c05, c06, c07,
1884*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, c12, c13, c14, c15,
1885*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07, c08c09, c10c11, c12c13, c14c15)
    /* Clear all 16 * 4 coefficients now that the first pass has run, then
     * apply the intermediate rounding with constant 1 and shift 1
     * (presumably (x + 1) >> 1 -- SCALE_ROUND_4 is defined elsewhere). */
1886*c0909341SAndroid Build Coastguard Worker memset(coeff, 0, sizeof(*coeff) * 16 * 4);
1887*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(c00, c01, c02, c03, vec_splat_s32(1), vec_splat_u32(1))
1888*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(c04, c05, c06, c07, vec_splat_s32(1), vec_splat_u32(1))
1889*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(c08, c09, c10, c11, vec_splat_s32(1), vec_splat_u32(1))
1890*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(c12, c13, c14, c15, vec_splat_s32(1), vec_splat_u32(1))
1891*c0909341SAndroid Build Coastguard Worker
    /* Transpose each group of four vectors independently. */
1892*c0909341SAndroid Build Coastguard Worker TRANSPOSE4_I32(c00, c01, c02, c03);
1893*c0909341SAndroid Build Coastguard Worker TRANSPOSE4_I32(c04, c05, c06, c07);
1894*c0909341SAndroid Build Coastguard Worker TRANSPOSE4_I32(c08, c09, c10, c11);
1895*c0909341SAndroid Build Coastguard Worker TRANSPOSE4_I32(c12, c13, c14, c15);
1896*c0909341SAndroid Build Coastguard Worker
    /* Second pass: 4-point DCT on the four groups; the "_out" form is also
     * handed the pair vectors that APPLY_COEFF_16x4 consumes below. */
1897*c0909341SAndroid Build Coastguard Worker dct_4x4_out(c00, c01, c02, c03,
1898*c0909341SAndroid Build Coastguard Worker c04, c05, c06, c07,
1899*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11,
1900*c0909341SAndroid Build Coastguard Worker c12, c13, c14, c15,
1901*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07,
1902*c0909341SAndroid Build Coastguard Worker c08c09, c10c11, c12c13, c14c15)
1903*c0909341SAndroid Build Coastguard Worker
    /* Load four destination rows, combine them with the eight pair vectors
     * and store the rows back. */
1904*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_4(dst, stride, l0, l1, l2, l3)
1905*c0909341SAndroid Build Coastguard Worker
1906*c0909341SAndroid Build Coastguard Worker APPLY_COEFF_16x4(l0, l1, l2, l3,
1907*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07,
1908*c0909341SAndroid Build Coastguard Worker c08c09, c10c11, c12c13, c14c15)
1909*c0909341SAndroid Build Coastguard Worker
1910*c0909341SAndroid Build Coastguard Worker STORE_16(dst, stride, l0, l1, l2, l3)
1911*c0909341SAndroid Build Coastguard Worker }
1912*c0909341SAndroid Build Coastguard Worker
/*
 * inv_txfm_fn16x4(type1, type2): defines
 *   void dav1d_inv_txfm_add_<type1>_<type2>_16x4_8bpc_pwr9(dst, stride, coeff, eob)
 *
 * The generated body follows the same sequence as the hand-written dct_dct
 * 16x4 function, with the transforms chosen by token pasting:
 *   1. four LOAD_DECLARE_2_I16 loads (coeff + 0/16/32/48) and four
 *      UNPACK_DECLARE_4_I16_I32 expansions producing c00..c15
 *   2. type1##_16_in                                  (first pass)
 *   3. memset of all 16 * 4 coefficients to zero
 *   4. SCALE_ROUND_4 with constant 1 / shift 1, then TRANSPOSE4_I32 on each
 *      group of four vectors
 *   5. type2##_4x4_out                                (second pass)
 *   6. LOAD_DECLARE_4 of four dst rows, APPLY_COEFF_16x4, STORE_16
 *
 * There is no eob < 1 shortcut: eob is part of the signature but is never
 * read by the generated body.
 */
1913*c0909341SAndroid Build Coastguard Worker #define inv_txfm_fn16x4(type1, type2) \
1914*c0909341SAndroid Build Coastguard Worker void dav1d_inv_txfm_add_##type1##_##type2##_16x4_8bpc_pwr9(uint8_t *dst, const ptrdiff_t stride, \
1915*c0909341SAndroid Build Coastguard Worker int16_t *const coeff, const int eob) \
1916*c0909341SAndroid Build Coastguard Worker { \
1917*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_2_I16(coeff, c00c01, c02c03) \
1918*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_2_I16(coeff+16, c04c05, c06c07) \
1919*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_2_I16(coeff+32, c08c09, c10c11) \
1920*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_2_I16(coeff+48, c12c13, c14c15) \
1921*c0909341SAndroid Build Coastguard Worker UNPACK_DECLARE_4_I16_I32(c00c01, c02c03, c00, c01, c02, c03) \
1922*c0909341SAndroid Build Coastguard Worker UNPACK_DECLARE_4_I16_I32(c04c05, c06c07, c04, c05, c06, c07) \
1923*c0909341SAndroid Build Coastguard Worker UNPACK_DECLARE_4_I16_I32(c08c09, c10c11, c08, c09, c10, c11) \
1924*c0909341SAndroid Build Coastguard Worker UNPACK_DECLARE_4_I16_I32(c12c13, c14c15, c12, c13, c14, c15) \
1925*c0909341SAndroid Build Coastguard Worker type1##_16_in(c00, c01, c02, c03, c04, c05, c06, c07, \
1926*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, c12, c13, c14, c15, \
1927*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07, c08c09, c10c11, c12c13, c14c15) \
1928*c0909341SAndroid Build Coastguard Worker memset(coeff, 0, sizeof(*coeff) * 16 * 4); \
1929*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(c00, c01, c02, c03, vec_splat_s32(1), vec_splat_u32(1)) \
1930*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(c04, c05, c06, c07, vec_splat_s32(1), vec_splat_u32(1)) \
1931*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(c08, c09, c10, c11, vec_splat_s32(1), vec_splat_u32(1)) \
1932*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(c12, c13, c14, c15, vec_splat_s32(1), vec_splat_u32(1)) \
1933*c0909341SAndroid Build Coastguard Worker TRANSPOSE4_I32(c00, c01, c02, c03); \
1934*c0909341SAndroid Build Coastguard Worker TRANSPOSE4_I32(c04, c05, c06, c07); \
1935*c0909341SAndroid Build Coastguard Worker TRANSPOSE4_I32(c08, c09, c10, c11); \
1936*c0909341SAndroid Build Coastguard Worker TRANSPOSE4_I32(c12, c13, c14, c15); \
1937*c0909341SAndroid Build Coastguard Worker type2##_4x4_out(c00, c01, c02, c03, \
1938*c0909341SAndroid Build Coastguard Worker c04, c05, c06, c07, \
1939*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, \
1940*c0909341SAndroid Build Coastguard Worker c12, c13, c14, c15, \
1941*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07, \
1942*c0909341SAndroid Build Coastguard Worker c08c09, c10c11, c12c13, c14c15); \
1943*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_4(dst, stride, l0, l1, l2, l3) \
1944*c0909341SAndroid Build Coastguard Worker APPLY_COEFF_16x4(l0, l1, l2, l3, \
1945*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07, \
1946*c0909341SAndroid Build Coastguard Worker c08c09, c10c11, c12c13, c14c15) \
1947*c0909341SAndroid Build Coastguard Worker STORE_16(dst, stride, l0, l1, l2, l3) \
1948*c0909341SAndroid Build Coastguard Worker }
1949*c0909341SAndroid Build Coastguard Worker
/*
 * 16x4 variants generated by the generic inv_txfm_fn16x4 macro: every
 * combination whose first transform is dct, adst or flipadst (except dct_dct,
 * which is written out by hand), plus identity_identity. The identity_dct,
 * identity_adst and identity_flipadst variants are not listed here; they are
 * generated by inv_txfm_fn16x4_identity.
 */
1950*c0909341SAndroid Build Coastguard Worker inv_txfm_fn16x4(adst, dct )
1951*c0909341SAndroid Build Coastguard Worker inv_txfm_fn16x4(dct, adst )
1952*c0909341SAndroid Build Coastguard Worker inv_txfm_fn16x4(dct, flipadst)
1953*c0909341SAndroid Build Coastguard Worker inv_txfm_fn16x4(flipadst, dct )
1954*c0909341SAndroid Build Coastguard Worker inv_txfm_fn16x4(adst, flipadst)
1955*c0909341SAndroid Build Coastguard Worker inv_txfm_fn16x4(flipadst, adst )
1956*c0909341SAndroid Build Coastguard Worker inv_txfm_fn16x4(dct, identity)
1957*c0909341SAndroid Build Coastguard Worker inv_txfm_fn16x4(flipadst, identity)
1958*c0909341SAndroid Build Coastguard Worker inv_txfm_fn16x4(adst, identity)
1959*c0909341SAndroid Build Coastguard Worker inv_txfm_fn16x4(identity, identity)
1960*c0909341SAndroid Build Coastguard Worker inv_txfm_fn16x4(adst, adst )
1961*c0909341SAndroid Build Coastguard Worker inv_txfm_fn16x4(flipadst, flipadst)
1962*c0909341SAndroid Build Coastguard Worker
/*
 * inv_txfm_fn16x4_identity(type2): defines
 *   void dav1d_inv_txfm_add_identity_<type2>_16x4_8bpc_pwr9(dst, stride, coeff, eob)
 *
 * Same sequence as inv_txfm_fn16x4 with the first pass fixed to
 * identity_16_in, plus one extra step: after SCALE_ROUND_4 and before the
 * transposes, two CLIP16_I32_8 expansions are applied to c00..c07 and
 * c08..c15, each with four of the pair vectors as additional arguments
 * (presumably a clamp to the int16 range that uses the pair vectors as
 * scratch -- CLIP16_I32_8 is defined elsewhere; confirm there).
 *
 * NOTE(review): identity_identity is generated by the generic
 * inv_txfm_fn16x4 macro and therefore does not get this clip step -- confirm
 * that this is intentional.
 *
 * eob is part of the signature but is never read by the generated body.
 */
1963*c0909341SAndroid Build Coastguard Worker #define inv_txfm_fn16x4_identity(type2) \
1964*c0909341SAndroid Build Coastguard Worker void dav1d_inv_txfm_add_identity_##type2##_16x4_8bpc_pwr9(uint8_t *dst, const ptrdiff_t stride, \
1965*c0909341SAndroid Build Coastguard Worker int16_t *const coeff, const int eob) \
1966*c0909341SAndroid Build Coastguard Worker { \
1967*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_2_I16(coeff, c00c01, c02c03) \
1968*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_2_I16(coeff+16, c04c05, c06c07) \
1969*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_2_I16(coeff+32, c08c09, c10c11) \
1970*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_2_I16(coeff+48, c12c13, c14c15) \
1971*c0909341SAndroid Build Coastguard Worker UNPACK_DECLARE_4_I16_I32(c00c01, c02c03, c00, c01, c02, c03) \
1972*c0909341SAndroid Build Coastguard Worker UNPACK_DECLARE_4_I16_I32(c04c05, c06c07, c04, c05, c06, c07) \
1973*c0909341SAndroid Build Coastguard Worker UNPACK_DECLARE_4_I16_I32(c08c09, c10c11, c08, c09, c10, c11) \
1974*c0909341SAndroid Build Coastguard Worker UNPACK_DECLARE_4_I16_I32(c12c13, c14c15, c12, c13, c14, c15) \
1975*c0909341SAndroid Build Coastguard Worker identity_16_in(c00, c01, c02, c03, c04, c05, c06, c07, \
1976*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, c12, c13, c14, c15, \
1977*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07, c08c09, c10c11, c12c13, c14c15) \
1978*c0909341SAndroid Build Coastguard Worker memset(coeff, 0, sizeof(*coeff) * 16 * 4); \
1979*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(c00, c01, c02, c03, vec_splat_s32(1), vec_splat_u32(1)) \
1980*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(c04, c05, c06, c07, vec_splat_s32(1), vec_splat_u32(1)) \
1981*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(c08, c09, c10, c11, vec_splat_s32(1), vec_splat_u32(1)) \
1982*c0909341SAndroid Build Coastguard Worker SCALE_ROUND_4(c12, c13, c14, c15, vec_splat_s32(1), vec_splat_u32(1)) \
1983*c0909341SAndroid Build Coastguard Worker CLIP16_I32_8(c00, c01, c02, c03, c04, c05, c06, c07, c00c01, c02c03, c04c05, c06c07) \
1984*c0909341SAndroid Build Coastguard Worker CLIP16_I32_8(c08, c09, c10, c11, c12, c13, c14, c15, c08c09, c10c11, c12c13, c14c15) \
1985*c0909341SAndroid Build Coastguard Worker TRANSPOSE4_I32(c00, c01, c02, c03); \
1986*c0909341SAndroid Build Coastguard Worker TRANSPOSE4_I32(c04, c05, c06, c07); \
1987*c0909341SAndroid Build Coastguard Worker TRANSPOSE4_I32(c08, c09, c10, c11); \
1988*c0909341SAndroid Build Coastguard Worker TRANSPOSE4_I32(c12, c13, c14, c15); \
1989*c0909341SAndroid Build Coastguard Worker type2##_4x4_out(c00, c01, c02, c03, \
1990*c0909341SAndroid Build Coastguard Worker c04, c05, c06, c07, \
1991*c0909341SAndroid Build Coastguard Worker c08, c09, c10, c11, \
1992*c0909341SAndroid Build Coastguard Worker c12, c13, c14, c15, \
1993*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07, \
1994*c0909341SAndroid Build Coastguard Worker c08c09, c10c11, c12c13, c14c15); \
1995*c0909341SAndroid Build Coastguard Worker LOAD_DECLARE_4(dst, stride, l0, l1, l2, l3) \
1996*c0909341SAndroid Build Coastguard Worker APPLY_COEFF_16x4(l0, l1, l2, l3, \
1997*c0909341SAndroid Build Coastguard Worker c00c01, c02c03, c04c05, c06c07, \
1998*c0909341SAndroid Build Coastguard Worker c08c09, c10c11, c12c13, c14c15) \
1999*c0909341SAndroid Build Coastguard Worker STORE_16(dst, stride, l0, l1, l2, l3) \
2000*c0909341SAndroid Build Coastguard Worker }
2001*c0909341SAndroid Build Coastguard Worker
/*
 * 16x4 variants whose first transform is identity and whose second is dct,
 * adst or flipadst; each line expands to one function definition.
 */
2002*c0909341SAndroid Build Coastguard Worker inv_txfm_fn16x4_identity(dct)
2003*c0909341SAndroid Build Coastguard Worker inv_txfm_fn16x4_identity(adst)
2004*c0909341SAndroid Build Coastguard Worker inv_txfm_fn16x4_identity(flipadst)
2005*c0909341SAndroid Build Coastguard Worker
2006*c0909341SAndroid Build Coastguard Worker #endif // BITDEPTH
2007