; xref: /aosp_15_r20/external/libdav1d/src/x86/itx_sse.asm (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
; Copyright © 2018-2021, VideoLAN and dav1d authors
; Copyright © 2018, Two Orioles, LLC
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions are met:
;
; 1. Redistributions of source code must retain the above copyright notice, this
;    list of conditions and the following disclaimer.
;
; 2. Redistributions in binary form must reproduce the above copyright notice,
;    this list of conditions and the following disclaimer in the documentation
;    and/or other materials provided with the distribution.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
; ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

%include "config.asm"
%include "ext/x86/x86inc.asm"


SECTION_RODATA 16

; Byte-index shuffle masks; each index pair (2n, 2n+1) selects one 16-bit word.
; deint_shuf (used with pshufb in idct_4x4_internal_8bpc) gathers the
; even-numbered words 0, 2, 4, 6 into the low qword and the odd-numbered
; words 1, 3, 5, 7 into the high qword.
deint_shuf:  db  0,  1,  4,  5,  8,  9, 12, 13,  2,  3,  6,  7, 10, 11, 14, 15

; deint_shuf1 selects words 0, 4, 1, 5, 2, 6, 3, 7 (low qword interleaved with
; high qword); deint_shuf2 is the same with each pair swapped
; (4, 0, 5, 1, 6, 2, 7, 3).
deint_shuf1: db  0,  1,  8,  9,  2,  3, 10, 11,  4,  5, 12, 13,  6,  7, 14, 15
deint_shuf2: db  8,  9,  0,  1, 10, 11,  2,  3, 12, 13,  4,  5, 14, 15,  6,  7

; COEF_PAIR a, b[, flags]
; Emits 16-byte constants made of one 16-bit coefficient pair repeated 4 times:
;   pw_<a>_m<b>  = ( a, -b)   always
;   pw_<b>_<a>   = ( b,  a)   unless flags == 2
;   pw_m<a>_m<b> = (-a, -b)   when flags != 0
%macro COEF_PAIR 2-3 0 ; !0 = m%1_m%2, 2 = no %2_%1
pw_%1_m%2:  times 4 dw  %1, -%2
%if %3 != 2
pw_%2_%1:   times 4 dw  %2,  %1
%endif
%if %3
pw_m%1_m%2: times 4 dw -%1, -%2
%endif
%endmacro

;adst4
; Word pairs consumed by pmaddwd in iadst_4x4_internal_8bpc.main: each dword
; lane holds (c0, c1), so multiplying against interleaved inputs (x, y) yields
; c0*x + c1*y per lane.
pw_1321_3803:   times 4 dw  1321,  3803
pw_2482_m1321:  times 4 dw  2482, -1321
pw_3344_2482:   times 4 dw  3344,  2482
pw_3344_m3803:  times 4 dw  3344, -3803
pw_3344_m3344:  times 4 dw  3344, -3344
pw_0_3344:      times 4 dw     0,  3344
pw_m6688_m3803: times 4 dw -6688, -3803

; Coefficient pairs, addressed through the pw_<a>_m<b> / pw_<b>_<a> /
; pw_m<a>_m<b> labels that COEF_PAIR generates. A trailing 1 additionally
; emits the (-a, -b) pair; a trailing 2 emits (-a, -b) and omits (b, a).
COEF_PAIR 2896, 2896
COEF_PAIR 1567, 3784
COEF_PAIR  799, 4017
COEF_PAIR 3406, 2276
COEF_PAIR  401, 4076
COEF_PAIR 1931, 3612
COEF_PAIR 3166, 2598
COEF_PAIR 3920, 1189
COEF_PAIR 3784, 1567, 1
COEF_PAIR  995, 3973
COEF_PAIR 1751, 3703
COEF_PAIR 3513, 2106
COEF_PAIR 3857, 1380
COEF_PAIR 4017,  799, 1
COEF_PAIR  201, 4091
COEF_PAIR 2440, 3290
COEF_PAIR 3035, 2751
COEF_PAIR 4052,  601
COEF_PAIR 2276, 3406, 1
COEF_PAIR 4076,  401, 2
COEF_PAIR 2598, 3166, 2
COEF_PAIR 3612, 1931, 2
COEF_PAIR 1189, 3920, 2

; Broadcast constants (one 16-byte vector each).
; pd_2048 is the dword rounding bias added before the `psrad 12` in
; ITX_MUL2X_PACK and iadst_4x4_internal_8bpc.main.
; The pw_<c>x8 words hold c*8: pmulhrsw computes (x*k + 0x4000) >> 15, so
; multiplying by c*8 gives x*c/4096 with rounding.
pd_2048:        times 4 dd  2048
pw_2048:        times 8 dw  2048
pw_m2048:       times 8 dw -2048
pw_4096:        times 8 dw  4096
pw_16384:       times 8 dw  16384
pw_m16384:      times 8 dw  -16384
pw_1697x16:     times 8 dw  1697*16
pw_1697x8:      times 8 dw  1697*8
pw_2896x8:      times 8 dw  2896*8
pw_3344x8:      times 8 dw  3344*8
pw_8192:        times 8 dw  8192
pw_m8192:       times 8 dw -8192
pw_5:           times 8 dw  5
pw_201x8:       times 8 dw   201*8
pw_4091x8:      times 8 dw  4091*8
pw_m2751x8:     times 8 dw -2751*8
pw_3035x8:      times 8 dw  3035*8
pw_1751x8:      times 8 dw  1751*8
pw_3703x8:      times 8 dw  3703*8
pw_m1380x8:     times 8 dw -1380*8
pw_3857x8:      times 8 dw  3857*8
pw_995x8:       times 8 dw   995*8
pw_3973x8:      times 8 dw  3973*8
pw_m2106x8:     times 8 dw -2106*8
pw_3513x8:      times 8 dw  3513*8
pw_2440x8:      times 8 dw  2440*8
pw_3290x8:      times 8 dw  3290*8
pw_m601x8:      times 8 dw  -601*8
pw_4052x8:      times 8 dw  4052*8

pw_4095x8:      times 8 dw  4095*8
pw_101x8:       times 8 dw   101*8
pw_2967x8:      times 8 dw  2967*8
pw_m2824x8:     times 8 dw -2824*8
pw_3745x8:      times 8 dw  3745*8
pw_1660x8:      times 8 dw  1660*8
pw_3822x8:      times 8 dw  3822*8
pw_m1474x8:     times 8 dw -1474*8
pw_3996x8:      times 8 dw  3996*8
pw_897x8:       times 8 dw   897*8
pw_3461x8:      times 8 dw  3461*8
pw_m2191x8:     times 8 dw -2191*8
pw_3349x8:      times 8 dw  3349*8
pw_2359x8:      times 8 dw  2359*8
pw_4036x8:      times 8 dw  4036*8
pw_m700x8:      times 8 dw  -700*8
pw_4065x8:      times 8 dw  4065*8
pw_501x8:       times 8 dw   501*8
pw_3229x8:      times 8 dw  3229*8
pw_m2520x8:     times 8 dw -2520*8
pw_3564x8:      times 8 dw  3564*8
pw_2019x8:      times 8 dw  2019*8
pw_3948x8:      times 8 dw  3948*8
pw_m1092x8:     times 8 dw -1092*8
pw_3889x8:      times 8 dw  3889*8
pw_1285x8:      times 8 dw  1285*8
pw_3659x8:      times 8 dw  3659*8
pw_m1842x8:     times 8 dw -1842*8
pw_3102x8:      times 8 dw  3102*8
pw_2675x8:      times 8 dw  2675*8
pw_4085x8:      times 8 dw  4085*8
pw_m301x8:      times 8 dw  -301*8

SECTION .text

; m(x): mangled symbol name of function x, including the private prefix and
; the SUFFIX of the currently selected instruction set.
%define m(x) mangle(private_prefix %+ _ %+ x %+ SUFFIX)

; o(x): address expression for label x. Used unchanged on x86-64. On 32-bit
; builds it is rebased on r5, which INV_TXFM_FN loads with $$ (the start of
; the current section), so the reference does not need an absolute address.
%if ARCH_X86_64
%define o(x) x
%else
%define o(x) r5-$$+x ; PIC
%endif

; WRITE_4X4 src1, src2, tmp1, tmp2, tmp3, row1, row2, row3, row4
; Adds a 4x4 block of 16-bit residuals to the 8-bit pixels at dstq and stores
; the result back, clamped to 0..255 by packuswb.
;   src1, src2 : xmm register numbers holding the residuals. The low half of
;                m<src1> goes to row1 and its high half to row2; m<src2>
;                likewise supplies row3 and row4.
;   tmp1-tmp3  : scratch xmm register numbers (clobbered)
;   row1-row4  : destination row index (0-3) of each 4-pixel group. Bit 1
;                selects the base (dstq or r2 = dstq + 2*strideq), bit 0 adds
;                one more stride.
; Clobbers r2.
%macro WRITE_4X4 9  ;src[1-2], tmp[1-3], row[1-4]
    lea                  r2, [dstq+strideq*2]
%assign %%i 1
; Rotate so that %1 walks over row[1-4]; 5 + 4*1 rotations restore the
; original argument order afterwards.
%rotate 5
%rep 4
    %if %1 & 2
        CAT_XDEFINE %%row_adr, %%i, r2   + strideq*(%1&1)
    %else
        CAT_XDEFINE %%row_adr, %%i, dstq + strideq*(%1&1)
    %endif
    %assign %%i %%i + 1
    %rotate 1
%endrep

    movd                 m%3, [%%row_adr1]        ;dst0
    movd                 m%5, [%%row_adr2]        ;dst1
    punpckldq            m%3, m%5                 ;high: dst1 :low: dst0
    movd                 m%4, [%%row_adr3]        ;dst2
    movd                 m%5, [%%row_adr4]        ;dst3
    punpckldq            m%4, m%5                 ;high: dst3 :low: dst2

    pxor                 m%5, m%5
    punpcklbw            m%3, m%5                 ;extend byte to word
    punpcklbw            m%4, m%5                 ;extend byte to word

    paddw                m%3, m%1                 ;high: dst1 + out1 ;low: dst0 + out0
    paddw                m%4, m%2                 ;high: dst3 + out3 ;low: dst2 + out2

    packuswb             m%3, m%4                 ;high->low: dst3 + out3, dst2 + out2, dst1 + out1, dst0 + out0

    movd        [%%row_adr1], m%3                  ;store dst0 + out0
    pshuflw              m%4, m%3, q1032
    movd        [%%row_adr2], m%4                  ;store dst1 + out1
    punpckhqdq           m%3, m%3
    movd        [%%row_adr3], m%3                  ;store dst2 + out2
    psrlq                m%3, 32
    movd        [%%row_adr4], m%3                  ;store dst3 + out3
%endmacro

; ITX4_END row1, row2, row3, row4[, rnd = 2048]
; Common tail of the 4x4 transforms: optionally scales the residuals in m0/m1,
; adds them to the destination through WRITE_4X4 and returns.
;   row1-row4 : destination row index for m0 low, m0 high, m1 low, m1 high
;   rnd       : when non-zero, m0/m1 are multiplied by pw_<rnd> with pmulhrsw
;               (for the default 2048 this is (x + 8) >> 4); 0 skips scaling
; Clobbers m2-m4 and r2.
%macro ITX4_END 4-5 2048 ; row[1-4], rnd
%if %5
    mova                 m2, [o(pw_%5)]
    pmulhrsw             m0, m2
    pmulhrsw             m1, m2
%endif

    WRITE_4X4            0, 1, 2, 3, 4, %1, %2, %3, %4
    ret
%endmacro

; ITX_MUL2X_PACK dst/src, tmp, rnd, coef1, coef2[, flags = 0]
; Rotation of interleaved word pairs (x, y) held in m<dst/src>:
;   default : m<tmp> = x*coef1 - y*coef2,  m<dst> = x*coef2 + y*coef1
;   flags&1 : the two results are swapped between m<dst> and m<tmp>
;   flags&2 : coef1/coef2 are xmm register numbers holding ready-made word
;             pairs instead of numbers naming pw_* constants
;   flags&4 : skip the final pack and leave both results as dwords
; Both results get m<rnd> added and are shifted right arithmetically by 12.
; Unless flags&4, they are then packed with signed saturation into m<dst>
; (low half: m<dst> result, high half: m<tmp> result).
; flags: 1 = swap, 2: coef_regs, 4: no_pack
%macro ITX_MUL2X_PACK 5-6 0 ; dst/src, tmp[1], rnd, coef[1-2], flags
%if %6 & 2
    pmaddwd              m%2, m%4, m%1
    pmaddwd              m%1, m%5
%elif %6 & 1
    pmaddwd              m%2, m%1, [o(pw_%5_%4)]
    pmaddwd              m%1, [o(pw_%4_m%5)]
%else
    pmaddwd              m%2, m%1, [o(pw_%4_m%5)]
    pmaddwd              m%1, [o(pw_%5_%4)]
%endif
    paddd                m%2, m%3
    paddd                m%1, m%3
    psrad                m%2, 12
    psrad                m%1, 12
; NASM binds & tighter than ==; the parentheses only make that explicit.
%if (%6 & 4) == 0
    packssdw             m%1, m%2
%endif
%endmacro

; IDCT4_1D_PACKED
; One 4-point inverse DCT pass over four columns at once.
;   in : m0 = low: in0, high: in1    m1 = low: in2, high: in3
;   out: m0 = low: out0, high: out1  m1 = low: out3, high: out2
; Clobbers m2 and m3 (m3 is left holding pd_2048).
; The optional argument is accepted but not referenced by the body.
%macro IDCT4_1D_PACKED 0-1   ;pw_2896x8
    mova                 m3, [o(pd_2048)]
    punpckhwd            m2, m0, m1            ;unpacked in1 in3
    punpcklwd            m0, m1                ;unpacked in0 in2
    ITX_MUL2X_PACK        2, 1, 3, 1567, 3784  ;low: t3 ;high: t2
    ITX_MUL2X_PACK        0, 1, 3, 2896, 2896  ;low: t0 ;high: t1
    psubsw               m1, m0, m2            ;high: out2 ;low: out3
    paddsw               m0, m2                ;high: out1 ;low: out0
%endmacro

; INV_TXFM_FN type1, type2, size, xmm/stack...
; Declares the public entry point inv_txfm_add_<type1>_<type2>_<size>_8bpc
; (4 arguments, 6 GPRs; the 4th and any further macro arguments are forwarded
; to cglobal). The entry point:
;   - on x86-32, loads r5 with $$ as the base for o()
;   - points tx2q at the .pass2 label of i<type2>_<size>_internal_8bpc
;   - transfers control to i<type1>_<size>_internal_8bpc (%%p1), which ends
;     its first pass with `jmp tx2q`
; For dct_dct an eob of zero skips the transfer and execution continues with
; whatever the caller of this macro places directly after it.
; With has_epilogue, %%p1 is reached by `call` followed by RET. Otherwise it is
; reached by a jump, so the internal function's own `ret` returns to the
; caller of the entry point.
%macro INV_TXFM_FN 4+ ; type1, type2, size, xmm/stack
cglobal inv_txfm_add_%1_%2_%3_8bpc, 4, 6, %4, dst, stride, coeff, eob, tx2
    %define %%p1 m(i%1_%3_internal_8bpc)
%if ARCH_X86_32
    LEA                    r5, $$
%endif
%if has_epilogue
%ifidn %1_%2, dct_dct
    test                 eobd, eobd
    jz %%end
%endif
    lea                  tx2q, [o(m(i%2_%3_internal_8bpc).pass2)]
    call %%p1
    RET
%%end:
%else
    lea                  tx2q, [o(m(i%2_%3_internal_8bpc).pass2)]
%ifidn %1_%2, dct_dct
    test                 eobd, eobd
    jnz %%p1
%else
    ; The repeat count is bit 31 of (%%end - %%p1): the jmp is emitted when
    ; that difference is negative and omitted (plain fall-through) when %%p1
    ; starts exactly at the aligned %%end below.
    times ((%%end - %%p1) >> 31) & 1 jmp %%p1
ALIGN function_align
%%end:
%endif
%endif
%endmacro

; INV_TXFM_4X4_FN type1, type2
; Instantiates the 4x4 entry point for the given transform pair (6 xmm regs).
; For dct_dct it also emits the code reached when eob == 0: the first
; coefficient is broadcast to all lanes and multiplied twice by pw_2896x8
; (once per pass). The 32-bit zero in eobd is stored to [coeffq] to clear it,
; and the result goes to the shared rounding/store tail at
; iadst_4x4_internal_8bpc.end2.
%macro INV_TXFM_4X4_FN 2 ; type1, type2
    INV_TXFM_FN          %1, %2, 4x4, 6
%ifidn %1_%2, dct_dct
    pshuflw              m0, [coeffq], q0000     ;broadcast word 0 to the low qword
    punpcklqdq           m0, m0                  ;... and to the high qword
    mova                 m1, [o(pw_2896x8)]
    pmulhrsw             m0, m1
    mov            [coeffq], eobd                ;0
    pmulhrsw             m0, m1
    mova                 m1, m0                  ;the tail expects rows in m0 and m1
    TAIL_CALL m(iadst_4x4_internal_8bpc).end2
%endif
%endmacro

INIT_XMM ssse3
; itx16 relies on dct_dct being the first function. If you change the order, adjust `itx8_start` in itx16.

INV_TXFM_4X4_FN dct, dct
INV_TXFM_4X4_FN dct, adst
INV_TXFM_4X4_FN dct, flipadst
INV_TXFM_4X4_FN dct, identity

; idct_4x4_internal_8bpc
; Not a standalone entry point (declared with 0 args / 0 GPRs / 0 xmm regs).
; It is entered from the INV_TXFM_FN stubs and uses their dst, stride, coeff,
; eob and tx2 registers.
;   entry  : first pass. Loads the 16 coefficients, runs the 1-D DCT,
;            transposes the 4x4 block and jumps to the second pass in tx2q.
;   .pass2 : second pass. Expects m0 = low: in0, high: in1 and
;            m1 = low: in2, high: in3. Clears the coefficient buffer and adds
;            the result to dst.
cglobal idct_4x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    mova                 m0, [coeffq+16*0]      ;high: in1 ;low: in0
    mova                 m1, [coeffq+16*1]      ;high: in3 ;low in2

    IDCT4_1D_PACKED

    ; 4x4 word transpose of (out0, out1 | out3, out2)
    mova                 m2, [o(deint_shuf)]
    shufps               m3, m0, m1, q1331
    shufps               m0, m1, q0220
    pshufb               m0, m2                 ;high: in1 ;low: in0
    pshufb               m1, m3, m2             ;high: in3 ;low :in2
    jmp                tx2q

.pass2:
    IDCT4_1D_PACKED

    pxor                 m2, m2
    mova      [coeffq+16*0], m2
    mova      [coeffq+16*1], m2                 ;memset(coeff, 0, sizeof(*coeff) * sh * sw);

    ; IDCT4_1D_PACKED leaves out3 in the low half of m1 and out2 in the high
    ; half, hence the 3, 2 row order.
    ITX4_END     0, 1, 3, 2

INV_TXFM_4X4_FN adst, dct
INV_TXFM_4X4_FN adst, adst
INV_TXFM_4X4_FN adst, flipadst
INV_TXFM_4X4_FN adst, identity

; iadst_4x4_internal_8bpc
; Entered from the INV_TXFM_FN stubs (declared with 0 args / 0 GPRs / 0 xmm).
;   entry  : first pass. 1-D ADST via .main, then a 4x4 transpose and a jump
;            to the second pass in tx2q.
;   .pass2 : second pass, falling through to .end.
;   .end   : clears the 32-byte coefficient buffer (also the jump target of
;            iidentity_4x4_internal_8bpc.pass2).
;   .end2  : rounds m0/m1 and adds them to dst rows 0-3 (also the tail-call
;            target of the dct_dct DC-only path).
;   .main  : the 1-D ADST itself, see below.
cglobal iadst_4x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    mova                 m0, [coeffq+16*0]
    mova                 m1, [coeffq+16*1]
    call .main
    punpckhwd            m2, m0, m1
    punpcklwd            m0, m1
    punpckhwd            m1, m0, m2       ;high: in3 ;low :in2
    punpcklwd            m0, m2           ;high: in1 ;low: in0
    jmp                tx2q

.pass2:
    call .main

.end:
    pxor                 m2, m2
    mova      [coeffq+16*0], m2
    mova      [coeffq+16*1], m2

.end2:
    ITX4_END              0, 1, 2, 3

; .main: 4-point inverse ADST over four columns.
;   in : m0 = low: in0, high: in1    m1 = low: in2, high: in3
;   out: m0 = low: out0, high: out1  m1 = low: out2, high: out3
; Clobbers m2-m5.
ALIGN function_align
cglobal_label .main
    punpcklwd            m2, m0, m1                ;unpacked in0 in2
    punpckhwd            m0, m1                    ;unpacked in1 in3
    mova                 m3, m0
    pmaddwd              m1, m2, [o(pw_3344_m3344)];3344 * in0 - 3344 * in2
    pmaddwd              m0, [o(pw_0_3344)]        ;3344 * in3
    paddd                m1, m0                    ;t2
    pmaddwd              m0, m2, [o(pw_1321_3803)] ;1321 * in0 + 3803 * in2
    pmaddwd              m2, [o(pw_2482_m1321)]    ;2482 * in0 - 1321 * in2
    pmaddwd              m4, m3, [o(pw_3344_2482)] ;3344 * in1 + 2482 * in3
    pmaddwd              m5, m3, [o(pw_3344_m3803)];3344 * in1 - 3803 * in3
    paddd                m4, m0                    ;t0 + t3
    pmaddwd              m3, [o(pw_m6688_m3803)]   ;-2 * 3344 * in1 - 3803 * in3
    mova                 m0, [o(pd_2048)]
    paddd                m1, m0                    ;t2 + 2048
    paddd                m2, m0
    paddd                m0, m4                    ;t0 + t3 + 2048
    paddd                m5, m2                    ;t1 + t3 + 2048
    paddd                m2, m4
    paddd                m2, m3                    ;t0 + t1 - t3 + 2048
    REPX      {psrad x, 12}, m1, m0, m5, m2
    packssdw             m0, m5                    ;high: out1 ;low: out0
    packssdw             m1, m2                    ;high: out3 ;low: out2
    ret

INV_TXFM_4X4_FN flipadst, dct
INV_TXFM_4X4_FN flipadst, adst
INV_TXFM_4X4_FN flipadst, flipadst
INV_TXFM_4X4_FN flipadst, identity

; iflipadst_4x4_internal_8bpc
; Same 1-D transform as iadst_4x4_internal_8bpc (it calls that function's
; .main) with the output order reversed.
;   entry  : first pass. The transpose interleaves the rows in the order
;            out3, out2, out1, out0, then jumps to the second pass in tx2q.
;   .pass2 : second pass. The reversal is done by writing m0/m1 to dst rows
;            3, 2, 1, 0.
cglobal iflipadst_4x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    mova                 m0, [coeffq+16*0]
    mova                 m1, [coeffq+16*1]
    call m(iadst_4x4_internal_8bpc).main
    punpcklwd            m2, m1, m0            ;out2 out0 interleaved
    punpckhwd            m1, m0                ;out3 out1 interleaved
    punpcklwd            m0, m1, m2            ;high: in1 ;low: in0
    punpckhwd            m1, m2                ;high: in3 ;low: in2
    jmp                tx2q

.pass2:
    call m(iadst_4x4_internal_8bpc).main

.end:
    pxor                 m2, m2
    mova      [coeffq+16*0], m2
    mova      [coeffq+16*1], m2

.end2:
    ITX4_END              3, 2, 1, 0

INV_TXFM_4X4_FN identity, dct
INV_TXFM_4X4_FN identity, adst
INV_TXFM_4X4_FN identity, flipadst
INV_TXFM_4X4_FN identity, identity

; iidentity_4x4_internal_8bpc
; Identity "transform": each pass replaces x with x + pmulhrsw(x, 1697*8),
; using saturating adds.
;   entry  : first pass. Scales, transposes the 4x4 block and jumps to the
;            second pass in tx2q.
;   .pass2 : second pass. Scales, then reuses iadst_4x4_internal_8bpc.end to
;            clear the coefficients and add the result to dst.
cglobal iidentity_4x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    mova                 m0, [coeffq+16*0]
    mova                 m1, [coeffq+16*1]
    mova                 m3, [o(pw_1697x8)]
    pmulhrsw             m2, m0, m3
    pmulhrsw             m3, m1
    paddsw               m0, m2
    paddsw               m1, m3
    punpckhwd            m2, m0, m1
    punpcklwd            m0, m1
    punpckhwd            m1, m0, m2            ;high: in3 ;low :in2
    punpcklwd            m0, m2                ;high: in1 ;low: in0
    jmp                tx2q

.pass2:
    mova                 m3, [o(pw_1697x8)]
    pmulhrsw             m2, m3, m0
    pmulhrsw             m3, m1
    paddsw               m0, m2
    paddsw               m1, m3
    jmp m(iadst_4x4_internal_8bpc).end

; IWHT4_1D_PACKED
; One pass of the 4-point inverse Walsh-Hadamard transform on words packed in
; m0 and m1, using only non-saturating word adds/subtracts and a shift by 1.
; Results are left in m0, m1 and m2, in the halves noted on the last three
; instructions. m3 is clobbered.
%macro IWHT4_1D_PACKED 0
    punpckhqdq           m3, m0, m1            ;low: in1 high: in3
    punpcklqdq           m0, m1                ;low: in0 high: in2
    psubw                m2, m0, m3            ;low: in0 - in1 high: in2 - in3
    paddw                m0, m3                ;low: in0 + in1 high: in2 + in3
    punpckhqdq           m2, m2                ;t2 t2
    punpcklqdq           m0, m0                ;t0 t0
    psubw                m1, m0, m2
    psraw                m1, 1                 ;t4 t4
    psubw                m1, m3                ;low: t1/out2 high: t3/out1
    psubw                m0, m1                ;high: out0
    paddw                m2, m1                ;low: out3
%endmacro

; inv_txfm_add_wht_wht_4x4_8bpc(dst, stride, coeff)
; 4x4 inverse Walsh-Hadamard transform applied in both directions, built
; for SSE2 (INIT_XMM sse2 applies to this function and whatever follows
; until the next INIT_* directive).
;
; Steps:
;   1. Load the 16 coefficients into m0/m1 and zero the coefficient buffer.
;   2. Arithmetic shift every coefficient right by 2.
;   3. First IWHT4_1D_PACKED pass.
;   4. Transpose the pass-1 outputs back into the {in0,in1}/{in2,in3}
;      layout the macro expects.
;   5. Second IWHT4_1D_PACKED pass.
;   6. Gather out0 (m0 high) and out3 (m2 low) into m0 and hand off to
;      ITX4_END.
; NOTE(review): ITX4_END is defined outside this section; presumably it
; adds the residual to dst and returns -- confirm its argument meaning.
425*c0909341SAndroid Build Coastguard WorkerINIT_XMM sse2
426*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_wht_wht_4x4_8bpc, 3, 3, 4, dst, stride, coeff
427*c0909341SAndroid Build Coastguard Worker    mova                 m0, [coeffq+16*0]
428*c0909341SAndroid Build Coastguard Worker    mova                 m1, [coeffq+16*1]
429*c0909341SAndroid Build Coastguard Worker    pxor                 m2, m2
430*c0909341SAndroid Build Coastguard Worker    mova      [coeffq+16*0], m2
431*c0909341SAndroid Build Coastguard Worker    mova      [coeffq+16*1], m2
432*c0909341SAndroid Build Coastguard Worker    psraw                m0, 2
433*c0909341SAndroid Build Coastguard Worker    psraw                m1, 2
434*c0909341SAndroid Build Coastguard Worker    IWHT4_1D_PACKED
; Transpose: pair out0/out1 and out2/out3 word-wise, then interleave dwords
; so each qword again holds one 4-element line for the second pass.
435*c0909341SAndroid Build Coastguard Worker    punpckhwd            m0, m1
436*c0909341SAndroid Build Coastguard Worker    punpcklwd            m3, m1, m2
437*c0909341SAndroid Build Coastguard Worker    punpckhdq            m1, m0, m3
438*c0909341SAndroid Build Coastguard Worker    punpckldq            m0, m3
439*c0909341SAndroid Build Coastguard Worker    IWHT4_1D_PACKED
; m0 = {m0 high (out0), m2 low (out3)}
440*c0909341SAndroid Build Coastguard Worker    shufpd               m0, m2, 0x01
441*c0909341SAndroid Build Coastguard Worker    ITX4_END              0, 3, 2, 1, 0
442*c0909341SAndroid Build Coastguard Worker
; IDCT8_1D_PACKED -- 8-point inverse DCT on four registers that each hold
; two 4-word lines (takes no macro arguments).
;   In:       m0 = {low: in0, high: in1}   m1 = {low: in2, high: in3}
;             m2 = {low: in4, high: in5}   m3 = {low: in6, high: in7}
;   Out:      m0 = {low: out0, high: out1} m1 = {low: out3, high: out2}
;             m2 = {low: out4, high: out5} m3 = {low: out7, high: out6}
;   Clobbers: m4, m6 (m6 is loaded with pd_2048 and passed to every
;             ITX_MUL2X_PACK invocation).
; Note that m1 and m3 come back with their halves in descending order.
; NOTE(review): ITX_MUL2X_PACK and deint_shuf1 are defined outside this
; section; the per-line low/high annotations are taken from the original
; comments.
443*c0909341SAndroid Build Coastguard Worker%macro IDCT8_1D_PACKED 0
444*c0909341SAndroid Build Coastguard Worker    mova                 m6, [o(pd_2048)]
445*c0909341SAndroid Build Coastguard Worker    punpckhwd            m4, m0, m3                 ;unpacked in1 in7
446*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m2                     ;unpacked in0 in4
447*c0909341SAndroid Build Coastguard Worker    punpckhwd            m2, m1                     ;unpacked in5 in3
448*c0909341SAndroid Build Coastguard Worker    punpcklwd            m1, m3                     ;unpacked in2 in6
449*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK        4, 3, 6,  799, 4017       ;low: t7a high: t4a
450*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK        2, 3, 6, 3406, 2276       ;low: t6a high: t5a
451*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK        1, 3, 6, 1567, 3784       ;low: t3  high: t2
452*c0909341SAndroid Build Coastguard Worker    psubsw               m3, m4, m2                 ;low: t6a high: t5a
453*c0909341SAndroid Build Coastguard Worker    paddsw               m4, m2                     ;low: t7  high: t4
454*c0909341SAndroid Build Coastguard Worker    pshufb               m3, [o(deint_shuf1)]
455*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK        0, 2, 6, 2896, 2896       ;low: t0  high: t1
456*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK        3, 2, 6, 2896, 2896       ;low: t6  high: t5
457*c0909341SAndroid Build Coastguard Worker    psubsw               m2, m0, m1                 ;low: tmp3 high: tmp2
458*c0909341SAndroid Build Coastguard Worker    paddsw               m0, m1                     ;low: tmp0 high: tmp1
459*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m1, m4, m3                 ;low: t7   high: t6
460*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m4, m3                     ;low: t4   high: t5
461*c0909341SAndroid Build Coastguard Worker    psubsw               m3, m0, m1                 ;low: out7 high: out6
462*c0909341SAndroid Build Coastguard Worker    paddsw               m0, m1                     ;low: out0 high: out1
463*c0909341SAndroid Build Coastguard Worker    paddsw               m1, m2, m4                 ;low: out3 high: out2
464*c0909341SAndroid Build Coastguard Worker    psubsw               m2, m4                     ;low: out4 high: out5
465*c0909341SAndroid Build Coastguard Worker%endmacro
466*c0909341SAndroid Build Coastguard Worker
; ITX_MULSUB_2W -- rotate two full vectors of 8 words by a coefficient pair:
467*c0909341SAndroid Build Coastguard Worker;dst1 = (src1 * coef1 - src2 * coef2 + rnd) >> 12
468*c0909341SAndroid Build Coastguard Worker;dst2 = (src1 * coef2 + src2 * coef1 + rnd) >> 12
;
; Parameters (register numbers unless stated otherwise):
;   %1, %2  src1/src2 on entry; dst1 is returned in m%1, dst2 in m%2
;   %3, %4  temporaries (both clobbered)
;   %5      register holding the dword rounding constant (rnd)
;   %6, %7  coef1, coef2
;   %8      optional, default 0: when non-zero dst2 is returned in m%3 (tmp1)
;           instead of m%2, and m%2 is clobbered
;
; Coefficient source is chosen by "%7 < 8":
;   * %7 < 8  -> %6 and %7 are treated as register numbers; m%7 is used for
;                the dst2 multiply and m%6 for the dst1 multiply.
;   * else    -> the multiplier vectors are loaded from the constants
;                pw_%7_%6 (dst2) and pw_%6_m%7 (dst1).
;
; The inputs are interleaved word-wise (low half in m%1, high half in m%4),
; multiplied with pmaddwd, rounded, shifted right by 12 and re-packed with
; signed saturation.
;
; NOTE(review): in the register form (%7 < 8) with %8 != 0, m%3 holds the
; high-half products and m%2 the low-half products, so "packssdw m%3, m%2"
; packs dst2 with its qword halves swapped relative to the other three
; configurations. No caller using that combination is visible in this
; section -- verify before relying on it.
469*c0909341SAndroid Build Coastguard Worker%macro ITX_MULSUB_2W 7-8 0 ; dst/src[1-2], tmp[1-2], rnd, coef[1-2], dst2_in_tmp1
470*c0909341SAndroid Build Coastguard Worker    punpckhwd           m%4, m%1, m%2
471*c0909341SAndroid Build Coastguard Worker    punpcklwd           m%1, m%2
472*c0909341SAndroid Build Coastguard Worker%if %7 < 8
473*c0909341SAndroid Build Coastguard Worker    pmaddwd             m%2, m%7, m%1
474*c0909341SAndroid Build Coastguard Worker    pmaddwd             m%3, m%7, m%4
475*c0909341SAndroid Build Coastguard Worker%else
476*c0909341SAndroid Build Coastguard Worker    mova                m%2, [o(pw_%7_%6)]
477*c0909341SAndroid Build Coastguard Worker%if %8
478*c0909341SAndroid Build Coastguard Worker    pmaddwd             m%3, m%1, m%2
479*c0909341SAndroid Build Coastguard Worker    pmaddwd             m%2, m%4
480*c0909341SAndroid Build Coastguard Worker%else
481*c0909341SAndroid Build Coastguard Worker    pmaddwd             m%3, m%4, m%2
482*c0909341SAndroid Build Coastguard Worker    pmaddwd             m%2, m%1
483*c0909341SAndroid Build Coastguard Worker%endif
484*c0909341SAndroid Build Coastguard Worker%endif
485*c0909341SAndroid Build Coastguard Worker    paddd               m%3, m%5
486*c0909341SAndroid Build Coastguard Worker    paddd               m%2, m%5
487*c0909341SAndroid Build Coastguard Worker    psrad               m%3, 12
488*c0909341SAndroid Build Coastguard Worker    psrad               m%2, 12
489*c0909341SAndroid Build Coastguard Worker%if %8
490*c0909341SAndroid Build Coastguard Worker    packssdw            m%3, m%2                 ;dst2 (in tmp1)
491*c0909341SAndroid Build Coastguard Worker%else
492*c0909341SAndroid Build Coastguard Worker    packssdw            m%2, m%3                 ;dst2
493*c0909341SAndroid Build Coastguard Worker%endif
; dst1: the constant goes into whichever of m%2/m%3 does not hold dst2.
494*c0909341SAndroid Build Coastguard Worker%if %7 < 8
495*c0909341SAndroid Build Coastguard Worker    pmaddwd             m%4, m%6
496*c0909341SAndroid Build Coastguard Worker    pmaddwd             m%1, m%6
497*c0909341SAndroid Build Coastguard Worker%elif %8
498*c0909341SAndroid Build Coastguard Worker    mova                m%2, [o(pw_%6_m%7)]
499*c0909341SAndroid Build Coastguard Worker    pmaddwd             m%4, m%2
500*c0909341SAndroid Build Coastguard Worker    pmaddwd             m%1, m%2
501*c0909341SAndroid Build Coastguard Worker%else
502*c0909341SAndroid Build Coastguard Worker    mova                m%3, [o(pw_%6_m%7)]
503*c0909341SAndroid Build Coastguard Worker    pmaddwd             m%4, m%3
504*c0909341SAndroid Build Coastguard Worker    pmaddwd             m%1, m%3
505*c0909341SAndroid Build Coastguard Worker%endif
506*c0909341SAndroid Build Coastguard Worker    paddd               m%4, m%5
507*c0909341SAndroid Build Coastguard Worker    paddd               m%1, m%5
508*c0909341SAndroid Build Coastguard Worker    psrad               m%4, 12
509*c0909341SAndroid Build Coastguard Worker    psrad               m%1, 12
510*c0909341SAndroid Build Coastguard Worker    packssdw            m%1, m%4                 ;dst1
511*c0909341SAndroid Build Coastguard Worker%endmacro
512*c0909341SAndroid Build Coastguard Worker
; IDCT4_1D -- 4-point inverse DCT on four full registers.
;   %1-%4  registers holding in0..in3 on entry and out0..out3 on exit
;   %5,%6  temporaries (clobbered)
;   %7     register holding the rounding constant, forwarded as the "rnd"
;          argument of ITX_MULSUB_2W (the parameter comment names it pd_2048)
; Both ITX_MULSUB_2W calls pass dst2_in_tmp1 = 1, so t3 is returned in m%5
; and t0 in m%4; the final four saturating adds/subs are the butterfly
;   out0 = t0 + t3, out1 = t1 + t2, out2 = t1 - t2, out3 = t0 - t3.
513*c0909341SAndroid Build Coastguard Worker%macro IDCT4_1D 7 ; src[1-4], tmp[1-2], pd_2048
514*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W        %2, %4, %5, %6, %7, 1567, 3784, 1 ;t2, t3
515*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W        %1, %3, %4, %6, %7, 2896, 2896, 1 ;t1, t0
516*c0909341SAndroid Build Coastguard Worker    psubsw              m%3, m%1, m%2                      ;out2
517*c0909341SAndroid Build Coastguard Worker    paddsw              m%2, m%1                           ;out1
518*c0909341SAndroid Build Coastguard Worker    paddsw              m%1, m%5, m%4                      ;out0
519*c0909341SAndroid Build Coastguard Worker    psubsw              m%4, m%5                           ;out3
520*c0909341SAndroid Build Coastguard Worker%endmacro
521*c0909341SAndroid Build Coastguard Worker
; WRITE_4X8 -- output eight 4-pixel rows by invoking WRITE_4X4 twice:
; once with arguments 0, 1 and again with 2, 3 after advancing dstq by
; four rows (strideq*4). The macro's four row parameters are forwarded
; unchanged to both invocations. dstq is modified.
; NOTE(review): WRITE_4X4 is defined outside this section; presumably its
; first two arguments are coefficient registers and 4, 5, 6 are
; temporaries -- confirm against its definition.
522*c0909341SAndroid Build Coastguard Worker%macro WRITE_4X8 4 ;row[1-4]
523*c0909341SAndroid Build Coastguard Worker    WRITE_4X4             0, 1, 4, 5, 6, %1, %2, %3, %4
524*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*4]
525*c0909341SAndroid Build Coastguard Worker    WRITE_4X4             2, 3, 4, 5, 6, %1, %2, %3, %4
526*c0909341SAndroid Build Coastguard Worker%endmacro
527*c0909341SAndroid Build Coastguard Worker
; INV_4X8 -- transpose the 4-register x 8-word matrix in m0-m3 so that each
; output qword gathers the words at one position across m0..m3 (takes no
; macro arguments).
;   In:       m0-m3, eight words each
;   Out:      m0 = {in0, in1}, m1 = {in2, in3}, m2 = {in4, in5},
;             m3 = {in6, in7}  (low half first), i.e. the packed layout
;             consumed by the 8-point transforms
;   Clobbers: m4
528*c0909341SAndroid Build Coastguard Worker%macro INV_4X8 0
529*c0909341SAndroid Build Coastguard Worker    punpckhwd            m4, m2, m3
530*c0909341SAndroid Build Coastguard Worker    punpcklwd            m2, m3
531*c0909341SAndroid Build Coastguard Worker    punpckhwd            m3, m0, m1
532*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m1
533*c0909341SAndroid Build Coastguard Worker    punpckhdq            m1, m0, m2                  ;low: in2 high: in3
534*c0909341SAndroid Build Coastguard Worker    punpckldq            m0, m2                      ;low: in0 high: in1
535*c0909341SAndroid Build Coastguard Worker    punpckldq            m2, m3, m4                  ;low: in4 high: in5
536*c0909341SAndroid Build Coastguard Worker    punpckhdq            m3, m4                      ;low: in6 high: in7
537*c0909341SAndroid Build Coastguard Worker%endmacro
538*c0909341SAndroid Build Coastguard Worker
; INV_TXFM_4X8_FN type1, type2 -- expand INV_TXFM_FN for a 4x8 block and,
; for the dct_dct pair only, append a DC-only fast path:
;   * broadcast coefficient word 0 to all eight lanes of m0,
;   * run it through pmulhrsw with pw_2896x8 three times and pw_2048 once,
;   * replicate the result into m1-m3,
;   * tail-call iadst_4x8_internal_8bpc.end3, which writes the block
;     without touching the coefficient buffer.
; NOTE(review): "mov [coeffq], eobd" is a 32-bit store that appears to be
; how this path clears the DC coefficient (the .end3 target skips the
; buffer clear). That only works if eobd is zero here, presumably
; guaranteed by INV_TXFM_FN, which is defined outside this section --
; confirm.
539*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_4X8_FN 2 ; type1, type2
540*c0909341SAndroid Build Coastguard Worker    INV_TXFM_FN          %1, %2, 4x8, 8
541*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct
542*c0909341SAndroid Build Coastguard Worker    pshuflw              m0, [coeffq], q0000
543*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m0, m0
544*c0909341SAndroid Build Coastguard Worker    mova                 m1, [o(pw_2896x8)]
545*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m1
546*c0909341SAndroid Build Coastguard Worker    mov           [coeffq], eobd
547*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m1
548*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m1
549*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, [o(pw_2048)]
550*c0909341SAndroid Build Coastguard Worker    mova                 m1, m0
551*c0909341SAndroid Build Coastguard Worker    mova                 m2, m0
552*c0909341SAndroid Build Coastguard Worker    mova                 m3, m0
553*c0909341SAndroid Build Coastguard Worker    TAIL_CALL m(iadst_4x8_internal_8bpc).end3
554*c0909341SAndroid Build Coastguard Worker%endif
555*c0909341SAndroid Build Coastguard Worker%endmacro
556*c0909341SAndroid Build Coastguard Worker
; Switch to SSSE3 for the code that follows and expand INV_TXFM_4X8_FN for
; every transform pair whose first type is dct.
557*c0909341SAndroid Build Coastguard WorkerINIT_XMM ssse3
558*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN dct, dct
559*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN dct, adst
560*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN dct, flipadst
561*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN dct, identity
562*c0909341SAndroid Build Coastguard Worker
; idct_4x8_internal_8bpc -- DCT passes for a 4x8 block.
;   Entry:  multiply the four coefficient vectors at coeffq by pw_2896x8
;           (pmulhrsw) into m0-m3, then fall into .pass1.
;   .pass1: run the 4-point IDCT (idct_8x4_internal_8bpc.main) and join the
;           shared tail at iadst_4x8_internal_8bpc.pass1_end, which
;           transposes with INV_4X8 and jumps to tx2q.
;   .pass2: run .main, swap the qword halves of m1 and m3 (the macro
;           returns them as {out3,out2} and {out7,out6}), load pw_2048 into
;           m4 as the final multiplier and join the shared epilogue at
;           iadst_4x8_internal_8bpc.end2.
;   .main:  IDCT8_1D_PACKED on m0-m3; clobbers m4 and m6.
; NOTE(review): pw_2896x8 presumably holds 2896*8 per word, making the
; entry scaling ~= x/sqrt(2) -- confirm against the constant table.
563*c0909341SAndroid Build Coastguard Workercglobal idct_4x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
564*c0909341SAndroid Build Coastguard Worker    mova                 m3, [o(pw_2896x8)]
565*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m3, [coeffq+16*0]
566*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m3, [coeffq+16*1]
567*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m3, [coeffq+16*2]
568*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3,     [coeffq+16*3]
569*c0909341SAndroid Build Coastguard Worker
570*c0909341SAndroid Build Coastguard Worker.pass1:
571*c0909341SAndroid Build Coastguard Worker    call m(idct_8x4_internal_8bpc).main
572*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_4x8_internal_8bpc).pass1_end
573*c0909341SAndroid Build Coastguard Worker
574*c0909341SAndroid Build Coastguard Worker.pass2:
575*c0909341SAndroid Build Coastguard Worker    call .main
576*c0909341SAndroid Build Coastguard Worker    shufps               m1, m1, q1032             ;{out3,out2} -> {out2,out3}
577*c0909341SAndroid Build Coastguard Worker    shufps               m3, m3, q1032             ;{out7,out6} -> {out6,out7}
578*c0909341SAndroid Build Coastguard Worker    mova                 m4, [o(pw_2048)]
579*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_4x8_internal_8bpc).end2
580*c0909341SAndroid Build Coastguard Worker
581*c0909341SAndroid Build Coastguard WorkerALIGN function_align
582*c0909341SAndroid Build Coastguard Workercglobal_label .main
583*c0909341SAndroid Build Coastguard Worker    IDCT8_1D_PACKED
584*c0909341SAndroid Build Coastguard Worker    ret
585*c0909341SAndroid Build Coastguard Worker
586*c0909341SAndroid Build Coastguard Worker
; Expand INV_TXFM_4X8_FN for every transform pair whose first type is adst.
587*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN adst, dct
588*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN adst, adst
589*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN adst, flipadst
590*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN adst, identity
591*c0909341SAndroid Build Coastguard Worker
; iadst_4x8_internal_8bpc -- ADST passes for a 4x8 block. Several of its
; labels are shared tails that the other 4x8 transforms jump into.
;   Entry:      multiply the four coefficient vectors at coeffq by pw_2896x8
;               (pmulhrsw) into m0-m3.
;   .pass1:     call iadst_8x4_internal_8bpc.main (defined past the end of
;               this section).
;   .pass1_end: INV_4X8 transpose, then jump to the second pass via tx2q.
;   .pass2:     swap the qword halves of m0 and m1 (.main expects
;               {in1,in0} / {in3,in2}), call .main, then build
;               m4 = pw_2048 and m5 = 0 - pw_2048.
;   .end:       m4 = {low: m4 low, high: m5 low}, a per-half multiplier that
;               also undoes the sign of the negated outputs.
;   .end2:      multiply m0-m3 by m4 with pmulhrsw and zero the 64-byte
;               coefficient buffer.
;   .end3:      WRITE_4X8 and return.
;   .main:      8-point inverse ADST.
;               In:  m0 = {in1, in0}, m1 = {in3, in2},
;                    m2 = {in4, in5}, m3 = {in6, in7}   (low half first)
;               Out: m0 = {out0, -out1}, m1 = {out2, -out3},
;                    m2 = {out4, -out5}, m3 = {out6, -out7}
;               Clobbers m4-m7.
; NOTE(review): assuming pw_2048 holds 2048 per word, pmulhrsw by it is a
; rounded arithmetic shift right by 4 -- confirm against the constant table.
592*c0909341SAndroid Build Coastguard Workercglobal iadst_4x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
593*c0909341SAndroid Build Coastguard Worker    mova                 m3, [o(pw_2896x8)]
594*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m3, [coeffq+16*0]
595*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m3, [coeffq+16*1]
596*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m3, [coeffq+16*2]
597*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3,     [coeffq+16*3]
598*c0909341SAndroid Build Coastguard Worker
599*c0909341SAndroid Build Coastguard Worker.pass1:
600*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x4_internal_8bpc).main
601*c0909341SAndroid Build Coastguard Worker
602*c0909341SAndroid Build Coastguard Worker.pass1_end:
603*c0909341SAndroid Build Coastguard Worker    INV_4X8
604*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
605*c0909341SAndroid Build Coastguard Worker
606*c0909341SAndroid Build Coastguard Worker.pass2:
607*c0909341SAndroid Build Coastguard Worker    shufps               m0, m0, q1032
608*c0909341SAndroid Build Coastguard Worker    shufps               m1, m1, q1032
609*c0909341SAndroid Build Coastguard Worker    call .main
610*c0909341SAndroid Build Coastguard Worker    mova                 m4, [o(pw_2048)]
611*c0909341SAndroid Build Coastguard Worker    pxor                 m5, m5
612*c0909341SAndroid Build Coastguard Worker    psubw                m5, m4
613*c0909341SAndroid Build Coastguard Worker
614*c0909341SAndroid Build Coastguard Worker.end:
615*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m4, m5
616*c0909341SAndroid Build Coastguard Worker
617*c0909341SAndroid Build Coastguard Worker.end2:
618*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m4
619*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m4
620*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m4
621*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m4
622*c0909341SAndroid Build Coastguard Worker    pxor                 m5, m5
623*c0909341SAndroid Build Coastguard Worker    mova      [coeffq+16*0], m5
624*c0909341SAndroid Build Coastguard Worker    mova      [coeffq+16*1], m5
625*c0909341SAndroid Build Coastguard Worker    mova      [coeffq+16*2], m5
626*c0909341SAndroid Build Coastguard Worker    mova      [coeffq+16*3], m5
627*c0909341SAndroid Build Coastguard Worker
628*c0909341SAndroid Build Coastguard Worker.end3:
629*c0909341SAndroid Build Coastguard Worker    WRITE_4X8             0, 1, 2, 3
630*c0909341SAndroid Build Coastguard Worker    RET
631*c0909341SAndroid Build Coastguard Worker
632*c0909341SAndroid Build Coastguard WorkerALIGN function_align
633*c0909341SAndroid Build Coastguard Workercglobal_label .main
634*c0909341SAndroid Build Coastguard Worker    mova                 m6, [o(pd_2048)]
635*c0909341SAndroid Build Coastguard Worker    punpckhwd            m4, m3, m0                ;unpacked in7 in0
636*c0909341SAndroid Build Coastguard Worker    punpckhwd            m5, m2, m1                ;unpacked in5 in2
637*c0909341SAndroid Build Coastguard Worker    punpcklwd            m1, m2                    ;unpacked in3 in4
638*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m3                    ;unpacked in1 in6
639*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK        4, 2, 6,  401, 4076      ;low:  t0a   high:  t1a
640*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK        5, 2, 6, 1931, 3612      ;low:  t2a   high:  t3a
641*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK        1, 2, 6, 3166, 2598      ;low:  t4a   high:  t5a
642*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK        0, 2, 6, 3920, 1189      ;low:  t6a   high:  t7a
643*c0909341SAndroid Build Coastguard Worker
644*c0909341SAndroid Build Coastguard Worker    psubsw               m3, m4, m1                ;low:  t4    high:  t5
645*c0909341SAndroid Build Coastguard Worker    paddsw               m4, m1                    ;low:  t0    high:  t1
646*c0909341SAndroid Build Coastguard Worker    psubsw               m2, m5, m0                ;low:  t6    high:  t7
647*c0909341SAndroid Build Coastguard Worker    paddsw               m5, m0                    ;low:  t2    high:  t3
648*c0909341SAndroid Build Coastguard Worker
649*c0909341SAndroid Build Coastguard Worker    shufps               m1, m3, m2, q1032
650*c0909341SAndroid Build Coastguard Worker    punpckhwd            m2, m1
651*c0909341SAndroid Build Coastguard Worker    punpcklwd            m3, m1
652*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK        3, 0, 6, 1567, 3784, 1   ;low:  t5a   high:  t4a
653*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK        2, 0, 6, 3784, 1567      ;low:  t7a   high:  t6a
654*c0909341SAndroid Build Coastguard Worker
655*c0909341SAndroid Build Coastguard Worker    psubsw               m1, m4, m5                ;low:  t2    high:  t3
656*c0909341SAndroid Build Coastguard Worker    paddsw               m4, m5                    ;low:  out0  high: -out7
657*c0909341SAndroid Build Coastguard Worker    psubsw               m5, m3, m2                ;low:  t7    high:  t6
658*c0909341SAndroid Build Coastguard Worker    paddsw               m3, m2                    ;low:  out6  high: -out1
659*c0909341SAndroid Build Coastguard Worker    shufps               m0, m4, m3, q3210         ;low:  out0  high: -out1
660*c0909341SAndroid Build Coastguard Worker    shufps               m3, m4, q3210             ;low:  out6  high: -out7
661*c0909341SAndroid Build Coastguard Worker
; Final rotation of t2/t3/t6/t7 by +-2896, done with explicit pmaddwd so
; that all four results share one rounding (m6) and shift step.
662*c0909341SAndroid Build Coastguard Worker    mova                 m2, [o(pw_2896_m2896)]
663*c0909341SAndroid Build Coastguard Worker    mova                 m7, [o(pw_2896_2896)]
664*c0909341SAndroid Build Coastguard Worker    shufps               m4, m1, m5, q1032         ;low:  t3    high:  t7
665*c0909341SAndroid Build Coastguard Worker    shufps               m1, m5, q3210             ;low:  t2    high:  t6
666*c0909341SAndroid Build Coastguard Worker    punpcklwd            m5, m1, m4
667*c0909341SAndroid Build Coastguard Worker    punpckhwd            m1, m4
668*c0909341SAndroid Build Coastguard Worker    pmaddwd              m4, m2, m1                ;-out5
669*c0909341SAndroid Build Coastguard Worker    pmaddwd              m2, m5                    ; out4
670*c0909341SAndroid Build Coastguard Worker    pmaddwd              m1, m7                    ; out2
671*c0909341SAndroid Build Coastguard Worker    pmaddwd              m5, m7                    ;-out3
672*c0909341SAndroid Build Coastguard Worker    REPX      {paddd x, m6}, m4, m2, m1, m5
673*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 12}, m4, m2, m1, m5
674*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m5                    ;low:  out2  high: -out3
675*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m4                    ;low:  out4  high: -out5
676*c0909341SAndroid Build Coastguard Worker    ret
677*c0909341SAndroid Build Coastguard Worker
; Expand INV_TXFM_4X8_FN for every transform pair whose first type is
; flipadst.
678*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN flipadst, dct
679*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN flipadst, adst
680*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN flipadst, flipadst
681*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN flipadst, identity
682*c0909341SAndroid Build Coastguard Worker
; iflipadst_4x8_internal_8bpc -- flipped-ADST passes for a 4x8 block.
;   Entry:  multiply the four coefficient vectors at coeffq by pw_2896x8
;           (pmulhrsw) into m0-m3.
;   .pass1: call iadst_8x4_internal_8bpc.main (defined past the end of this
;           section), then transpose like INV_4X8 but interleaving the
;           registers in reverse order (m3,m2 / m1,m0); clobbers m4, m5;
;           jump to the second pass via tx2q.
;   .pass2: same input preparation and .main call as the ADST pass 2, then
;           reverse the output order: m0..m3 receive m3, m2, m1, m0 with
;           their qword halves swapped. m4 = 0 - pw_2048 and m5 = pw_2048
;           are set up so that iadst_4x8_internal_8bpc.end builds the
;           multiplier {low: -2048, high: +2048}, the opposite sign pattern
;           to the ADST case, matching the swapped halves.
683*c0909341SAndroid Build Coastguard Workercglobal iflipadst_4x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
684*c0909341SAndroid Build Coastguard Worker    mova                 m3, [o(pw_2896x8)]
685*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m3, [coeffq+16*0]
686*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m3, [coeffq+16*1]
687*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m3, [coeffq+16*2]
688*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3,     [coeffq+16*3]
689*c0909341SAndroid Build Coastguard Worker
690*c0909341SAndroid Build Coastguard Worker.pass1:
691*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x4_internal_8bpc).main
692*c0909341SAndroid Build Coastguard Worker
693*c0909341SAndroid Build Coastguard Worker    punpcklwd            m4, m3, m2
694*c0909341SAndroid Build Coastguard Worker    punpckhwd            m3, m2
695*c0909341SAndroid Build Coastguard Worker    punpcklwd            m5, m1, m0
696*c0909341SAndroid Build Coastguard Worker    punpckhwd            m1, m0
697*c0909341SAndroid Build Coastguard Worker    punpckldq            m2, m3, m1                  ;low: in4 high: in5
698*c0909341SAndroid Build Coastguard Worker    punpckhdq            m3, m1                      ;low: in6 high: in7
699*c0909341SAndroid Build Coastguard Worker    punpckldq            m0, m4, m5                  ;low: in0 high: in1
700*c0909341SAndroid Build Coastguard Worker    punpckhdq            m1, m4, m5                  ;low: in2 high: in3
701*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
702*c0909341SAndroid Build Coastguard Worker
703*c0909341SAndroid Build Coastguard Worker.pass2:
704*c0909341SAndroid Build Coastguard Worker    shufps               m0, m0, q1032
705*c0909341SAndroid Build Coastguard Worker    shufps               m1, m1, q1032
706*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x8_internal_8bpc).main
707*c0909341SAndroid Build Coastguard Worker
; Reverse the register order and swap halves within each register.
708*c0909341SAndroid Build Coastguard Worker    mova                 m4, m0
709*c0909341SAndroid Build Coastguard Worker    mova                 m5, m1
710*c0909341SAndroid Build Coastguard Worker    pshufd               m0, m3, q1032
711*c0909341SAndroid Build Coastguard Worker    pshufd               m1, m2, q1032
712*c0909341SAndroid Build Coastguard Worker    pshufd               m2, m5, q1032
713*c0909341SAndroid Build Coastguard Worker    pshufd               m3, m4, q1032
714*c0909341SAndroid Build Coastguard Worker    mova                 m5, [o(pw_2048)]
715*c0909341SAndroid Build Coastguard Worker    pxor                 m4, m4
716*c0909341SAndroid Build Coastguard Worker    psubw                m4, m5
717*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_4x8_internal_8bpc).end
718*c0909341SAndroid Build Coastguard Worker
; Expand INV_TXFM_4X8_FN for every transform pair whose first type is
; identity.
719*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN identity, dct
720*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN identity, adst
721*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN identity, flipadst
722*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN identity, identity
723*c0909341SAndroid Build Coastguard Worker
; iidentity_4x8_internal_8bpc -- identity passes for a 4x8 block.
;   Entry:  multiply the four coefficient vectors at coeffq by pw_2896x8
;           (pmulhrsw) into m0-m3.
;   .pass1: x += pmulhrsw(x, pw_1697x8) with signed saturation for every
;           word of m0-m3 (clobbers m4-m7), then join the shared
;           transpose/dispatch tail at iadst_4x8_internal_8bpc.pass1_end.
;   .pass2: no arithmetic of its own; loads pw_4096 into m4 as the final
;           multiplier and joins iadst_4x8_internal_8bpc.end2.
; NOTE(review): assuming pw_4096 holds 4096 per word, the pmulhrsw at .end2
; is a rounded shift right by 3 rather than the 4 used with pw_2048, which
; would fold an 8-point identity scale of 2 into the final rounding --
; confirm.
724*c0909341SAndroid Build Coastguard Workercglobal iidentity_4x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
725*c0909341SAndroid Build Coastguard Worker    mova                 m3, [o(pw_2896x8)]
726*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m3, [coeffq+16*0]
727*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m3, [coeffq+16*1]
728*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m3, [coeffq+16*2]
729*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3,     [coeffq+16*3]
730*c0909341SAndroid Build Coastguard Worker
731*c0909341SAndroid Build Coastguard Worker.pass1:
732*c0909341SAndroid Build Coastguard Worker    mova                 m7, [o(pw_1697x8)]
733*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m4, m7, m0
734*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m5, m7, m1
735*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m6, m7, m2
736*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m7, m3
737*c0909341SAndroid Build Coastguard Worker    paddsw               m0, m4
738*c0909341SAndroid Build Coastguard Worker    paddsw               m1, m5
739*c0909341SAndroid Build Coastguard Worker    paddsw               m2, m6
740*c0909341SAndroid Build Coastguard Worker    paddsw               m3, m7
741*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_4x8_internal_8bpc).pass1_end
742*c0909341SAndroid Build Coastguard Worker
743*c0909341SAndroid Build Coastguard Worker.pass2:
744*c0909341SAndroid Build Coastguard Worker    mova                 m4, [o(pw_4096)]
745*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_4x8_internal_8bpc).end2
746*c0909341SAndroid Build Coastguard Worker
747*c0909341SAndroid Build Coastguard Worker
; WRITE_8X2 -- add two rows of eight residual words to the destination
; pixels and store the result.
;   %1, %2  residual for row 0 / row 1: a register number (used as m%N) or,
;           when not numeric, any operand accepted by paddw
;   %3-%5   temporary register numbers (all clobbered; m%5 is zeroed)
; Loads 8 bytes from [dstq] and [dstq+strideq], zero-extends them to words,
; adds the residuals, packs both rows into one register with unsigned
; saturation and writes the low and high qwords back to the two rows.
748*c0909341SAndroid Build Coastguard Worker%macro WRITE_8X2 5       ;coefs[1-2], tmp[1-3]
749*c0909341SAndroid Build Coastguard Worker    movq                 m%3, [dstq        ]
750*c0909341SAndroid Build Coastguard Worker    movq                 m%4, [dstq+strideq]
751*c0909341SAndroid Build Coastguard Worker    pxor                 m%5, m%5
752*c0909341SAndroid Build Coastguard Worker    punpcklbw            m%3, m%5                 ;extend byte to word
753*c0909341SAndroid Build Coastguard Worker    punpcklbw            m%4, m%5                 ;extend byte to word
754*c0909341SAndroid Build Coastguard Worker%ifnum %1
755*c0909341SAndroid Build Coastguard Worker    paddw                m%3, m%1
756*c0909341SAndroid Build Coastguard Worker%else
757*c0909341SAndroid Build Coastguard Worker    paddw                m%3, %1
758*c0909341SAndroid Build Coastguard Worker%endif
759*c0909341SAndroid Build Coastguard Worker%ifnum %2
760*c0909341SAndroid Build Coastguard Worker    paddw                m%4, m%2
761*c0909341SAndroid Build Coastguard Worker%else
762*c0909341SAndroid Build Coastguard Worker    paddw                m%4, %2
763*c0909341SAndroid Build Coastguard Worker%endif
764*c0909341SAndroid Build Coastguard Worker    packuswb             m%3, m%4                 ;low: row 0 high: row 1
765*c0909341SAndroid Build Coastguard Worker    movq      [dstq        ], m%3
766*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m%3, m%3
767*c0909341SAndroid Build Coastguard Worker    movq      [dstq+strideq], m%3
768*c0909341SAndroid Build Coastguard Worker%endmacro
769*c0909341SAndroid Build Coastguard Worker
; WRITE_8X4 -- output four 8-pixel rows as two WRITE_8X2 invocations.
;   %1-%4  residuals for rows 0..3 (same operand rules as WRITE_8X2)
;   %5-%7  temporary register numbers, reused by both invocations
; dstq is advanced by two rows (strideq*2) between the invocations and is
; left pointing at row 2 on exit.
770*c0909341SAndroid Build Coastguard Worker%macro WRITE_8X4 7      ;coefs[1-4], tmp[1-3]
771*c0909341SAndroid Build Coastguard Worker    WRITE_8X2             %1, %2, %5, %6, %7
772*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*2]
773*c0909341SAndroid Build Coastguard Worker    WRITE_8X2             %3, %4, %5, %6, %7
774*c0909341SAndroid Build Coastguard Worker%endmacro
775*c0909341SAndroid Build Coastguard Worker
; INV_TXFM_8X4_FN type1, type2 -- expand INV_TXFM_FN for an 8x4 block and,
; for the dct_dct pair only, append a DC-only fast path:
;   * broadcast coefficient word 0 to all eight lanes of m0,
;   * run it through pmulhrsw with pw_2896x8 three times and pw_2048 once,
;   * replicate the result into m1-m3,
;   * tail-call iadst_8x4_internal_8bpc.end2.
; Unlike the 4x8 variant this path does not store to [coeffq] itself.
; NOTE(review): the .end2 target lies outside this section; presumably it
; clears the coefficient buffer before writing the block -- confirm.
776*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_8X4_FN 2 ; type1, type2
777*c0909341SAndroid Build Coastguard Worker    INV_TXFM_FN          %1, %2, 8x4, 8
778*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct
779*c0909341SAndroid Build Coastguard Worker    pshuflw              m0, [coeffq], q0000
780*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m0, m0
781*c0909341SAndroid Build Coastguard Worker    mova                 m1, [o(pw_2896x8)]
782*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m1
783*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m1
784*c0909341SAndroid Build Coastguard Worker    mova                 m2, [o(pw_2048)]
785*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m1
786*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m2
787*c0909341SAndroid Build Coastguard Worker    mova                 m1, m0
788*c0909341SAndroid Build Coastguard Worker    mova                 m2, m0
789*c0909341SAndroid Build Coastguard Worker    mova                 m3, m0
790*c0909341SAndroid Build Coastguard Worker    TAIL_CALL m(iadst_8x4_internal_8bpc).end2
791*c0909341SAndroid Build Coastguard Worker%endif
792*c0909341SAndroid Build Coastguard Worker%endmacro
793*c0909341SAndroid Build Coastguard Worker
; Expand INV_TXFM_8X4_FN for every transform pair whose first type is dct.
794*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN dct, dct
795*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN dct, adst
796*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN dct, flipadst
797*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN dct, identity
798*c0909341SAndroid Build Coastguard Worker
; idct_8x4_internal_8bpc -- DCT passes for an 8x4 block.
;   Entry (pass 1): multiply the four coefficient vectors at coeffq by
;           pw_2896x8 (pmulhrsw) into m0-m3, run the packed 8-point IDCT
;           (idct_4x8_internal_8bpc.main), then regroup the packed outputs
;           into four full 8-word rows in0..in3 using the deint_shuf1/2
;           byte shuffles followed by dword and qword interleaves.
;           Clobbers m4-m6. Jumps to the second pass via tx2q.
;   .pass2: call .main and join the epilogue at
;           iadst_8x4_internal_8bpc.end (outside this section).
;   .main:  4-point IDCT on m0-m3 via IDCT4_1D with m4/m5 as temporaries
;           and pd_2048 loaded into m6 as the rounding constant.
; NOTE(review): deint_shuf1 and deint_shuf2 are shuffle masks defined
; outside this section; the in0..in3 labels come from the original
; comments.
799*c0909341SAndroid Build Coastguard Workercglobal idct_8x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
800*c0909341SAndroid Build Coastguard Worker    mova                 m3, [o(pw_2896x8)]
801*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m3, [coeffq+16*0]
802*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m3, [coeffq+16*1]
803*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m3, [coeffq+16*2]
804*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3,     [coeffq+16*3]
805*c0909341SAndroid Build Coastguard Worker
806*c0909341SAndroid Build Coastguard Worker    call m(idct_4x8_internal_8bpc).main
807*c0909341SAndroid Build Coastguard Worker
808*c0909341SAndroid Build Coastguard Worker    mova                 m4, [o(deint_shuf1)]
809*c0909341SAndroid Build Coastguard Worker    mova                 m5, [o(deint_shuf2)]
810*c0909341SAndroid Build Coastguard Worker    pshufb               m0, m4
811*c0909341SAndroid Build Coastguard Worker    pshufb               m1, m5
812*c0909341SAndroid Build Coastguard Worker    pshufb               m2, m4
813*c0909341SAndroid Build Coastguard Worker    pshufb               m3, m5
814*c0909341SAndroid Build Coastguard Worker    punpckhdq            m4, m0, m1
815*c0909341SAndroid Build Coastguard Worker    punpckldq            m0, m1
816*c0909341SAndroid Build Coastguard Worker    punpckhdq            m5, m2, m3
817*c0909341SAndroid Build Coastguard Worker    punpckldq            m2, m3
818*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m1, m0, m2                      ;in1
819*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m0, m2                          ;in0
820*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m3, m4, m5                      ;in3
821*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m2 ,m4, m5                      ;in2
822*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
823*c0909341SAndroid Build Coastguard Worker
824*c0909341SAndroid Build Coastguard Worker.pass2:
825*c0909341SAndroid Build Coastguard Worker    call .main
826*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_8x4_internal_8bpc).end
827*c0909341SAndroid Build Coastguard Worker
828*c0909341SAndroid Build Coastguard WorkerALIGN function_align
829*c0909341SAndroid Build Coastguard Workercglobal_label .main
830*c0909341SAndroid Build Coastguard Worker    mova                 m6, [o(pd_2048)]
831*c0909341SAndroid Build Coastguard Worker    IDCT4_1D             0, 1, 2, 3, 4, 5, 6
832*c0909341SAndroid Build Coastguard Worker    ret
833*c0909341SAndroid Build Coastguard Worker
; Expand INV_TXFM_8X4_FN (defined earlier in the file) once per type pairing
; whose first type is adst.
834*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN adst, dct
835*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN adst, adst
836*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN adst, flipadst
837*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN adst, identity
838*c0909341SAndroid Build Coastguard Worker
; iadst_8x4_internal_8bpc
;   Pass 1: loads four coefficient vectors scaled by pw_2896x8 (pmulhrsw),
;           applies a q1032 shufps to m0 and m1, runs
;           m(iadst_4x8_internal_8bpc).main, then interleaves the result into
;           m0-m3 (in0-in3) while negating half of the intermediate vectors,
;           and jumps through tx2q.
;   .pass2: runs .main (defined after this block) and falls into .end.
;   .end:   multiplies m0-m3 by pw_2048 with pmulhrsw.
;   .end2:  zeroes the four coefficient vectors at coeffq.
;   .end3:  WRITE_8X4 on m0-m3, then RET.
; .end and .end2 are also entered from other routines in this file (the 8x4
; dct/flipadst/identity second passes jump to .end; the INV_TXFM_8X4_FN macro
; tail-calls .end2).
; NOTE(review): q1032 presumably swaps the two 64-bit halves of the register
; (x86inc qXXXX naming) -- confirm.
839*c0909341SAndroid Build Coastguard Workercglobal iadst_8x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
840*c0909341SAndroid Build Coastguard Worker    mova                 m3, [o(pw_2896x8)]
841*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m3, [coeffq+16*0]
842*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m3, [coeffq+16*1]
843*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m3, [coeffq+16*2]
; m3 holds the scale factor, so it is consumed last and overwritten in place.
844*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3,     [coeffq+16*3]
845*c0909341SAndroid Build Coastguard Worker
846*c0909341SAndroid Build Coastguard Worker    shufps               m0, m0, q1032
847*c0909341SAndroid Build Coastguard Worker    shufps               m1, m1, q1032
848*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x8_internal_8bpc).main
849*c0909341SAndroid Build Coastguard Worker
; Word-interleave the four outputs. The two high interleaves (m4 and m1) are
; negated by subtracting them from zero with saturation before being merged.
850*c0909341SAndroid Build Coastguard Worker    punpckhwd            m4, m0, m1
851*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m1
852*c0909341SAndroid Build Coastguard Worker    punpckhwd            m1, m2, m3
853*c0909341SAndroid Build Coastguard Worker    punpcklwd            m2, m3
854*c0909341SAndroid Build Coastguard Worker    pxor                 m5, m5
855*c0909341SAndroid Build Coastguard Worker    psubsw               m3, m5, m1
856*c0909341SAndroid Build Coastguard Worker    psubsw               m5, m4
857*c0909341SAndroid Build Coastguard Worker    punpckhdq            m4, m5, m3
858*c0909341SAndroid Build Coastguard Worker    punpckldq            m5, m3
859*c0909341SAndroid Build Coastguard Worker    punpckhdq            m3, m0, m2
860*c0909341SAndroid Build Coastguard Worker    punpckldq            m0, m2
861*c0909341SAndroid Build Coastguard Worker    punpckhwd            m1, m0, m5      ;in1
862*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m5          ;in0
863*c0909341SAndroid Build Coastguard Worker    punpcklwd            m2, m3, m4      ;in2
864*c0909341SAndroid Build Coastguard Worker    punpckhwd            m3, m4          ;in3
; Continue with whichever second-pass routine tx2q points at.
865*c0909341SAndroid Build Coastguard Worker    jmp              tx2q
866*c0909341SAndroid Build Coastguard Worker
; Second pass when the second transform type is adst: input and output in m0-m3.
867*c0909341SAndroid Build Coastguard Worker.pass2:
868*c0909341SAndroid Build Coastguard Worker    call .main
869*c0909341SAndroid Build Coastguard Worker
; Final scaling of all four rows.
; NOTE(review): if pw_2048 is 2048 per word, pmulhrsw gives (x + 8) >> 4 -- confirm.
870*c0909341SAndroid Build Coastguard Worker.end:
871*c0909341SAndroid Build Coastguard Worker    mova                 m4, [o(pw_2048)]
872*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m4
873*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m4
874*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m4
875*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m4
876*c0909341SAndroid Build Coastguard Worker
; Entry for callers whose rows are already scaled: clear the coefficient buffer.
877*c0909341SAndroid Build Coastguard Worker.end2:
878*c0909341SAndroid Build Coastguard Worker    pxor                 m6, m6
879*c0909341SAndroid Build Coastguard Worker    mova      [coeffq+16*0], m6
880*c0909341SAndroid Build Coastguard Worker    mova      [coeffq+16*1], m6
881*c0909341SAndroid Build Coastguard Worker    mova      [coeffq+16*2], m6
882*c0909341SAndroid Build Coastguard Worker    mova      [coeffq+16*3], m6
; Store m0-m3 through WRITE_8X4 (defined elsewhere; m4-m6 are passed as its
; temporaries) and return.
883*c0909341SAndroid Build Coastguard Worker.end3:
884*c0909341SAndroid Build Coastguard Worker    WRITE_8X4             0, 1, 2, 3, 4, 5, 6
885*c0909341SAndroid Build Coastguard Worker    RET
886*c0909341SAndroid Build Coastguard Worker
887*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; 4-point inverse ADST kernel applied to eight columns at once.
;   In:      m0-m3 = in0-in3 (eight words each)
;   Out:     m0-m3 = out0-out3
;   Scratch: m4-m7 are overwritten
; in0/in2 and in1/in3 are word-interleaved so that every pmaddwd evaluates a
; two-term dot product against a constant pair. The low four columns live in
; m0/m1 and the high four in m6/m7; the same sequence is run on each half and
; the dword results are packed back to words with signed saturation.
; Every result has pd_2048 added and is shifted right arithmetically by 12.
888*c0909341SAndroid Build Coastguard Workercglobal_label .main
889*c0909341SAndroid Build Coastguard Worker    punpckhwd            m6, m0, m2                    ;unpacked in0 in2
890*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m2                        ;unpacked in0 in2
891*c0909341SAndroid Build Coastguard Worker    punpckhwd            m7, m1, m3                    ;unpacked in1 in3
892*c0909341SAndroid Build Coastguard Worker    punpcklwd            m1, m3                        ;unpacked in1 in3
893*c0909341SAndroid Build Coastguard Worker
; out2 for both halves: m3 accumulates the high columns, m2 the low columns.
894*c0909341SAndroid Build Coastguard Worker    mova                 m2, [o(pw_3344_m3344)]
895*c0909341SAndroid Build Coastguard Worker    mova                 m4, [o(pw_0_3344)]
896*c0909341SAndroid Build Coastguard Worker    pmaddwd              m3, m2, m6                    ;3344 * in0 - 3344 * in2
897*c0909341SAndroid Build Coastguard Worker    pmaddwd              m5, m4, m7                    ;3344 * in3
898*c0909341SAndroid Build Coastguard Worker    pmaddwd              m2, m0
899*c0909341SAndroid Build Coastguard Worker    pmaddwd              m4, m1
900*c0909341SAndroid Build Coastguard Worker    paddd                m3, m5
901*c0909341SAndroid Build Coastguard Worker    paddd                m2, m4
902*c0909341SAndroid Build Coastguard Worker    mova                 m4, [o(pd_2048)]
903*c0909341SAndroid Build Coastguard Worker    paddd                m3, m4                        ;t2 + 2048
904*c0909341SAndroid Build Coastguard Worker    paddd                m2, m4
905*c0909341SAndroid Build Coastguard Worker    psrad                m3, 12
906*c0909341SAndroid Build Coastguard Worker    psrad                m2, 12
907*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m3                        ;out2
908*c0909341SAndroid Build Coastguard Worker
; Low four columns (m0/m1): out0 and out1 are packed into m0; out3 stays as
; dwords in m3 until the high half is ready.
909*c0909341SAndroid Build Coastguard Worker    pmaddwd              m4, m0, [o(pw_1321_3803)]     ;1321 * in0 + 3803 * in2
910*c0909341SAndroid Build Coastguard Worker    pmaddwd              m0, [o(pw_2482_m1321)]        ;2482 * in0 - 1321 * in2
911*c0909341SAndroid Build Coastguard Worker    pmaddwd              m3, m1, [o(pw_3344_2482)]     ;3344 * in1 + 2482 * in3
912*c0909341SAndroid Build Coastguard Worker    pmaddwd              m5, m1, [o(pw_3344_m3803)]    ;3344 * in1 - 3803 * in3
913*c0909341SAndroid Build Coastguard Worker    paddd                m3, m4                        ;t0 + t3
914*c0909341SAndroid Build Coastguard Worker
915*c0909341SAndroid Build Coastguard Worker    pmaddwd              m1, [o(pw_m6688_m3803)]       ;-2 * 3344 * in1 - 3803 * in3
916*c0909341SAndroid Build Coastguard Worker    mova                 m4, [o(pd_2048)]
917*c0909341SAndroid Build Coastguard Worker    paddd                m0, m4
918*c0909341SAndroid Build Coastguard Worker    paddd                m4, m3                        ;t0 + t3 + 2048
919*c0909341SAndroid Build Coastguard Worker    paddd                m5, m0                        ;t1 + t3 + 2048
920*c0909341SAndroid Build Coastguard Worker    paddd                m3, m0
921*c0909341SAndroid Build Coastguard Worker    paddd                m3, m1                        ;t0 + t1 - t3 + 2048
922*c0909341SAndroid Build Coastguard Worker
923*c0909341SAndroid Build Coastguard Worker    psrad                m4, 12                        ;out0
924*c0909341SAndroid Build Coastguard Worker    psrad                m5, 12                        ;out1
925*c0909341SAndroid Build Coastguard Worker    psrad                m3, 12                        ;out3
926*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m4, m5                    ;low: out0  high: out1
927*c0909341SAndroid Build Coastguard Worker
; High four columns (m6/m7): same computation, using m1 for the out3 sum.
928*c0909341SAndroid Build Coastguard Worker    pmaddwd              m4, m6, [o(pw_1321_3803)]     ;1321 * in0 + 3803 * in2
929*c0909341SAndroid Build Coastguard Worker    pmaddwd              m6, [o(pw_2482_m1321)]        ;2482 * in0 - 1321 * in2
930*c0909341SAndroid Build Coastguard Worker    pmaddwd              m1, m7, [o(pw_3344_2482)]     ;3344 * in1 + 2482 * in3
931*c0909341SAndroid Build Coastguard Worker    pmaddwd              m5, m7, [o(pw_3344_m3803)]    ;3344 * in1 - 3803 * in3
932*c0909341SAndroid Build Coastguard Worker    paddd                m1, m4                        ;t0 + t3
933*c0909341SAndroid Build Coastguard Worker    pmaddwd              m7, [o(pw_m6688_m3803)]       ;-2 * 3344 * in1 - 3803 * in3
934*c0909341SAndroid Build Coastguard Worker
935*c0909341SAndroid Build Coastguard Worker    mova                 m4, [o(pd_2048)]
936*c0909341SAndroid Build Coastguard Worker    paddd                m6, m4
937*c0909341SAndroid Build Coastguard Worker    paddd                m4, m1                        ;t0 + t3 + 2048
938*c0909341SAndroid Build Coastguard Worker    paddd                m5, m6                        ;t1 + t3 + 2048
939*c0909341SAndroid Build Coastguard Worker    paddd                m1, m6
940*c0909341SAndroid Build Coastguard Worker    paddd                m1, m7                        ;t0 + t1 - t3 + 2048
941*c0909341SAndroid Build Coastguard Worker
942*c0909341SAndroid Build Coastguard Worker    psrad                m4, 12                        ;out0
943*c0909341SAndroid Build Coastguard Worker    psrad                m5, 12                        ;out1
944*c0909341SAndroid Build Coastguard Worker    psrad                m1, 12                        ;out3
945*c0909341SAndroid Build Coastguard Worker    packssdw             m3, m1                        ;out3
946*c0909341SAndroid Build Coastguard Worker    packssdw             m4, m5                        ;low: out0  high: out1
947*c0909341SAndroid Build Coastguard Worker
; m0 and m4 each hold out0 in the low qword and out1 in the high qword (one
; per column half); regroup so that m0 is all out0 and m1 is all out1.
948*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m1, m0, m4                    ;out1
949*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m0, m4                        ;out0
950*c0909341SAndroid Build Coastguard Worker    ret
951*c0909341SAndroid Build Coastguard Worker
; Expand INV_TXFM_8X4_FN (defined earlier in the file) once per type pairing
; whose first type is flipadst.
952*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN flipadst, dct
953*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN flipadst, adst
954*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN flipadst, flipadst
955*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN flipadst, identity
956*c0909341SAndroid Build Coastguard Worker
; iflipadst_8x4_internal_8bpc
;   Pass 1: same load, scale, shufps and m(iadst_4x8_internal_8bpc).main call
;           as iadst_8x4_internal_8bpc, but the interleave that follows takes
;           the four outputs in the opposite order (m3,m2 then m1,m0).
;   .pass2: runs the iadst_8x4 .main kernel, reverses the order of the four
;           output rows and finishes in m(iadst_8x4_internal_8bpc).end.
957*c0909341SAndroid Build Coastguard Workercglobal iflipadst_8x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
958*c0909341SAndroid Build Coastguard Worker    mova                 m3, [o(pw_2896x8)]
959*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m3, [coeffq+16*0]
960*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m3, [coeffq+16*1]
961*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m3, [coeffq+16*2]
; m3 holds the scale factor, so it is consumed last and overwritten in place.
962*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3,     [coeffq+16*3]
963*c0909341SAndroid Build Coastguard Worker
964*c0909341SAndroid Build Coastguard Worker    shufps               m0, m0, q1032
965*c0909341SAndroid Build Coastguard Worker    shufps               m1, m1, q1032
966*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x8_internal_8bpc).main
967*c0909341SAndroid Build Coastguard Worker
; Interleave with the operands swapped relative to iadst_8x4 (m3 with m2,
; m1 with m0).
968*c0909341SAndroid Build Coastguard Worker    punpckhwd            m5, m3, m2
969*c0909341SAndroid Build Coastguard Worker    punpcklwd            m3, m2
970*c0909341SAndroid Build Coastguard Worker    punpckhwd            m2, m1, m0
971*c0909341SAndroid Build Coastguard Worker    punpcklwd            m1, m0
972*c0909341SAndroid Build Coastguard Worker
; Negate the two high interleaves (m2 and m5) by subtracting them from zero
; with saturation, then merge everything into in0-in3.
973*c0909341SAndroid Build Coastguard Worker    pxor                 m0, m0
974*c0909341SAndroid Build Coastguard Worker    psubsw               m4, m0, m2
975*c0909341SAndroid Build Coastguard Worker    psubsw               m0, m5
976*c0909341SAndroid Build Coastguard Worker    punpckhdq            m2, m0, m4
977*c0909341SAndroid Build Coastguard Worker    punpckldq            m0, m4
978*c0909341SAndroid Build Coastguard Worker    punpckhdq            m4, m3, m1
979*c0909341SAndroid Build Coastguard Worker    punpckldq            m3, m1
980*c0909341SAndroid Build Coastguard Worker    punpckhwd            m1, m0, m3      ;in1
981*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m3          ;in0
982*c0909341SAndroid Build Coastguard Worker    punpckhwd            m3, m2, m4      ;in3
983*c0909341SAndroid Build Coastguard Worker    punpcklwd            m2, m4          ;in2
; Continue with whichever second-pass routine tx2q points at.
984*c0909341SAndroid Build Coastguard Worker    jmp                  tx2q
985*c0909341SAndroid Build Coastguard Worker
; Second pass when the second transform type is flipadst.
985*c0909341SAndroid Build Coastguard Worker.pass2:
987*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x4_internal_8bpc).main
; Flip vertically: swap m0 with m3 and m1 with m2, using m4/m5 as temporaries.
988*c0909341SAndroid Build Coastguard Worker    mova                 m4, m0
989*c0909341SAndroid Build Coastguard Worker    mova                 m5, m1
990*c0909341SAndroid Build Coastguard Worker    mova                 m0, m3
991*c0909341SAndroid Build Coastguard Worker    mova                 m1, m2
992*c0909341SAndroid Build Coastguard Worker    mova                 m2, m5
993*c0909341SAndroid Build Coastguard Worker    mova                 m3, m4
; Shared tail: scaling, coefficient clearing and the store live in iadst_8x4.
994*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_8x4_internal_8bpc).end
995*c0909341SAndroid Build Coastguard Worker
; Expand INV_TXFM_8X4_FN (defined earlier in the file) once per type pairing
; whose first type is identity.
996*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN identity, dct
997*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN identity, adst
998*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN identity, flipadst
999*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN identity, identity
1000*c0909341SAndroid Build Coastguard Worker
; iidentity_8x4_internal_8bpc
;   Pass 1: loads four coefficient vectors scaled by pw_2896x8 (pmulhrsw),
;           doubles each with a saturating add, interleaves them into m0-m3
;           (in0-in3) and jumps through tx2q.
;   .pass2: replaces each row x in m0-m3 by x + pmulhrsw(x, pw_1697x8) using
;           saturating adds (m4-m7 are scratch), then finishes in
;           m(iadst_8x4_internal_8bpc).end.
1001*c0909341SAndroid Build Coastguard Workercglobal iidentity_8x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
1002*c0909341SAndroid Build Coastguard Worker    mova                 m3, [o(pw_2896x8)]
1003*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m3, [coeffq+16*0]
1004*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m3, [coeffq+16*1]
1005*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m3, [coeffq+16*2]
; m3 holds the scale factor, so it is consumed last and overwritten in place.
1006*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3,     [coeffq+16*3]
; Identity first pass: multiply every row by two, saturating at 16 bits.
1007*c0909341SAndroid Build Coastguard Worker    paddsw               m0, m0
1008*c0909341SAndroid Build Coastguard Worker    paddsw               m1, m1
1009*c0909341SAndroid Build Coastguard Worker    paddsw               m2, m2
1010*c0909341SAndroid Build Coastguard Worker    paddsw               m3, m3
1011*c0909341SAndroid Build Coastguard Worker
; Word and dword interleave of the four rows into in0-in3.
1012*c0909341SAndroid Build Coastguard Worker    punpckhwd            m4, m0, m1
1013*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m1
1014*c0909341SAndroid Build Coastguard Worker    punpckhwd            m1, m2, m3
1015*c0909341SAndroid Build Coastguard Worker    punpcklwd            m2, m3
1016*c0909341SAndroid Build Coastguard Worker    punpckhdq            m5, m4, m1
1017*c0909341SAndroid Build Coastguard Worker    punpckldq            m4, m1
1018*c0909341SAndroid Build Coastguard Worker    punpckhdq            m3, m0, m2
1019*c0909341SAndroid Build Coastguard Worker    punpckldq            m0, m2
1020*c0909341SAndroid Build Coastguard Worker    punpckhwd            m1, m0, m4      ;in1
1021*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m4          ;in0
1022*c0909341SAndroid Build Coastguard Worker    punpcklwd            m2, m3, m5      ;in2
1023*c0909341SAndroid Build Coastguard Worker    punpckhwd            m3, m5          ;in3
; Continue with whichever second-pass routine tx2q points at.
1024*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
1025*c0909341SAndroid Build Coastguard Worker
; Second pass when the second transform type is identity.
; NOTE(review): pw_1697x8 is presumably 1697*8, making x + pmulhrsw(x, c)
; roughly x * sqrt(2) -- confirm against the constant table.
1026*c0909341SAndroid Build Coastguard Worker.pass2:
1027*c0909341SAndroid Build Coastguard Worker    mova                 m7, [o(pw_1697x8)]
1028*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m4, m7, m0
1029*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m5, m7, m1
1030*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m6, m7, m2
; m7 holds the constant, so it is consumed last and overwritten in place.
1031*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m7, m3
1032*c0909341SAndroid Build Coastguard Worker    paddsw               m0, m4
1033*c0909341SAndroid Build Coastguard Worker    paddsw               m1, m5
1034*c0909341SAndroid Build Coastguard Worker    paddsw               m2, m6
1035*c0909341SAndroid Build Coastguard Worker    paddsw               m3, m7
; Shared tail: scaling, coefficient clearing and the store live in iadst_8x4.
1036*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_8x4_internal_8bpc).end
1037*c0909341SAndroid Build Coastguard Worker
; INV_TXFM_8X8_FN type1, type2
;   Forwards to INV_TXFM_FN (defined elsewhere) with the extra arguments
;   8x8, 8 and 16*4. For the dct/dct pairing only, it additionally emits the
;   code below, which handles a block from its first coefficient alone:
;   the coefficient is broadcast to all eight words of m0, passed through four
;   pmulhrsw steps (pw_2896x8, pw_16384, pw_2896x8, pw_16384 >> 3), and the
;   resulting vector is used for every row handed to WRITE_8X4.
; NOTE(review): presumably INV_TXFM_FN only falls through to this code when
; eob is zero, which is why storing eobd clears the coefficient -- confirm.
1038*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_8X8_FN 2 ; type1, type2
1039*c0909341SAndroid Build Coastguard Worker    INV_TXFM_FN          %1, %2, 8x8, 8, 16*4
1040*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct
; Broadcast word 0 of the coefficient buffer to all eight words of m0.
1041*c0909341SAndroid Build Coastguard Worker    pshuflw              m0, [coeffq], q0000
1042*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m0
1043*c0909341SAndroid Build Coastguard Worker    mova                 m1, [o(pw_2896x8)]
1044*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m1
1045*c0909341SAndroid Build Coastguard Worker    mova                 m2, [o(pw_16384)]
; Overwrite the first coefficient dword with eobd (see the note above).
1046*c0909341SAndroid Build Coastguard Worker    mov            [coeffq], eobd
1047*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m2
; Reuse the pw_16384 register as the final scale by shifting each word right by 3.
1048*c0909341SAndroid Build Coastguard Worker    psrlw                m2, 3
1049*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m1
1050*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m2
; Store loop: r3d counts two WRITE_8X4 invocations; tx2q is pointed at .end3
; so the jump after the loop lands on the RET.
1051*c0909341SAndroid Build Coastguard Worker.end:
1052*c0909341SAndroid Build Coastguard Worker    mov                 r3d, 2
1053*c0909341SAndroid Build Coastguard Worker    lea                tx2q, [o(m(inv_txfm_add_dct_dct_8x8_8bpc).end3)]
1054*c0909341SAndroid Build Coastguard Worker.loop:
; The same vector m0 is supplied for all four row arguments; m1-m3 are temporaries.
1055*c0909341SAndroid Build Coastguard Worker    WRITE_8X4             0, 0, 0, 0, 1, 2, 3
1056*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*2]
1057*c0909341SAndroid Build Coastguard Worker    dec                 r3d
1058*c0909341SAndroid Build Coastguard Worker    jg .loop
1059*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
1060*c0909341SAndroid Build Coastguard Worker.end3:
1061*c0909341SAndroid Build Coastguard Worker    RET
1062*c0909341SAndroid Build Coastguard Worker%endif
1063*c0909341SAndroid Build Coastguard Worker%endmacro
1064*c0909341SAndroid Build Coastguard Worker
; LOAD_8ROWS src, stride [, is_rect2 = 0]
;   Loads eight vectors from [src + stride*i], i = 0..7, into m0-m7.
;   With is_rect2 non-zero, each vector is multiplied by pw_2896x8 (pmulhrsw)
;   as it is loaded; m7 carries the constant until the last load overwrites it.
;   With is_rect2 zero the vectors are loaded unmodified with mova.
1065*c0909341SAndroid Build Coastguard Worker%macro LOAD_8ROWS 2-3 0 ; src, stride, is_rect2
1066*c0909341SAndroid Build Coastguard Worker%if %3
1067*c0909341SAndroid Build Coastguard Worker    mova                 m7, [o(pw_2896x8)]
1068*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m7, [%1+%2*0]
1069*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m7, [%1+%2*1]
1070*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m7, [%1+%2*2]
1071*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m7, [%1+%2*3]
1072*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m4, m7, [%1+%2*4]
1073*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m5, m7, [%1+%2*5]
1074*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m6, m7, [%1+%2*6]
1075*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m7, [%1+%2*7]
1076*c0909341SAndroid Build Coastguard Worker%else
1077*c0909341SAndroid Build Coastguard Worker    mova                 m0, [%1+%2*0]
1078*c0909341SAndroid Build Coastguard Worker    mova                 m1, [%1+%2*1]
1079*c0909341SAndroid Build Coastguard Worker    mova                 m2, [%1+%2*2]
1080*c0909341SAndroid Build Coastguard Worker    mova                 m3, [%1+%2*3]
1081*c0909341SAndroid Build Coastguard Worker    mova                 m4, [%1+%2*4]
1082*c0909341SAndroid Build Coastguard Worker    mova                 m5, [%1+%2*5]
1083*c0909341SAndroid Build Coastguard Worker    mova                 m6, [%1+%2*6]
1084*c0909341SAndroid Build Coastguard Worker    mova                 m7, [%1+%2*7]
1085*c0909341SAndroid Build Coastguard Worker%endif
1086*c0909341SAndroid Build Coastguard Worker%endmacro
1087*c0909341SAndroid Build Coastguard Worker
; IDCT8_1D_ODDHALF src1, src2, src3, src4, tmp1, tmp2, rnd
;   Odd-input half of an 8-point inverse DCT. The arguments are register
;   numbers; rnd names the register holding pd_2048.
;   Two ITX_MULSUB_2W steps (coefficient pairs 799/4017 and 3406/2276) produce
;   t4a/t7a and t5a/t6a, saturating add/sub pairs combine them into t4, t7 and
;   the intermediate t5a/t6a, and a last ITX_MULSUB_2W with 2896/2896 yields
;   t5 and t6. The register named by tmp1 is overwritten along the way.
; NOTE(review): ITX_MULSUB_2W is defined elsewhere. Judging by how
; idct_8x8 .main consumes the results, t4/t5/t6/t7 end up in the registers of
; arguments 1/2/3/4 -- confirm the meaning of the trailing 1 argument.
1088*c0909341SAndroid Build Coastguard Worker%macro IDCT8_1D_ODDHALF 7 ; src[1-4], tmp[1-2], pd_2048
1089*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W         %1, %4, %5, %6, %7,  799, 4017    ;t4a, t7a
1090*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W         %3, %2, %5, %6, %7, 3406, 2276, 1 ;t5a, t6a
1091*c0909341SAndroid Build Coastguard Worker    psubsw               m%2, m%4, m%5                      ;t6a
1092*c0909341SAndroid Build Coastguard Worker    paddsw               m%4, m%5                           ;t7
1093*c0909341SAndroid Build Coastguard Worker    psubsw               m%5, m%1, m%3                      ;t5a
1094*c0909341SAndroid Build Coastguard Worker    paddsw               m%1, m%3                           ;t4
1095*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W         %2, %5, %3, %6, %7, 2896, 2896, 1 ;t5, t6
1096*c0909341SAndroid Build Coastguard Worker%endmacro
1097*c0909341SAndroid Build Coastguard Worker
; Expand INV_TXFM_8X8_FN once per type pairing whose first type is dct.
1098*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, dct
1099*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, adst
1100*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, flipadst
1101*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, identity
1102*c0909341SAndroid Build Coastguard Worker
; idct_8x8_internal_8bpc
;   Pass 1: loads eight coefficient vectors (16 bytes apart) into m0-m7, runs
;           .main, multiplies all eight results by pw_16384 (pmulhrsw),
;           transposes the 8x8 block of words and jumps through tx2q.
;   .pass2: points tx2q at .end4, runs .main, multiplies the results by
;           pw_2048 (pmulhrsw), stores them with two WRITE_8X4 invocations and
;           jumps through tx2q; .end4 zeroes the eight coefficient vectors.
; Stack slots [rsp+gprsize+16*0..2] are used as spill space. .main leaves its
; eighth output (out7) in slot 0 rather than in a register; it writes that
; slot as [rsp+gprsize*2+16*0] because its own return address is still on the
; stack at that point.
; The labels between the stages are separate entry points: for example the
; iadst_8x8 pass 1 jumps to .pass1_end2 with a negated factor in m7.
1103*c0909341SAndroid Build Coastguard Workercglobal idct_8x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
1104*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS          coeffq, 16
1105*c0909341SAndroid Build Coastguard Worker
1106*c0909341SAndroid Build Coastguard Worker.pass1:
1107*c0909341SAndroid Build Coastguard Worker    call .main
1108*c0909341SAndroid Build Coastguard Worker
1109*c0909341SAndroid Build Coastguard Worker.pass1_end:
1110*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_16384)]
1111*c0909341SAndroid Build Coastguard Worker
; Scale the even-numbered rows; m6 is then spilled to slot 1 because the
; transpose needs m6 as a temporary.
1112*c0909341SAndroid Build Coastguard Worker.pass1_end1:
1113*c0909341SAndroid Build Coastguard Worker    REPX      {pmulhrsw x, m7}, m0, m2, m4, m6
1114*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*1], m6
1115*c0909341SAndroid Build Coastguard Worker
; Scale the odd-numbered rows; row 7 is fetched from slot 0 and takes over m7.
1116*c0909341SAndroid Build Coastguard Worker.pass1_end2:
1117*c0909341SAndroid Build Coastguard Worker    REPX      {pmulhrsw x, m7}, m1, m3, m5
1118*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m7, [rsp+gprsize+16*0]
1119*c0909341SAndroid Build Coastguard Worker
; 8x8 word transpose. Inputs: rows 0-5 and 7 in m0-m5/m7, row 6 in slot 1.
; The two-digit tags below read <source row><column>. On exit m0-m7 hold the
; transposed rows; the last one is also left in slot 0.
1120*c0909341SAndroid Build Coastguard Workercglobal_label .pass1_end3
1121*c0909341SAndroid Build Coastguard Worker    punpcklwd               m6, m1, m5             ;10 50 11 51 12 52 13 53
1122*c0909341SAndroid Build Coastguard Worker    punpckhwd               m1, m5                 ;14 54 15 55 16 56 17 57
1123*c0909341SAndroid Build Coastguard Worker    punpckhwd               m5, m0, m4             ;04 44 05 45 06 46 07 47
1124*c0909341SAndroid Build Coastguard Worker    punpcklwd               m0, m4                 ;00 40 01 41 02 42 03 43
1125*c0909341SAndroid Build Coastguard Worker    punpckhwd               m4, m3, m7             ;34 74 35 75 36 76 37 77
1126*c0909341SAndroid Build Coastguard Worker    punpcklwd               m3, m7                 ;30 70 31 71 32 72 33 73
1127*c0909341SAndroid Build Coastguard Worker    punpckhwd               m7, m1, m4             ;16 36 56 76 17 37 57 77
1128*c0909341SAndroid Build Coastguard Worker    punpcklwd               m1, m4                 ;14 34 54 74 15 35 55 75
1129*c0909341SAndroid Build Coastguard Worker    punpckhwd               m4, m6, m3             ;12 32 52 72 13 33 53 73
1130*c0909341SAndroid Build Coastguard Worker    punpcklwd               m6, m3                 ;10 30 50 70 11 31 51 71
; Park the partial result in slot 2 so m6 can receive row 6 from slot 1.
1131*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*2], m6
1132*c0909341SAndroid Build Coastguard Worker    mova                    m6, [rsp+gprsize+16*1]
1133*c0909341SAndroid Build Coastguard Worker    punpckhwd               m3, m2, m6             ;24 64 25 65 26 66 27 67
1134*c0909341SAndroid Build Coastguard Worker    punpcklwd               m2, m6                 ;20 60 21 61 22 62 23 63
1135*c0909341SAndroid Build Coastguard Worker    punpckhwd               m6, m5, m3             ;06 26 46 66 07 27 47 67
1136*c0909341SAndroid Build Coastguard Worker    punpcklwd               m5, m3                 ;04 24 44 64 05 25 45 65
1137*c0909341SAndroid Build Coastguard Worker    punpckhwd               m3, m0, m2             ;02 22 42 62 03 23 43 63
1138*c0909341SAndroid Build Coastguard Worker    punpcklwd               m0, m2                 ;00 20 40 60 01 21 41 61
1139*c0909341SAndroid Build Coastguard Worker
1140*c0909341SAndroid Build Coastguard Worker    punpckhwd               m2, m6, m7             ;07 17 27 37 47 57 67 77
1141*c0909341SAndroid Build Coastguard Worker    punpcklwd               m6, m7                 ;06 16 26 36 46 56 66 76
; The finished last row goes to slot 0 and is reloaded into m7 before the jump.
1142*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m2
1143*c0909341SAndroid Build Coastguard Worker    punpcklwd               m2, m3, m4             ;02 12 22 32 42 52 62 72
1144*c0909341SAndroid Build Coastguard Worker    punpckhwd               m3, m4                 ;03 13 23 33 43 53 63 73
1145*c0909341SAndroid Build Coastguard Worker    punpcklwd               m4, m5, m1             ;04 14 24 34 44 54 64 74
1146*c0909341SAndroid Build Coastguard Worker    punpckhwd               m5, m1                 ;05 15 25 35 45 55 65 75
1147*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*2]
1148*c0909341SAndroid Build Coastguard Worker    punpckhwd               m1, m0, m7             ;01 11 21 31 41 51 61 71
1149*c0909341SAndroid Build Coastguard Worker    punpcklwd               m0, m7                 ;00 10 20 30 40 50 60 70
1150*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*0]
; Continue with whichever second-pass routine tx2q points at.
1151*c0909341SAndroid Build Coastguard Worker    jmp                   tx2q
1152*c0909341SAndroid Build Coastguard Worker
; Second pass when the second transform type is dct. tx2q is reused as the
; continuation taken after the store in .end3.
1153*c0909341SAndroid Build Coastguard Worker.pass2:
1154*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(idct_8x8_internal_8bpc).end4)]
1155*c0909341SAndroid Build Coastguard Worker
1156*c0909341SAndroid Build Coastguard Worker.pass2_main:
1157*c0909341SAndroid Build Coastguard Worker    call .main
1158*c0909341SAndroid Build Coastguard Worker
; Final scaling of the even-numbered rows; row 6 is spilled to slot 1.
1159*c0909341SAndroid Build Coastguard Worker.end:
1160*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_2048)]
1161*c0909341SAndroid Build Coastguard Worker    REPX      {pmulhrsw x, m7}, m0, m2, m4, m6
1162*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*1], m6
1163*c0909341SAndroid Build Coastguard Worker
; Final scaling of the odd-numbered rows; rows 5 and 7 are spilled to slots 2
; and 0 so that m5-m7 are free to act as WRITE_8X4 temporaries.
1164*c0909341SAndroid Build Coastguard Worker.end2:
1165*c0909341SAndroid Build Coastguard Worker    REPX      {pmulhrsw x, m7}, m1, m3, m5
1166*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m7, [rsp+gprsize+16*0]
1167*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*2], m5
1168*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
1169*c0909341SAndroid Build Coastguard Worker
; Store rows 0-3 from registers, advance dstq by two strides, then store rows
; 4-7 (row 4 from m4, rows 5/6/7 from stack slots 2/1/0).
1170*c0909341SAndroid Build Coastguard Worker.end3:
1171*c0909341SAndroid Build Coastguard Worker    WRITE_8X4                0, 1, 2, 3, 5, 6, 7
1172*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [dstq+strideq*2]
1173*c0909341SAndroid Build Coastguard Worker    WRITE_8X4                4, [rsp+gprsize+16*2], [rsp+gprsize+16*1], [rsp+gprsize+16*0], 5, 6, 7
1174*c0909341SAndroid Build Coastguard Worker    jmp                   tx2q
1175*c0909341SAndroid Build Coastguard Worker
; Clear all eight coefficient vectors at coeffq and return.
1176*c0909341SAndroid Build Coastguard Worker.end4:
1177*c0909341SAndroid Build Coastguard Worker    pxor                    m7, m7
1178*c0909341SAndroid Build Coastguard Worker    REPX   {mova [coeffq+16*x], m7}, 0,  1,  2,  3,  4,  5,  6,  7
1179*c0909341SAndroid Build Coastguard Worker    ret
1180*c0909341SAndroid Build Coastguard Worker
1181*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; 8-point inverse DCT kernel.
;   In:  m0-m7 = in0-in7
;   Out: m0-m6 = out0-out6; out7 is left in [rsp+gprsize*2+16*0]
;        m7 does not hold an output on return.
;   Uses three 16-byte stack slots at [rsp+gprsize*2+16*0..2] as spill space.
; The even inputs (m0, m2, m4, m6) go through IDCT4_1D first, then the odd
; inputs through IDCT8_1D_ODDHALF, and saturating add/sub pairs combine the
; two halves.
1182*c0909341SAndroid Build Coastguard Workercglobal_label .main
; Spill in7, in3 and in1: IDCT4_1D uses m1/m3 as temporaries and m7 for pd_2048.
1183*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*0], m7
1184*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*1], m3
1185*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*2], m1
1186*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pd_2048)]
1187*c0909341SAndroid Build Coastguard Worker    IDCT4_1D                 0, 2, 4, 6, 1, 3, 7
; Swap through the stack: reload in1/in3/in7 into m3/m2/m4 while parking the
; even-half results from m2/m4/m6 in the slots just vacated (in5 is still in m5).
1188*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*2]
1189*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*2], m2
1190*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*1]
1191*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*1], m4
1192*c0909341SAndroid Build Coastguard Worker    mova                    m4, [rsp+gprsize*2+16*0]
1193*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*0], m6
; Odd half over m3, m2, m5, m4 with m1/m6 as temporaries; m7 is again passed
; as the pd_2048 argument.
1194*c0909341SAndroid Build Coastguard Worker    IDCT8_1D_ODDHALF         3, 2, 5, 4, 1, 6, 7
; Combine: each parked even-half value is added to and subtracted from one
; odd-half value to give a mirrored pair of outputs.
1195*c0909341SAndroid Build Coastguard Worker    mova                    m6, [rsp+gprsize*2+16*0]
1196*c0909341SAndroid Build Coastguard Worker    psubsw                  m7, m0, m4                    ;out7
1197*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m4                        ;out0
; out7 is handed back through slot 0 because m7 is needed as a temporary below.
1198*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*0], m7
1199*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*2]
1200*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m6, m3                    ;out4
1201*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m6                        ;out3
1202*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize*2+16*1]
1203*c0909341SAndroid Build Coastguard Worker    psubsw                  m6, m1, m5                    ;out6
1204*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m5                        ;out1
1205*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m7, m2                    ;out5
1206*c0909341SAndroid Build Coastguard Worker    paddsw                  m2, m7                        ;out2
1207*c0909341SAndroid Build Coastguard Worker    ret
1208*c0909341SAndroid Build Coastguard Worker
1209*c0909341SAndroid Build Coastguard Worker
; Instantiate the 8x8 entry points whose first transform type is adst
; (INV_TXFM_8X8_FN is defined outside this excerpt).
1210*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN adst, dct
1211*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN adst, adst
1212*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN adst, flipadst
1213*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN adst, identity
1214*c0909341SAndroid Build Coastguard Worker
; 8x8 inverse ADST. Both passes run .main plus a pass-specific finishing
; routine, scale the results, then jump into the idct_8x8 tail code.
; After .main and the finishing routine: m0/m2/m4/m6 = out0/out2/out4/out6,
; m1/m3/m5 = -out1/-out3/-out5 and -out7 sits in [rsp+gprsize+16*0].
1215*c0909341SAndroid Build Coastguard Workercglobal iadst_8x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
1216*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS          coeffq, 16                        ; presumably loads m0-m7 from coeffq at a 16-byte stride - see macro
1217*c0909341SAndroid Build Coastguard Worker
1218*c0909341SAndroid Build Coastguard Worker.pass1:
1219*c0909341SAndroid Build Coastguard Worker    call .main
1220*c0909341SAndroid Build Coastguard Worker    call .main_pass1_end
1221*c0909341SAndroid Build Coastguard Worker
1222*c0909341SAndroid Build Coastguard Worker.pass1_end:
1223*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_16384)]             ; pass-1 scale factor
1224*c0909341SAndroid Build Coastguard Worker
1225*c0909341SAndroid Build Coastguard Worker.pass1_end1:
1226*c0909341SAndroid Build Coastguard Worker    REPX      {pmulhrsw x, m7}, m0, m2, m4, m6            ; scale the positively-signed even outputs
1227*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*1], m6                        ; park out6; m6 is needed as a temporary
1228*c0909341SAndroid Build Coastguard Worker    pxor                    m6, m6
1229*c0909341SAndroid Build Coastguard Worker    psubw                   m6, m7                        ; m6 = -scale
1230*c0909341SAndroid Build Coastguard Worker    mova                    m7, m6                        ; m7 = -scale; presumably applied to the negated odd outputs at the jump target - confirm
1231*c0909341SAndroid Build Coastguard Worker    jmp m(idct_8x8_internal_8bpc).pass1_end2
1232*c0909341SAndroid Build Coastguard Worker
1233*c0909341SAndroid Build Coastguard WorkerALIGN function_align
1234*c0909341SAndroid Build Coastguard Worker.pass2:
1235*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(idct_8x8_internal_8bpc).end4)] ; tx2q = address of the idct_8x8 .end4 label
1236*c0909341SAndroid Build Coastguard Worker
1237*c0909341SAndroid Build Coastguard Worker.pass2_main:
1238*c0909341SAndroid Build Coastguard Worker    call .main
1239*c0909341SAndroid Build Coastguard Worker    call .main_pass2_end
1240*c0909341SAndroid Build Coastguard Worker
1241*c0909341SAndroid Build Coastguard Worker.end:
1242*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_2048)]              ; pass-2 scale factor
1243*c0909341SAndroid Build Coastguard Worker    REPX      {pmulhrsw x, m7}, m0, m2, m4, m6            ; scale the positively-signed even outputs
1244*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*1], m6                        ; park out6; m6 is needed as a temporary
1245*c0909341SAndroid Build Coastguard Worker    pxor                    m6, m6
1246*c0909341SAndroid Build Coastguard Worker    psubw                   m6, m7                        ; m6 = -scale
1247*c0909341SAndroid Build Coastguard Worker    mova                    m7, m6                        ; m7 = -scale for the negated odd outputs (handled at the jump target)
1248*c0909341SAndroid Build Coastguard Worker    jmp m(idct_8x8_internal_8bpc).end2
1249*c0909341SAndroid Build Coastguard Worker
1250*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; Shared 8-point inverse ADST core, reached via `call`. The scratch slots are
; addressed as rsp+gprsize*2 here; the callers address the same slots as
; rsp+gprsize because `call` pushes a return address.
; In:  m0-m7.
; Out: m0 = out0, m1 = -out1, m6 = out6, scratch slot 0 = -out7,
;      m4 = t2, m3 = t3, m5 = t6, m2 = t7 (finished by .main_pass1_end or
;      .main_pass2_end); m7 is a leftover temporary.
1251*c0909341SAndroid Build Coastguard Workercglobal_label .main
1252*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*0], m7                        ; spill m7, m3, m4 to free registers
1253*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*1], m3
1254*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*2], m4
1255*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pd_2048)]              ; constant handed to every ITX_MULSUB_2W below
1256*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 2, 3, 4, 7, 1931, 3612    ;t3a, t2a
1257*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            1, 6, 3, 4, 7, 3920, 1189    ;t7a, t6a
1258*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m2, m6                    ;t2
1259*c0909341SAndroid Build Coastguard Worker    psubsw                  m2, m6                        ;t6
1260*c0909341SAndroid Build Coastguard Worker    paddsw                  m4, m5, m1                    ;t3
1261*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1                        ;t7
1262*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 2, 1, 6, 7, 3784, 1567    ;t6a, t7a
1263*c0909341SAndroid Build Coastguard Worker
1264*c0909341SAndroid Build Coastguard Worker    mova                    m6, [rsp+gprsize*2+16*2]      ; m6 = original m4
1265*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*2], m5                        ; park m5 (first result of the MULSUB above)
1266*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*1]      ; m1 = original m3
1267*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*1], m2                        ; park m2 (second result of the MULSUB above)
1268*c0909341SAndroid Build Coastguard Worker    mova                    m5, [rsp+gprsize*2+16*0]      ; m5 = original m7
1269*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*0], m3                        ; park t2
1270*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 0, 2, 3, 7,  401, 4076    ;t1a, t0a
1271*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            1, 6, 2, 3, 7, 3166, 2598    ;t5a, t4a
1272*c0909341SAndroid Build Coastguard Worker    psubsw                  m2, m0, m6                    ;t4
1273*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m6                        ;t0
1274*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m5, m1                    ;t1
1275*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1                        ;t5
1276*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            2, 5, 1, 6, 7, 1567, 3784    ;t5a, t4a
1277*c0909341SAndroid Build Coastguard Worker
1278*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize*2+16*0]      ; reload parked t2
1279*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m3, m4                    ;-out7
1280*c0909341SAndroid Build Coastguard Worker    psubsw                  m3, m4                        ;t3
1281*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*0], m1                        ; -out7 is returned in scratch slot 0
1282*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m7                    ;t2
1283*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m7                        ;out0
1284*c0909341SAndroid Build Coastguard Worker    mova                    m6, [rsp+gprsize*2+16*2]      ; reload the two values parked after the third MULSUB
1285*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize*2+16*1]
1286*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m5, m6                    ;-out1
1287*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m6                        ;t6
1288*c0909341SAndroid Build Coastguard Worker    paddsw                  m6, m2, m7                    ;out6
1289*c0909341SAndroid Build Coastguard Worker    psubsw                  m2, m7                        ;t7
1290*c0909341SAndroid Build Coastguard Worker    ret
1291*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; Pass-1 finish for the ADST core: derives the four middle outputs from
; t2/t3 (m4/m3) and t6/t7 (m5/m2) with 32-bit intermediates
; (pmaddwd by +/-2896, add pd_2048, arithmetic shift right by 12, pack).
; Out: m2 = out2, m3 = -out3, m4 = out4, m5 = -out5.
; m1 and m6 are preserved through scratch slots 1 and 2; m7 is clobbered.
1292*c0909341SAndroid Build Coastguard Worker.main_pass1_end:
1293*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*1], m1                        ; save m1 (-out1) and m6 (out6)
1294*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*2], m6
1295*c0909341SAndroid Build Coastguard Worker    punpckhwd               m1, m4, m3                    ; interleave t2,t3 words (high half)
1296*c0909341SAndroid Build Coastguard Worker    punpcklwd               m4, m3                        ; interleave t2,t3 words (low half)
1297*c0909341SAndroid Build Coastguard Worker    punpckhwd               m7, m5, m2                    ; interleave t6,t7 words (high half)
1298*c0909341SAndroid Build Coastguard Worker    punpcklwd               m5, m2                        ; interleave t6,t7 words (low half)
1299*c0909341SAndroid Build Coastguard Worker    mova                    m2, [o(pw_2896_2896)]
1300*c0909341SAndroid Build Coastguard Worker    mova                    m6, [o(pd_2048)]              ; rounding term added before the >>12
1301*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m3, m2, m7                    ; (t6 + t7) * 2896, high half
1302*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m2, m5                        ; (t6 + t7) * 2896, low half
1303*c0909341SAndroid Build Coastguard Worker    paddd                   m3, m6
1304*c0909341SAndroid Build Coastguard Worker    paddd                   m2, m6
1305*c0909341SAndroid Build Coastguard Worker    psrad                   m3, 12
1306*c0909341SAndroid Build Coastguard Worker    psrad                   m2, 12
1307*c0909341SAndroid Build Coastguard Worker    packssdw                m2, m3                        ;out2
1308*c0909341SAndroid Build Coastguard Worker    mova                    m3, [o(pw_2896_m2896)]
1309*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m7, m3                        ; (t6 - t7) * 2896, high half
1310*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m5, m3                        ; (t6 - t7) * 2896, low half
1311*c0909341SAndroid Build Coastguard Worker    paddd                   m7, m6
1312*c0909341SAndroid Build Coastguard Worker    paddd                   m5, m6
1313*c0909341SAndroid Build Coastguard Worker    psrad                   m7, 12
1314*c0909341SAndroid Build Coastguard Worker    psrad                   m5, 12
1315*c0909341SAndroid Build Coastguard Worker    packssdw                m5, m7                        ;-out5
1316*c0909341SAndroid Build Coastguard Worker    mova                    m3, [o(pw_2896_2896)]
1317*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m7, m3, m1                    ; (t2 + t3) * 2896, high half
1318*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m3, m4                        ; (t2 + t3) * 2896, low half
1319*c0909341SAndroid Build Coastguard Worker    paddd                   m7, m6
1320*c0909341SAndroid Build Coastguard Worker    paddd                   m3, m6
1321*c0909341SAndroid Build Coastguard Worker    psrad                   m7, 12
1322*c0909341SAndroid Build Coastguard Worker    psrad                   m3, 12
1323*c0909341SAndroid Build Coastguard Worker    packssdw                m3, m7                        ;-out3
1324*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_2896_m2896)]
1325*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m1, m7                        ; (t2 - t3) * 2896, high half
1326*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m4, m7                        ; (t2 - t3) * 2896, low half
1327*c0909341SAndroid Build Coastguard Worker    paddd                   m1, m6
1328*c0909341SAndroid Build Coastguard Worker    paddd                   m4, m6
1329*c0909341SAndroid Build Coastguard Worker    psrad                   m1, 12
1330*c0909341SAndroid Build Coastguard Worker    psrad                   m4, 12
1331*c0909341SAndroid Build Coastguard Worker    packssdw                m4, m1                        ;out4
1332*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*1]      ; restore m1 (-out1) and m6 (out6)
1333*c0909341SAndroid Build Coastguard Worker    mova                    m6, [rsp+gprsize*2+16*2]
1334*c0909341SAndroid Build Coastguard Worker    ret
1335*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; Pass-2 finish for the ADST core: same four middle outputs as
; .main_pass1_end, but computed entirely in 16 bits (saturating add/sub
; followed by pmulhrsw with pw_2896x8).
; In:  m4 = t2, m3 = t3, m5 = t6, m2 = t7.
; Out: m2 = out2, m3 = -out3, m4 = out4, m5 = -out5; m7 is clobbered.
1336*c0909341SAndroid Build Coastguard Workercglobal_label .main_pass2_end
1337*c0909341SAndroid Build Coastguard Worker    paddsw                  m7, m4, m3                    ;t2 + t3
1338*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m3                        ;t2 - t3
1339*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m5, m2                    ;t6 + t7
1340*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m2                        ;t6 - t7
1341*c0909341SAndroid Build Coastguard Worker    mova                    m2, [o(pw_2896x8)]
1342*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m4, m2                        ;out4
1343*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m5, m2                        ;-out5
1344*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m7, m2                        ;-out3
1345*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m3                        ;out2
1346*c0909341SAndroid Build Coastguard Worker    mova                    m3, m7                        ; move -out3 into its final register
1347*c0909341SAndroid Build Coastguard Worker    ret
1348*c0909341SAndroid Build Coastguard Worker
; Instantiate the 8x8 entry points whose first transform type is flipadst
; (INV_TXFM_8X8_FN is defined outside this excerpt).
1349*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN flipadst, dct
1350*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN flipadst, adst
1351*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN flipadst, flipadst
1352*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN flipadst, identity
1353*c0909341SAndroid Build Coastguard Worker
; 8x8 inverse flipped ADST: reuses the iadst_8x8 core, then reverses the
; order of the eight outputs while scaling them and undoing the sign of the
; negated odd outputs.
; State after the two calls: m0/m2/m4/m6 = out0/out2/out4/out6,
; m1/m3/m5 = -out1/-out3/-out5, [rsp+gprsize+16*0] = -out7.
1354*c0909341SAndroid Build Coastguard Workercglobal iflipadst_8x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
1355*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS          coeffq, 16                        ; presumably loads m0-m7 from coeffq at a 16-byte stride - see macro
1356*c0909341SAndroid Build Coastguard Worker
1357*c0909341SAndroid Build Coastguard Worker.pass1:
1358*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x8_internal_8bpc).main
1359*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x8_internal_8bpc).main_pass1_end
1360*c0909341SAndroid Build Coastguard Worker
1361*c0909341SAndroid Build Coastguard Worker.pass1_end:
1362*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_m16384)]            ; scale used for the negated (odd) outputs
1363*c0909341SAndroid Build Coastguard Worker
1364*c0909341SAndroid Build Coastguard Worker.pass1_end1:
1365*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, m7                        ; scaled out1
1366*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*1], m1                        ; out1 becomes row 6 (kept in scratch slot 1)
1367*c0909341SAndroid Build Coastguard Worker    mova                    m1, m6                        ; row 1 = out6 (scaled below)
1368*c0909341SAndroid Build Coastguard Worker    mova                    m6, m2                        ; m6 = temp copy of out2
1369*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m5, m7                    ; row 2 = scaled out5
1370*c0909341SAndroid Build Coastguard Worker    mova                    m5, m6                        ; row 5 = out2 (scaled below)
1371*c0909341SAndroid Build Coastguard Worker    mova                    m6, m4                        ; m6 = temp copy of out4
1372*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m4, m3, m7                    ; row 4 = scaled out3
1373*c0909341SAndroid Build Coastguard Worker    mova                    m3, m6                        ; row 3 = out4 (scaled below)
1374*c0909341SAndroid Build Coastguard Worker    mova                    m6, m0                        ; m6 = temp copy of out0
1375*c0909341SAndroid Build Coastguard Worker    mova                    m0, m7                        ; m0 = scale for the negated outputs
1376*c0909341SAndroid Build Coastguard Worker    pxor                    m7, m7
1377*c0909341SAndroid Build Coastguard Worker    psubw                   m7, m0                        ; m7 = negated scale, for the positive outputs
1378*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, [rsp+gprsize+16*0]        ; row 0 = scaled out7 (from -out7 in slot 0)
1379*c0909341SAndroid Build Coastguard Worker    REPX      {pmulhrsw x, m7}, m1, m3, m5                ; scale out6, out4, out2
1380*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m7, m6                        ; row 7 = scaled out0
1381*c0909341SAndroid Build Coastguard Worker    jmp m(idct_8x8_internal_8bpc).pass1_end3              ; target is outside this excerpt; row 6 is passed in scratch slot 1
1382*c0909341SAndroid Build Coastguard Worker
1383*c0909341SAndroid Build Coastguard WorkerALIGN function_align
1384*c0909341SAndroid Build Coastguard Worker.pass2:
1385*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(idct_8x8_internal_8bpc).end4)] ; tx2q = address of the idct_8x8 .end4 label
1386*c0909341SAndroid Build Coastguard Worker
1387*c0909341SAndroid Build Coastguard Worker.pass2_main:
1388*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x8_internal_8bpc).main
1389*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x8_internal_8bpc).main_pass2_end
1390*c0909341SAndroid Build Coastguard Worker
1391*c0909341SAndroid Build Coastguard Worker.end:
1392*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_2048)]              ; pass-2 scale factor
1393*c0909341SAndroid Build Coastguard Worker    REPX      {pmulhrsw x, m7}, m0, m2, m4, m6            ; scale out0, out2, out4, out6
1394*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*2], m2                        ; out2 becomes row 5 (scratch slot 2)
1395*c0909341SAndroid Build Coastguard Worker    mova                    m2, m0                        ; m2 = temp copy of out0
1396*c0909341SAndroid Build Coastguard Worker    pxor                    m0, m0
1397*c0909341SAndroid Build Coastguard Worker    psubw                   m0, m7                        ; m0 = -scale, for the negated outputs
1398*c0909341SAndroid Build Coastguard Worker    mova                    m7, m2                        ; m7 = out0 (stored to slot 0 below)
1399*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, m0                        ; scaled out1
1400*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m5, m0                    ; row 2 = scaled out5
1401*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*1], m1                        ; out1 becomes row 6 (scratch slot 1)
1402*c0909341SAndroid Build Coastguard Worker    mova                    m5, m4                        ; m5 = temp copy of out4
1403*c0909341SAndroid Build Coastguard Worker    mova                    m1, m6                        ; row 1 = out6
1404*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m4, m3, m0                    ; row 4 = scaled out3
1405*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, [rsp+gprsize+16*0]        ; row 0 = scaled out7 (from -out7 in slot 0)
1406*c0909341SAndroid Build Coastguard Worker    mova                    m3, m5                        ; row 3 = out4
1407*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7                        ; out0 becomes row 7 (scratch slot 0)
1408*c0909341SAndroid Build Coastguard Worker    jmp m(idct_8x8_internal_8bpc).end3                    ; target is outside this excerpt; rows 5/6/7 are passed in slots 2/1/0
1409*c0909341SAndroid Build Coastguard Worker
; Instantiate the 8x8 entry points whose first transform type is identity
; (INV_TXFM_8X8_FN is defined outside this excerpt).
1410*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN identity, dct
1411*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN identity, adst
1412*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN identity, flipadst
1413*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN identity, identity
1414*c0909341SAndroid Build Coastguard Worker
; 8x8 identity transform. Pass 1 does no arithmetic here: it loads the rows
; and goes straight to the shared idct_8x8 tail. Pass 2 only scales all
; eight rows by pw_4096.
1415*c0909341SAndroid Build Coastguard Workercglobal iidentity_8x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
1416*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS          coeffq, 16                        ; presumably loads m0-m7 from coeffq at a 16-byte stride - see macro
1417*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*1], m6                        ; row 6 is passed to .pass1_end3 in scratch slot 1
1418*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end3
1419*c0909341SAndroid Build Coastguard Worker
1420*c0909341SAndroid Build Coastguard WorkerALIGN function_align
1421*c0909341SAndroid Build Coastguard Worker.pass2:
1422*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(idct_8x8_internal_8bpc).end4)] ; tx2q = address of the idct_8x8 .end4 label
1423*c0909341SAndroid Build Coastguard Worker
1424*c0909341SAndroid Build Coastguard Worker.end:
1425*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m7, [o(pw_4096)]              ; scale row 7 first, since m7 is needed for the constant
1426*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7                        ; row 7 -> scratch slot 0
1427*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_4096)]
1428*c0909341SAndroid Build Coastguard Worker    REPX      {pmulhrsw x, m7}, m0, m1, m2, m3, m4, m5, m6 ; scale rows 0-6
1429*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*2], m5                        ; row 5 -> scratch slot 2
1430*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*1], m6                        ; row 6 -> scratch slot 1
1431*c0909341SAndroid Build Coastguard Worker    jmp m(idct_8x8_internal_8bpc).end3
1432*c0909341SAndroid Build Coastguard Worker
1433*c0909341SAndroid Build Coastguard Worker
; Declares a 4x16 inverse-transform entry point for the type pair (%1, %2) by
; expanding INV_TXFM_FN (defined outside this excerpt). For dct_dct only, it
; additionally emits a DC-only path: the first coefficient is broadcast,
; scaled, and written to all 16 rows of the 4-wide block.
1434*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_4X16_FN 2 ; type1, type2
1435*c0909341SAndroid Build Coastguard Worker    INV_TXFM_FN          %1, %2, 4x16, 8
1436*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct
1437*c0909341SAndroid Build Coastguard Worker    pshuflw               m0, [coeffq], q0000 ; replicate coefficient word 0 across the low four words
1438*c0909341SAndroid Build Coastguard Worker    punpcklwd             m0, m0              ; ...and across all eight words
1439*c0909341SAndroid Build Coastguard Worker    mova                  m1, [o(pw_2896x8)]
1440*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m0, m1
1441*c0909341SAndroid Build Coastguard Worker    mov             [coeffq], eobd            ; presumably eob is 0 on this path, so this clears the DC coefficient - confirm against INV_TXFM_FN
1442*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m0, [o(pw_16384)]
1443*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m0, m1
1444*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m0, [o(pw_2048)]
1445*c0909341SAndroid Build Coastguard Worker.end:
; Four 4x4 writes, advancing dst by four rows between them, cover 16 rows.
1446*c0909341SAndroid Build Coastguard Worker    WRITE_4X4             0, 0, 1, 2, 3, 0, 1, 2, 3
1447*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*4]
1448*c0909341SAndroid Build Coastguard Worker    WRITE_4X4             0, 0, 1, 2, 3, 0, 1, 2, 3
1449*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*4]
1450*c0909341SAndroid Build Coastguard Worker    WRITE_4X4             0, 0, 1, 2, 3, 0, 1, 2, 3
1451*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*4]
1452*c0909341SAndroid Build Coastguard Worker    WRITE_4X4             0, 0, 1, 2, 3, 0, 1, 2, 3
1453*c0909341SAndroid Build Coastguard Worker    RET
1454*c0909341SAndroid Build Coastguard Worker%endif
1455*c0909341SAndroid Build Coastguard Worker%endmacro
1456*c0909341SAndroid Build Coastguard Worker
; Instantiate the 4x16 entry points whose first transform type is dct.
1457*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN dct, dct
1458*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN dct, adst
1459*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN dct, flipadst
1460*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN dct, identity
1461*c0909341SAndroid Build Coastguard Worker
; 4x16 inverse DCT.
; Pass 1 runs a 4x8 pass-1 routine (address in r3) twice: first on the odd
; 16-byte coefficient rows, then on the even ones. .pass1 is shared with the
; adst/flipadst 4x16 functions, which enter it with their own routine in r3.
; Pass 2 calls the idct_16x4 core and writes the block as two 4x8 halves.
1462*c0909341SAndroid Build Coastguard Workercglobal idct_4x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
1463*c0909341SAndroid Build Coastguard Worker    lea                  r3, [o(m(idct_4x8_internal_8bpc).pass1)]
1464*c0909341SAndroid Build Coastguard Worker
1465*c0909341SAndroid Build Coastguard Worker.pass1:
1466*c0909341SAndroid Build Coastguard Worker    mova                 m0, [coeffq+16*1]    ; first run: odd rows 1, 3, 5, 7
1467*c0909341SAndroid Build Coastguard Worker    mova                 m1, [coeffq+16*3]
1468*c0909341SAndroid Build Coastguard Worker    mova                 m2, [coeffq+16*5]
1469*c0909341SAndroid Build Coastguard Worker    mova                 m3, [coeffq+16*7]
1470*c0909341SAndroid Build Coastguard Worker    push               tx2q                   ; save the caller's continuation; tx2q is reused below
1471*c0909341SAndroid Build Coastguard Worker    lea                tx2q, [o(m(idct_4x16_internal_8bpc).pass1_2)] ; presumably the r3 routine ends with `jmp tx2q` - confirm
1472*c0909341SAndroid Build Coastguard Worker    jmp                  r3
1473*c0909341SAndroid Build Coastguard Worker
1474*c0909341SAndroid Build Coastguard Worker.pass1_2:
1475*c0909341SAndroid Build Coastguard Worker    mova      [coeffq+16*1], m0               ; write the first run's results back over the odd rows
1476*c0909341SAndroid Build Coastguard Worker    mova      [coeffq+16*3], m1
1477*c0909341SAndroid Build Coastguard Worker    mova      [coeffq+16*5], m2
1478*c0909341SAndroid Build Coastguard Worker    mova      [coeffq+16*7], m3
1479*c0909341SAndroid Build Coastguard Worker    mova                 m0, [coeffq+16*0]    ; second run: even rows 0, 2, 4, 6
1480*c0909341SAndroid Build Coastguard Worker    mova                 m1, [coeffq+16*2]
1481*c0909341SAndroid Build Coastguard Worker    mova                 m2, [coeffq+16*4]
1482*c0909341SAndroid Build Coastguard Worker    mova                 m3, [coeffq+16*6]
1483*c0909341SAndroid Build Coastguard Worker    lea                tx2q, [o(m(idct_4x16_internal_8bpc).pass1_end)]
1484*c0909341SAndroid Build Coastguard Worker    jmp                  r3
1485*c0909341SAndroid Build Coastguard Worker
1486*c0909341SAndroid Build Coastguard Worker.pass1_end:
1487*c0909341SAndroid Build Coastguard Worker    pop                tx2q                   ; restore the caller's continuation
1488*c0909341SAndroid Build Coastguard Worker
1489*c0909341SAndroid Build Coastguard Worker    mova                 m4, [coeffq+16*1]    ; m0-m3 = second-run results; m4-m6 = first-run results
1490*c0909341SAndroid Build Coastguard Worker    mova                 m5, [coeffq+16*3]
1491*c0909341SAndroid Build Coastguard Worker    mova                 m6, [coeffq+16*5]
1492*c0909341SAndroid Build Coastguard Worker    mova                 m7, [o(pw_16384)]
1493*c0909341SAndroid Build Coastguard Worker    REPX   {pmulhrsw x, m7}, m0, m1, m2, m3, m4, m5, m6
1494*c0909341SAndroid Build Coastguard Worker
1495*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m7, [coeffq+16*7]    ; scale the eighth vector from memory...
1496*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*7], m7              ; ...and keep it in the coefficient buffer (row 7)
1497*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
1498*c0909341SAndroid Build Coastguard Worker
1499*c0909341SAndroid Build Coastguard Worker.pass2:
1500*c0909341SAndroid Build Coastguard Worker    call m(idct_16x4_internal_8bpc).main
1501*c0909341SAndroid Build Coastguard Worker
1502*c0909341SAndroid Build Coastguard Worker.end:
1503*c0909341SAndroid Build Coastguard Worker    mova                  m7, [o(pw_2048)]
1504*c0909341SAndroid Build Coastguard Worker    REPX    {pmulhrsw x, m7}, m0, m1, m2, m3, m4, m5, m6
1505*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m7, [coeffq+16*7]   ; eighth vector comes from the coefficient buffer
1506*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*4], m4              ; park m4-m6 (here and in .end1) for the second 4x8 write
1507*c0909341SAndroid Build Coastguard Worker
1508*c0909341SAndroid Build Coastguard Worker.end1:
1509*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*5], m5
1510*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*6], m6
1511*c0909341SAndroid Build Coastguard Worker    mov                   r3, coeffq          ; buffer is addressed through r3 from here on (presumably WRITE_4X8 does not preserve coeffq - confirm)
1512*c0909341SAndroid Build Coastguard Worker    WRITE_4X8              0, 1, 3, 2
1513*c0909341SAndroid Build Coastguard Worker
1514*c0909341SAndroid Build Coastguard Worker    mova                  m0, [r3+16*4]       ; reload the parked vectors for the second half
1515*c0909341SAndroid Build Coastguard Worker    mova                  m1, [r3+16*5]
1516*c0909341SAndroid Build Coastguard Worker    mova                  m2, [r3+16*6]
1517*c0909341SAndroid Build Coastguard Worker    mova                  m3, m7
1518*c0909341SAndroid Build Coastguard Worker    lea                 dstq, [dstq+strideq*4]
1519*c0909341SAndroid Build Coastguard Worker    WRITE_4X8              0, 1, 3, 2
1520*c0909341SAndroid Build Coastguard Worker
1521*c0909341SAndroid Build Coastguard Worker.end2:
1522*c0909341SAndroid Build Coastguard Worker    pxor                  m7, m7
1523*c0909341SAndroid Build Coastguard Worker    REPX     {mova [r3+16*x], m7}, 0,  1,  2,  3,  4,  5,  6,  7 ; zero eight 16-byte rows of the coefficient buffer
1524*c0909341SAndroid Build Coastguard Worker    ret
1525*c0909341SAndroid Build Coastguard Worker
; Instantiate the 4x16 entry points whose first transform type is adst.
1526*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN adst, dct
1527*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN adst, adst
1528*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN adst, flipadst
1529*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN adst, identity
1530*c0909341SAndroid Build Coastguard Worker
; 4x16 inverse ADST.
; Pass 1 reuses idct_4x16's two-run driver with the iadst_4x8 pass-1 routine.
; Pass 2 calls the iadst_16x4 core, regroups the 64-bit halves so each
; register holds two same-signed outputs, scales them (negating where the
; core produced a negated output), and writes two 4x8 halves.
; .end1 is also the tail of iflipadst_4x16, which enters it with m7 = pw_m2048.
1531*c0909341SAndroid Build Coastguard Workercglobal iadst_4x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
1532*c0909341SAndroid Build Coastguard Worker    lea                   r3, [o(m(iadst_4x8_internal_8bpc).pass1)]
1533*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_4x16_internal_8bpc).pass1
1534*c0909341SAndroid Build Coastguard Worker
1535*c0909341SAndroid Build Coastguard Worker.pass2:
1536*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x4_internal_8bpc).main
1537*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x4_internal_8bpc).main_pass2_end
1538*c0909341SAndroid Build Coastguard Worker
1539*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m6, m5, m4                ;low: -out5  high: -out7
1540*c0909341SAndroid Build Coastguard Worker    punpckhqdq            m4, m5                    ;low:  out8  high:  out10
1541*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m5, m7, m2                ;low:  out4  high:  out6
1542*c0909341SAndroid Build Coastguard Worker    punpckhqdq            m2, m7                    ;low: -out9  high: -out11
1543*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*4], m2                    ; park -out9/-out11 in coefficient row 4
1544*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*5], m6                    ; park -out5/-out7 in coefficient row 5
1545*c0909341SAndroid Build Coastguard Worker    mova                  m2, [coeffq+16*6]         ; remaining core results are read from rows 6 and 7
1546*c0909341SAndroid Build Coastguard Worker    mova                  m6, [coeffq+16*7]
1547*c0909341SAndroid Build Coastguard Worker    punpckhqdq            m1, m6, m0                ;low: -out13 high: -out15
1548*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m0, m6                    ;low:  out0  high:  out2
1549*c0909341SAndroid Build Coastguard Worker    punpckhqdq            m6, m3, m2                ;low:  out12 high:  out14
1550*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m2, m3                    ;low: -out1  high: -out3
1551*c0909341SAndroid Build Coastguard Worker
1552*c0909341SAndroid Build Coastguard Worker    mova                  m7, [o(pw_2048)]
1553*c0909341SAndroid Build Coastguard Worker
1554*c0909341SAndroid Build Coastguard Worker.end1:
; Entered with m7 = scale for m0/m5/m4/m6. The negated scale is applied to
; m2, m1 and the two vectors parked in coefficient rows 4 and 5.
1555*c0909341SAndroid Build Coastguard Worker    REPX    {pmulhrsw x, m7}, m0, m5, m4, m6
1556*c0909341SAndroid Build Coastguard Worker    pxor                  m3, m3
1557*c0909341SAndroid Build Coastguard Worker    psubw                 m3, m7                    ; m3 = -scale
1558*c0909341SAndroid Build Coastguard Worker    mova                  m7, [coeffq+16*4]         ; reload the vector parked in row 4
1559*c0909341SAndroid Build Coastguard Worker    REPX    {pmulhrsw x, m3}, m2, m7, m1
1560*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m3, [coeffq+16*5]         ; scale the vector parked in row 5
1561*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*7], m5                    ; park m5 (out4/out6) to free a register
1562*c0909341SAndroid Build Coastguard Worker
1563*c0909341SAndroid Build Coastguard Worker    punpckhqdq            m5, m4, m7                ;low:  out10 high:  out11
1564*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m4, m7                    ;low:  out8  high:  out9
1565*c0909341SAndroid Build Coastguard Worker    punpckhqdq            m7, m6, m1                ;low:  out14 high:  out15
1566*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m6, m1                    ;low:  out12 high:  out13
1567*c0909341SAndroid Build Coastguard Worker    punpckhqdq            m1, m0, m2                ;low:  out2  high:  out3
1568*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m0, m2                    ;low:  out0  high:  out1
1569*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*4], m4                    ; park out8/out9 for the second 4x8 write
1570*c0909341SAndroid Build Coastguard Worker    mova                  m4, [coeffq+16*7]         ; reload out4/out6
1571*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m2, m4, m3                ;low:  out4  high:  out5
1572*c0909341SAndroid Build Coastguard Worker    punpckhqdq            m4, m3                    ;low:  out6  high:  out7
1573*c0909341SAndroid Build Coastguard Worker    mova                  m3, m4
1574*c0909341SAndroid Build Coastguard Worker
1575*c0909341SAndroid Build Coastguard Worker.end2:
1576*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*5], m5                    ; park out10/out11 and out12/out13
1577*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*6], m6
1578*c0909341SAndroid Build Coastguard Worker    mov                   r3, coeffq                ; buffer is addressed through r3 from here on (presumably WRITE_4X8 does not preserve coeffq - confirm)
1579*c0909341SAndroid Build Coastguard Worker    WRITE_4X8              0, 1, 2, 3
1580*c0909341SAndroid Build Coastguard Worker
1581*c0909341SAndroid Build Coastguard Worker    mova                  m0, [r3+16*4]             ; reload the parked vectors for the second half
1582*c0909341SAndroid Build Coastguard Worker    mova                  m1, [r3+16*5]
1583*c0909341SAndroid Build Coastguard Worker    mova                  m2, [r3+16*6]
1584*c0909341SAndroid Build Coastguard Worker    mova                  m3, m7                    ; out14/out15
1585*c0909341SAndroid Build Coastguard Worker    lea                 dstq, [dstq+strideq*4]
1586*c0909341SAndroid Build Coastguard Worker    WRITE_4X8              0, 1, 2, 3
1587*c0909341SAndroid Build Coastguard Worker
1588*c0909341SAndroid Build Coastguard Worker.end3:
1589*c0909341SAndroid Build Coastguard Worker    pxor                  m7, m7
1590*c0909341SAndroid Build Coastguard Worker    REPX     {mova [r3+16*x], m7}, 0,  1,  2,  3,  4,  5,  6,  7 ; zero eight 16-byte rows of the coefficient buffer
1591*c0909341SAndroid Build Coastguard Worker    ret
1592*c0909341SAndroid Build Coastguard Worker
1593*c0909341SAndroid Build Coastguard Worker
; Instantiate the 4x16 entry points whose first transform type is flipadst.
1594*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, dct
1595*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, adst
1596*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, flipadst
1597*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, identity
1598*c0909341SAndroid Build Coastguard Worker
; 4x16 inverse flipped ADST.
; Pass 1 reuses idct_4x16's two-run driver with the iflipadst_4x8 pass-1
; routine. Pass 2 calls the same iadst_16x4 core as iadst_4x16 but swaps the
; low/high unpacks relative to it, then enters iadst_4x16's .end1 tail with
; pw_m2048 in m7 in place of pw_2048.
1599*c0909341SAndroid Build Coastguard Workercglobal iflipadst_4x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
1600*c0909341SAndroid Build Coastguard Worker    lea                   r3, [o(m(iflipadst_4x8_internal_8bpc).pass1)]
1601*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_4x16_internal_8bpc).pass1
1602*c0909341SAndroid Build Coastguard Worker
1603*c0909341SAndroid Build Coastguard Worker.pass2:
1604*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x4_internal_8bpc).main
1605*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x4_internal_8bpc).main_pass2_end
1606*c0909341SAndroid Build Coastguard Worker
1607*c0909341SAndroid Build Coastguard Worker    punpckhqdq            m6, m5, m4                ;low:  out5  high:  out7
1608*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m4, m5                    ;low: -out8  high: -out10
1609*c0909341SAndroid Build Coastguard Worker    punpckhqdq            m5, m7, m2                ;low: -out4  high: -out6
1610*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m2, m7                    ;low:  out9  high:  out11
1611*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*4], m2                    ; park out9/out11 in coefficient row 4
1612*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*5], m6                    ; park out5/out7 in coefficient row 5
1613*c0909341SAndroid Build Coastguard Worker    mova                  m2, [coeffq+16*6]         ; remaining core results are read from rows 6 and 7
1614*c0909341SAndroid Build Coastguard Worker    mova                  m6, [coeffq+16*7]
1615*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m1, m6, m0                ;low:  out13 high:  out15
1616*c0909341SAndroid Build Coastguard Worker    punpckhqdq            m0, m6                    ;low: -out0  high: -out2
1617*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m6, m3, m2                ;low: -out12 high: -out14
1618*c0909341SAndroid Build Coastguard Worker    punpckhqdq            m2, m3                    ;low:  out1  high:  out3
1619*c0909341SAndroid Build Coastguard Worker
1620*c0909341SAndroid Build Coastguard Worker    mova                  m7, [o(pw_m2048)]         ; m0/m5/m4/m6 hold negated outputs here, so the tail's primary scale is negative
1621*c0909341SAndroid Build Coastguard Worker    jmp   m(iadst_4x16_internal_8bpc).end1
1622*c0909341SAndroid Build Coastguard Worker
1623*c0909341SAndroid Build Coastguard Worker
; Instantiate the 4x16 entry points whose type1 is identity, one per type2.
; INV_TXFM_4X16_FN is defined outside this excerpt.
1624*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN identity, dct
1625*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN identity, adst
1626*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN identity, flipadst
1627*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN identity, identity
1628*c0909341SAndroid Build Coastguard Worker
; IDTX16: scale the words of one register in place.
;   %1 = register index, source and destination
;   %2 = register index, temporary (clobbered)
;   %3 = register index holding the multiplier (named pw_1697x16 by callers)
;   %4 = optional register index holding a second multiplier
; With 3 arguments: m%1 = sat(sat(2*m%1) + pmulhrsw(m%3, m%1)).
; With 4 arguments: m%1 = sat(m%1 + pmulhrsw(pmulhrsw(m%3, m%1), m%4)).
; pmulhrsw by 16384 is a rounded halving, which is the "downshifting by 1"
; referred to below.
; %2 may be the same register as %3 (as one caller in this file does); the
; multiplier is then destroyed.
1629*c0909341SAndroid Build Coastguard Worker%macro IDTX16 3-4 ; src/dst, tmp, pw_1697x16, [pw_16384]
1630*c0909341SAndroid Build Coastguard Worker    pmulhrsw            m%2, m%3, m%1
1631*c0909341SAndroid Build Coastguard Worker%if %0 == 4 ; if downshifting by 1
1632*c0909341SAndroid Build Coastguard Worker    pmulhrsw            m%2, m%4
1633*c0909341SAndroid Build Coastguard Worker%else
1634*c0909341SAndroid Build Coastguard Worker    paddsw              m%1, m%1
1635*c0909341SAndroid Build Coastguard Worker%endif
1636*c0909341SAndroid Build Coastguard Worker    paddsw              m%1, m%2
1637*c0909341SAndroid Build Coastguard Worker%endmacro
1638*c0909341SAndroid Build Coastguard Worker
; iidentity_4x16_internal_8bpc
;   Pass 1 processes the eight 16-byte coefficient rows in two batches of
;   four through the shared .pass1 body: first the odd rows (1, 3, 5, 7),
;   then the even rows (0, 2, 4, 6). tx2q is temporarily repurposed as the
;   continuation address for .pass1, so the caller-supplied tx2q is parked in
;   r3 and jumped to once both batches are done.
;   .pass2 scales all eight rows with IDTX16, applies the final pmulhrsw by
;   pw_2048, and joins iadst_4x16_internal_8bpc.end2 for the store.
1639*c0909341SAndroid Build Coastguard Workercglobal iidentity_4x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
1640*c0909341SAndroid Build Coastguard Worker    mova                  m0, [coeffq+16*1]
1641*c0909341SAndroid Build Coastguard Worker    mova                  m6, [o(pw_1697x8)]
1642*c0909341SAndroid Build Coastguard Worker    mova                  m1, [coeffq+16*3]
1643*c0909341SAndroid Build Coastguard Worker    mova                  m2, [coeffq+16*5]
1644*c0909341SAndroid Build Coastguard Worker    mova                  m3, [coeffq+16*7]
1645*c0909341SAndroid Build Coastguard Worker    pcmpeqw               m7, m7                     ;m7 = all ones (-1 in every word)
1646*c0909341SAndroid Build Coastguard Worker    mov                   r3, tx2q                   ;keep the real second-pass target
1647*c0909341SAndroid Build Coastguard Worker    lea                 tx2q, [o(.pass1_2)]
    ; For each of m0-m3 (m4/m5 are temporaries):
    ;   t = pavgw(pmulhrsw(m6, x), x)
    ;   x = (x == -1) ? 0 : t
    ; NOTE(review): pavgw is an unsigned average, which is presumably why the
    ; x == -1 lanes are forced to zero explicitly - confirm.
1648*c0909341SAndroid Build Coastguard Worker.pass1:
1649*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m4, m6, m0
1650*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m5, m6, m1
1651*c0909341SAndroid Build Coastguard Worker    pavgw                 m4, m0
1652*c0909341SAndroid Build Coastguard Worker    pcmpeqw               m0, m7
1653*c0909341SAndroid Build Coastguard Worker    pavgw                 m5, m1
1654*c0909341SAndroid Build Coastguard Worker    pcmpeqw               m1, m7
1655*c0909341SAndroid Build Coastguard Worker    pandn                 m0, m4
1656*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m4, m6, m2
1657*c0909341SAndroid Build Coastguard Worker    pandn                 m1, m5
1658*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m5, m6, m3
1659*c0909341SAndroid Build Coastguard Worker    pavgw                 m4, m2
1660*c0909341SAndroid Build Coastguard Worker    pcmpeqw               m2, m7
1661*c0909341SAndroid Build Coastguard Worker    pavgw                 m5, m3
1662*c0909341SAndroid Build Coastguard Worker    pcmpeqw               m3, m7
1663*c0909341SAndroid Build Coastguard Worker    pandn                 m2, m4
1664*c0909341SAndroid Build Coastguard Worker    pandn                 m3, m5
    ; Shared pass-1 tail (outside this excerpt); presumably it finishes with
    ; a jump through tx2q, landing on .pass1_2 or .pass1_end - TODO confirm.
    ; The second trip through .pass1 relies on m6 and m7 still being intact.
1665*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_4x8_internal_8bpc).pass1_end
    ; First batch done: write the odd rows back, fetch the even rows, and run
    ; .pass1 again with .pass1_end as the continuation.
1666*c0909341SAndroid Build Coastguard Worker.pass1_2:
1667*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*1], m0
1668*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*3], m1
1669*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*5], m2
1670*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*7], m3
1671*c0909341SAndroid Build Coastguard Worker    mova                  m0, [coeffq+16*0]
1672*c0909341SAndroid Build Coastguard Worker    mova                  m1, [coeffq+16*2]
1673*c0909341SAndroid Build Coastguard Worker    mova                  m2, [coeffq+16*4]
1674*c0909341SAndroid Build Coastguard Worker    mova                  m3, [coeffq+16*6]
1675*c0909341SAndroid Build Coastguard Worker    lea                 tx2q, [o(.pass1_end)]
1676*c0909341SAndroid Build Coastguard Worker    jmp .pass1
    ; Second batch done: m0-m3 hold the second batch's results; reload rows
    ; 1, 3 and 5 into m4-m6 (row 7 stays in memory) and continue at the
    ; address saved in r3.
1677*c0909341SAndroid Build Coastguard Worker.pass1_end:
1678*c0909341SAndroid Build Coastguard Worker    mova                  m4, [coeffq+16*1]
1679*c0909341SAndroid Build Coastguard Worker    mova                  m5, [coeffq+16*3]
1680*c0909341SAndroid Build Coastguard Worker    mova                  m6, [coeffq+16*5]
1681*c0909341SAndroid Build Coastguard Worker    jmp                   r3
    ; Only eight registers are used, so m6 is spilled to row 6 while it
    ; serves as the IDTX16 temporary, and row 7 is processed through memory.
1682*c0909341SAndroid Build Coastguard Worker.pass2:
1683*c0909341SAndroid Build Coastguard Worker    mova                  m7, [o(pw_1697x16)]
1684*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*6], m6
1685*c0909341SAndroid Build Coastguard Worker    REPX    {IDTX16 x, 6, 7}, 0, 1, 2, 3, 4, 5
1686*c0909341SAndroid Build Coastguard Worker    mova                  m6, [coeffq+16*7]
1687*c0909341SAndroid Build Coastguard Worker    IDTX16                 6, 7, 7                   ;m7 is tmp and multiplier: constant is lost
1688*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*7], m6
1689*c0909341SAndroid Build Coastguard Worker    mova                  m6, [coeffq+16*6]
    ; Same computation as the 3-argument IDTX16, with the multiplier read
    ; from memory because m7 no longer holds it.
1690*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m7, m6, [o(pw_1697x16)]
1691*c0909341SAndroid Build Coastguard Worker    paddsw                m6, m6
1692*c0909341SAndroid Build Coastguard Worker    paddsw                m6, m7
1693*c0909341SAndroid Build Coastguard Worker    mova                  m7, [o(pw_2048)]
1694*c0909341SAndroid Build Coastguard Worker    REPX    {pmulhrsw x, m7}, m0, m1, m2, m3, m4, m5, m6
1695*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m7, [coeffq+16*7]          ;m7 = scaled row 7
1696*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*4], m4
    ; .end2 stores m5/m6 to rows 5/6, reloads rows 4-6 and uses m7 as the
    ; last row, which is why m4 is stored here and row 7 is left in m7.
1697*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_4x16_internal_8bpc).end2
1698*c0909341SAndroid Build Coastguard Worker
1699*c0909341SAndroid Build Coastguard Worker
; INV_TXFM_16X4_FN type1, type2
;   Expands INV_TXFM_FN (defined outside this excerpt) for the 16x4 block
;   size. For the dct_dct combination only, it additionally emits the DC-only
;   path: the single coefficient at [coeffq] is scaled, broadcast to all
;   eight words of m0, and added to a 16-pixel-wide block of dst with
;   unsigned saturation, two rows per loop iteration.
;   .dconly expects m0 = partially scaled DC, m1 = pw_2896x8, m2 = first
;   rounding multiplier, r2d = iteration count, tx2q = address to jump to
;   when the loop finishes.
;   NOTE(review): .dconly / .dconly_loop look intended as shared entry points
;   for other block sizes - confirm against their callers.
1700*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_16X4_FN 2 ; type1, type2
1701*c0909341SAndroid Build Coastguard Worker    INV_TXFM_FN          %1, %2, 16x4, 8
1702*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct
1703*c0909341SAndroid Build Coastguard Worker    movd                 m1, [o(pw_2896x8)]
1704*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m1, [coeffq]
1705*c0909341SAndroid Build Coastguard Worker    movd                 m2, [o(pw_16384)]
    ; NOTE(review): presumably eob is zero on this path, making this store a
    ; clear of the consumed DC coefficient - confirm in INV_TXFM_FN.
1706*c0909341SAndroid Build Coastguard Worker    mov            [coeffq], eobd
1707*c0909341SAndroid Build Coastguard Worker    mov                 r2d, 2                         ;2 iterations x 2 rows = 4 rows
1708*c0909341SAndroid Build Coastguard Worker    lea                tx2q, [o(m(inv_txfm_add_dct_dct_16x4_8bpc).end)]
1709*c0909341SAndroid Build Coastguard Worker.dconly:
1710*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m2
1711*c0909341SAndroid Build Coastguard Worker    movd                 m2, [o(pw_2048)]              ;intentionally rip-relative
1712*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m1
1713*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m2
    ; Broadcast word 0 to all eight words of m0; m5 = 0 for byte->word unpack.
1714*c0909341SAndroid Build Coastguard Worker    pshuflw              m0, m0, q0000
1715*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m0
1716*c0909341SAndroid Build Coastguard Worker    pxor                 m5, m5
    ; Per iteration: load two 16-byte rows, widen to words, add the DC term,
    ; pack back to bytes with unsigned saturation, store, advance two rows.
1717*c0909341SAndroid Build Coastguard Worker.dconly_loop:
1718*c0909341SAndroid Build Coastguard Worker    mova                 m1, [dstq]
1719*c0909341SAndroid Build Coastguard Worker    mova                 m3, [dstq+strideq]
1720*c0909341SAndroid Build Coastguard Worker    punpckhbw            m2, m1, m5
1721*c0909341SAndroid Build Coastguard Worker    punpcklbw            m1, m5
1722*c0909341SAndroid Build Coastguard Worker    punpckhbw            m4, m3, m5
1723*c0909341SAndroid Build Coastguard Worker    punpcklbw            m3, m5
1724*c0909341SAndroid Build Coastguard Worker    paddw                m2, m0
1725*c0909341SAndroid Build Coastguard Worker    paddw                m1, m0
1726*c0909341SAndroid Build Coastguard Worker    paddw                m4, m0
1727*c0909341SAndroid Build Coastguard Worker    paddw                m3, m0
1728*c0909341SAndroid Build Coastguard Worker    packuswb             m1, m2
1729*c0909341SAndroid Build Coastguard Worker    packuswb             m3, m4
1730*c0909341SAndroid Build Coastguard Worker    mova             [dstq], m1
1731*c0909341SAndroid Build Coastguard Worker    mova     [dstq+strideq], m3
1732*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*2]
1733*c0909341SAndroid Build Coastguard Worker    dec                 r2d
1734*c0909341SAndroid Build Coastguard Worker    jg .dconly_loop                                    ;loop while r2d is still > 0
1735*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
1736*c0909341SAndroid Build Coastguard Worker.end:
1737*c0909341SAndroid Build Coastguard Worker    RET
1738*c0909341SAndroid Build Coastguard Worker%endif
1739*c0909341SAndroid Build Coastguard Worker%endmacro
1740*c0909341SAndroid Build Coastguard Worker
; LOAD_7ROWS src, stride
;   Loads seven consecutive rows, [src+stride*0] .. [src+stride*6], into
;   m0..m6. m7 is left untouched; the callers in this file fetch the eighth
;   row from memory themselves.
1741*c0909341SAndroid Build Coastguard Worker%macro LOAD_7ROWS 2 ;src, stride
1742*c0909341SAndroid Build Coastguard Worker    mova                 m0, [%1+%2*0]
1743*c0909341SAndroid Build Coastguard Worker    mova                 m1, [%1+%2*1]
1744*c0909341SAndroid Build Coastguard Worker    mova                 m2, [%1+%2*2]
1745*c0909341SAndroid Build Coastguard Worker    mova                 m3, [%1+%2*3]
1746*c0909341SAndroid Build Coastguard Worker    mova                 m4, [%1+%2*4]
1747*c0909341SAndroid Build Coastguard Worker    mova                 m5, [%1+%2*5]
1748*c0909341SAndroid Build Coastguard Worker    mova                 m6, [%1+%2*6]
1749*c0909341SAndroid Build Coastguard Worker%endmacro
1750*c0909341SAndroid Build Coastguard Worker
; SAVE_7ROWS dst, stride
;   Counterpart of LOAD_7ROWS: stores m0..m6 to seven consecutive rows,
;   [dst+stride*0] .. [dst+stride*6]. m7 is not stored.
1751*c0909341SAndroid Build Coastguard Worker%macro SAVE_7ROWS 2 ;dst, stride
1752*c0909341SAndroid Build Coastguard Worker    mova          [%1+%2*0], m0
1753*c0909341SAndroid Build Coastguard Worker    mova          [%1+%2*1], m1
1754*c0909341SAndroid Build Coastguard Worker    mova          [%1+%2*2], m2
1755*c0909341SAndroid Build Coastguard Worker    mova          [%1+%2*3], m3
1756*c0909341SAndroid Build Coastguard Worker    mova          [%1+%2*4], m4
1757*c0909341SAndroid Build Coastguard Worker    mova          [%1+%2*5], m5
1758*c0909341SAndroid Build Coastguard Worker    mova          [%1+%2*6], m6
1759*c0909341SAndroid Build Coastguard Worker%endmacro
1760*c0909341SAndroid Build Coastguard Worker
; IDCT16_1D_PACKED_ODDHALF src1, src2, src3, src4, tmp1, tmp2, tmp3
;   Odd half (t8..t15) of a 16-point inverse DCT on packed data, where each
;   register carries two terms (low/high halves) as noted per line.
;   All arguments are register indices.
;   In:   m%1-m%4 = packed odd inputs (interleaved into pairs by the first
;         four instructions, see their comments).
;   Out:  m%1 = low: t8a   high: t9
;         m%2 = low: t11   high: t10a
;         m%3 = low: t12   high: t13a
;         m%4 = low: t15a  high: t14
;   Clobbers m%5, m%6, m%7. m%7 holds pd_2048 throughout, and m%2 temporarily
;   holds the deint_shuf2 shuffle mask.
;   ITX_MUL2X_PACK, pd_2048 and deint_shuf2 are defined outside this excerpt.
1761*c0909341SAndroid Build Coastguard Worker%macro IDCT16_1D_PACKED_ODDHALF 7  ;src[1-4], tmp[1-3]
1762*c0909341SAndroid Build Coastguard Worker    punpckhwd            m%5, m%4, m%1                ;packed in13 in3
1763*c0909341SAndroid Build Coastguard Worker    punpcklwd            m%1, m%4                     ;packed in1  in15
1764*c0909341SAndroid Build Coastguard Worker    punpcklwd            m%4, m%3, m%2                ;packed in9  in7
1765*c0909341SAndroid Build Coastguard Worker    punpckhwd            m%2, m%3                     ;packed in5  in11
1766*c0909341SAndroid Build Coastguard Worker    mova                 m%7, [o(pd_2048)]
    ; Stage 1: four rotations producing t8a..t15a.
1767*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK        %1, %6, %7,  401, 4076, 1    ;low: t8a   high: t15a
1768*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK        %4, %6, %7, 3166, 2598, 1    ;low: t9a   high: t14a
1769*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK        %2, %6, %7, 1931, 3612, 1    ;low: t10a  high: t13a
1770*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK        %5, %6, %7, 3920, 1189, 1    ;low: t11a  high: t12a
    ; Stage 2: butterflies, then rotate the difference terms.
1771*c0909341SAndroid Build Coastguard Worker    psubsw               m%6, m%1, m%4                 ;low: t9    high: t14
1772*c0909341SAndroid Build Coastguard Worker    paddsw               m%1, m%4                      ;low: t8    high: t15
1773*c0909341SAndroid Build Coastguard Worker    psubsw               m%4, m%5, m%2                 ;low: t10   high: t13
1774*c0909341SAndroid Build Coastguard Worker    paddsw               m%5, m%2                      ;low: t11   high: t12
1775*c0909341SAndroid Build Coastguard Worker    mova                 m%2, [o(deint_shuf2)]
1776*c0909341SAndroid Build Coastguard Worker    pshufb               m%6, m%2
1777*c0909341SAndroid Build Coastguard Worker    pshufb               m%4, m%2
1778*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK        %6, %3, %7, 1567, 3784, 1    ;low: t9a   high: t14a
1779*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK        %4, %3, %7, m3784, 1567, 1   ;low: t10a  high: t13a
    ; Stage 3: butterflies, then the 2896 rotation on the middle terms.
1780*c0909341SAndroid Build Coastguard Worker    psubsw               m%3, m%1, m%5                 ;low: t11a  high: t12a
1781*c0909341SAndroid Build Coastguard Worker    paddsw               m%1, m%5                      ;low: t8a   high: t15a
1782*c0909341SAndroid Build Coastguard Worker    psubsw               m%5, m%6, m%4                 ;low: t10   high: t13
1783*c0909341SAndroid Build Coastguard Worker    paddsw               m%6, m%4                      ;low: t9    high: t14
1784*c0909341SAndroid Build Coastguard Worker    pshufb               m%3, m%2
1785*c0909341SAndroid Build Coastguard Worker    pshufb               m%5, m%2
1786*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK        %3, %2, %7, 2896, 2896, 4    ;t12,  t11
1787*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK        %5, %4, %7, 2896, 2896, 4    ;t13a, t10a
    ; Repack into the output layout listed in the header.
1788*c0909341SAndroid Build Coastguard Worker    packssdw             m%2, m%4                      ;low: t11   high: t10a
1789*c0909341SAndroid Build Coastguard Worker    packssdw             m%3, m%5                      ;low: t12   high: t13a
1790*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m%4, m%1, m%6                 ;low: t15a  high: t14
1791*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m%1, m%6                      ;low: t8a   high: t9
1792*c0909341SAndroid Build Coastguard Worker%endmacro
1793*c0909341SAndroid Build Coastguard Worker
; Instantiate the 16x4 entry points whose type1 is dct, one per type2.
1794*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN dct, dct
1795*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN dct, adst
1796*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN dct, flipadst
1797*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN dct, identity
1798*c0909341SAndroid Build Coastguard Worker
; idct_16x4_internal_8bpc
;   Pass 1: load coefficient rows 0-6 into m0-m6 (row 7 stays in memory), run
;   the 16-point kernel in .main, scale every result with pmulhrsw by
;   pw_16384, transpose with word interleaves, then jump to the second-pass
;   routine held in tx2q.
;   Only eight registers are used, so coeff rows 6 and 7 double as spill
;   slots throughout.
;   .pass1_end2 and .pass1_end3 are also join points for other transforms
;   (iadst_16x4_internal_8bpc jumps to .pass1_end3).
;   Pass 2 (.pass2 / .pass2_end): the 16-wide block is handled as two 8-wide
;   halves by invoking an 8x4 second-pass routine twice.
1799*c0909341SAndroid Build Coastguard Workercglobal idct_16x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
1800*c0909341SAndroid Build Coastguard Worker    LOAD_7ROWS        coeffq, 16
1801*c0909341SAndroid Build Coastguard Worker    call .main
1802*c0909341SAndroid Build Coastguard Worker
    ; .main left out15/out14 in coeff row 7; row 6 is used to spill m7.
1803*c0909341SAndroid Build Coastguard Worker.pass1_end:
1804*c0909341SAndroid Build Coastguard Worker    punpckhwd             m7, m0, m2                 ;packed out1,  out5
1805*c0909341SAndroid Build Coastguard Worker    punpcklwd             m0, m2                     ;packed out0,  out4
1806*c0909341SAndroid Build Coastguard Worker    punpcklwd             m2, m1, m3                 ;packed out3,  out7
1807*c0909341SAndroid Build Coastguard Worker    punpckhwd             m1, m3                     ;packed out2,  out6
1808*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*6], m7
1809*c0909341SAndroid Build Coastguard Worker    mova                  m7, [coeffq+16*7]
1810*c0909341SAndroid Build Coastguard Worker    punpckhwd             m3, m4, m6                 ;packed out9,  out13
1811*c0909341SAndroid Build Coastguard Worker    punpcklwd             m4, m6                     ;packed out8,  out12
1812*c0909341SAndroid Build Coastguard Worker    punpcklwd             m6, m5, m7                 ;packed out11, out15
1813*c0909341SAndroid Build Coastguard Worker    punpckhwd             m5, m7                     ;packed out10, out14
1814*c0909341SAndroid Build Coastguard Worker
    ; Scale m0-m6 and the value spilled in row 6 by the same multiplier.
1815*c0909341SAndroid Build Coastguard Worker.pass1_end2:
1816*c0909341SAndroid Build Coastguard Worker    mova                  m7, [o(pw_16384)]
1817*c0909341SAndroid Build Coastguard Worker    REPX    {pmulhrsw x, m7}, m0, m1, m2, m3, m4, m5, m6
1818*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m7, [coeffq+16*6]
1819*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*6], m7
1820*c0909341SAndroid Build Coastguard Worker
    ; Transpose. Expects m0-m6 plus one more register's worth in coeff row 6.
    ; On exit m0-m3 hold columns 0-7, m4-m6 hold columns 8-15 for rows 0-2,
    ; and coeff row 7 holds columns 8-15 for row 3.
1821*c0909341SAndroid Build Coastguard Worker.pass1_end3:
1822*c0909341SAndroid Build Coastguard Worker    punpckhwd             m7, m3, m6                 ;packed 9, 11, 13, 15 high
1823*c0909341SAndroid Build Coastguard Worker    punpcklwd             m3, m6                     ;packed 9, 11, 13, 15 low
1824*c0909341SAndroid Build Coastguard Worker    punpckhwd             m6, m4, m5                 ;packed 8, 10, 12, 14 high
1825*c0909341SAndroid Build Coastguard Worker    punpcklwd             m4, m5                     ;packed 8, 10, 12, 14 low
1826*c0909341SAndroid Build Coastguard Worker    punpckhwd             m5, m4, m3                 ;8, 9, 10, 11, 12, 13, 14, 15(1)
1827*c0909341SAndroid Build Coastguard Worker    punpcklwd             m4, m3                     ;8, 9, 10, 11, 12, 13, 14, 15(0)
1828*c0909341SAndroid Build Coastguard Worker    punpckhwd             m3, m6, m7                 ;8, 9, 10, 11, 12, 13, 14, 15(3)
1829*c0909341SAndroid Build Coastguard Worker    punpcklwd             m6, m7                     ;8, 9, 10, 11, 12, 13, 14, 15(2)
1830*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*7], m3
1831*c0909341SAndroid Build Coastguard Worker    mova                  m3, [coeffq+16*6]
1832*c0909341SAndroid Build Coastguard Worker    punpckhwd             m7, m3, m2                 ;packed 1, 3, 5, 7 high
1833*c0909341SAndroid Build Coastguard Worker    punpcklwd             m3, m2                     ;packed 1, 3, 5, 7 low
1834*c0909341SAndroid Build Coastguard Worker    punpckhwd             m2, m0, m1                 ;packed 0, 2, 4, 6 high
1835*c0909341SAndroid Build Coastguard Worker    punpcklwd             m0, m1                     ;packed 0, 2, 4, 6 low
1836*c0909341SAndroid Build Coastguard Worker    punpckhwd             m1, m0, m3                 ;0, 1, 2, 3, 4, 5, 6, 7(1)
1837*c0909341SAndroid Build Coastguard Worker    punpcklwd             m0, m3                     ;0, 1, 2, 3, 4, 5, 6, 7(0)
1838*c0909341SAndroid Build Coastguard Worker    punpckhwd             m3, m2, m7                 ;0, 1, 2, 3, 4, 5, 6, 7(3)
1839*c0909341SAndroid Build Coastguard Worker    punpcklwd             m2, m7                     ;0, 1, 2, 3, 4, 5, 6, 7(2)
1840*c0909341SAndroid Build Coastguard Worker    jmp                 tx2q
1841*c0909341SAndroid Build Coastguard Worker
1842*c0909341SAndroid Build Coastguard Worker.pass2:
1843*c0909341SAndroid Build Coastguard Worker    lea                 tx2q, [o(m(idct_8x4_internal_8bpc).pass2)]
1844*c0909341SAndroid Build Coastguard Worker
    ; Shared pass-2 driver; tx2q = 8x4 second-pass routine to apply.
    ; Park the right half (m4-m6; its fourth row is already in coeff row 7),
    ; remember the right half's destination in r3, and process the left half
    ; with a call. Then rebase coeffq onto rows 4-7, reload them as m0-m3,
    ; point dstq at the right half and tail-jump into the same routine.
    ; NOTE(review): relies on the called routine preserving r3 and returning
    ; with ret - confirm in the 8x4 .pass2 implementations.
1845*c0909341SAndroid Build Coastguard Worker.pass2_end:
1846*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*4], m4
1847*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*5], m5
1848*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*6], m6
1849*c0909341SAndroid Build Coastguard Worker    lea                   r3, [dstq+8]
1850*c0909341SAndroid Build Coastguard Worker    call                tx2q
1851*c0909341SAndroid Build Coastguard Worker
1852*c0909341SAndroid Build Coastguard Worker    add               coeffq, 16*4
1853*c0909341SAndroid Build Coastguard Worker    mova                  m0, [coeffq+16*0]
1854*c0909341SAndroid Build Coastguard Worker    mova                  m1, [coeffq+16*1]
1855*c0909341SAndroid Build Coastguard Worker    mova                  m2, [coeffq+16*2]
1856*c0909341SAndroid Build Coastguard Worker    mova                  m3, [coeffq+16*3]
1857*c0909341SAndroid Build Coastguard Worker    mov                 dstq, r3
1858*c0909341SAndroid Build Coastguard Worker    jmp                 tx2q
1859*c0909341SAndroid Build Coastguard Worker
; idct_16x4_internal_8bpc.main
;   16-point inverse DCT kernel on packed data (two terms per register).
;   In:   m0-m6 = coefficient rows 0-6, [coeffq+16*7] = row 7.
;   Out:  m0 = out0/out1    m1 = out3/out2    m2 = out4/out5   m3 = out7/out6
;         m4 = out8/out9    m5 = out11/out10  m6 = out12/out13
;         [coeffq+16*7] = out15/out14         (listed as low/high)
;   Scratch: coeff rows 4-6 are overwritten; m7 is clobbered.
;   NOTE(review): IDCT8_1D_PACKED is defined outside this excerpt; it
;   presumably consumes m0-m3 and leaves the even half in m0-m3 - confirm.
1860*c0909341SAndroid Build Coastguard WorkerALIGN function_align
1861*c0909341SAndroid Build Coastguard Workercglobal_label .main
    ; Separate even inputs (kept in m0-m3) from odd inputs (spilled to rows
    ; 4-6 and m7) so that the 8-point even half can run first.
1862*c0909341SAndroid Build Coastguard Worker    punpckhqdq            m7, m0, m1                 ;low:in1  high:in3
1863*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m0, m1
1864*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m1, m2, m3
1865*c0909341SAndroid Build Coastguard Worker    punpckhqdq            m3, m2                     ;low:in7  high:in5
1866*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*4], m7
1867*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*5], m3
1868*c0909341SAndroid Build Coastguard Worker    mova                  m7, [coeffq+16*7]
1869*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m2, m4, m5
1870*c0909341SAndroid Build Coastguard Worker    punpckhqdq            m4, m5                     ;low:in9  high:in11
1871*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m3, m6, m7
1872*c0909341SAndroid Build Coastguard Worker    punpckhqdq            m7, m6                     ;low:in15 high:in13
1873*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*6], m4
1874*c0909341SAndroid Build Coastguard Worker    IDCT8_1D_PACKED
    ; Swap: bring the odd inputs back into m6/m4/m5 and park the even-half
    ; results m1-m3 in the same three rows.
1875*c0909341SAndroid Build Coastguard Worker    mova                  m6, [coeffq+16*4]
1876*c0909341SAndroid Build Coastguard Worker    mova                  m4, [coeffq+16*5]
1877*c0909341SAndroid Build Coastguard Worker    mova                  m5, [coeffq+16*6]
1878*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*4], m1
1879*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*5], m2
1880*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*6], m3
1881*c0909341SAndroid Build Coastguard Worker
    ; Odd half: results land in m6, m4, m5, m7; m1-m3 are its temporaries.
1882*c0909341SAndroid Build Coastguard Worker    IDCT16_1D_PACKED_ODDHALF 6, 4, 5, 7, 1, 2, 3
1883*c0909341SAndroid Build Coastguard Worker
    ; Final butterflies: even half +/- odd half.
1884*c0909341SAndroid Build Coastguard Worker    mova                  m1, [coeffq+16*4]
1885*c0909341SAndroid Build Coastguard Worker    psubsw                m3, m0, m7                 ;low:out15 high:out14
1886*c0909341SAndroid Build Coastguard Worker    paddsw                m0, m7                     ;low:out0  high:out1
1887*c0909341SAndroid Build Coastguard Worker    psubsw                m7, m1, m5                 ;low:out12 high:out13
1888*c0909341SAndroid Build Coastguard Worker    paddsw                m1, m5                     ;low:out3  high:out2
1889*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*7], m3
1890*c0909341SAndroid Build Coastguard Worker    mova                  m2, [coeffq+16*5]
1891*c0909341SAndroid Build Coastguard Worker    mova                  m3, [coeffq+16*6]
1892*c0909341SAndroid Build Coastguard Worker    psubsw                m5, m2, m4                 ;low:out11 high:out10
1893*c0909341SAndroid Build Coastguard Worker    paddsw                m2, m4                     ;low:out4  high:out5
1894*c0909341SAndroid Build Coastguard Worker    psubsw                m4, m3, m6                 ;low:out8  high:out9
1895*c0909341SAndroid Build Coastguard Worker    paddsw                m3, m6                     ;low:out7  high:out6
1896*c0909341SAndroid Build Coastguard Worker    mova                  m6, m7
1897*c0909341SAndroid Build Coastguard Worker    ret
1898*c0909341SAndroid Build Coastguard Worker
; Instantiate the 16x4 entry points whose type1 is adst, one per type2.
1899*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN adst, dct
1900*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN adst, adst
1901*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN adst, flipadst
1902*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN adst, identity
1903*c0909341SAndroid Build Coastguard Worker
; iadst_16x4_internal_8bpc
;   Pass 1: load coefficient rows 0-6 into m0-m6, run the ADST kernels
;   (.main here; .main_pass1_end is outside this excerpt), interleave the
;   results into the layout idct_16x4's transpose expects, scale, and join
;   idct_16x4_internal_8bpc.pass1_end3.
;   Half of the kernel outputs are still negated at this point (see the signs
;   in the inline comments); .pass1_end fixes that by scaling those registers
;   with the negated multiplier.
;   Pass 2: reuse idct_16x4's two-halves driver with the 8x4 ADST routine.
1904*c0909341SAndroid Build Coastguard Workercglobal iadst_16x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
1905*c0909341SAndroid Build Coastguard Worker    LOAD_7ROWS        coeffq, 16
1906*c0909341SAndroid Build Coastguard Worker    call .main
1907*c0909341SAndroid Build Coastguard Worker    call .main_pass1_end
1908*c0909341SAndroid Build Coastguard Worker
1909*c0909341SAndroid Build Coastguard Worker    punpckhwd             m6, m7, m0                 ;packed -out11, -out15
1910*c0909341SAndroid Build Coastguard Worker    punpcklwd             m0, m7                     ;packed   out0,   out4
1911*c0909341SAndroid Build Coastguard Worker    punpcklwd             m7, m3, m4                 ;packed  -out3,  -out7
1912*c0909341SAndroid Build Coastguard Worker    punpckhwd             m4, m3                     ;packed   out8,  out12
1913*c0909341SAndroid Build Coastguard Worker    mova                  m1, [coeffq+16*6]
1914*c0909341SAndroid Build Coastguard Worker    punpcklwd             m3, m1, m5                 ;packed  -out1,  -out5
1915*c0909341SAndroid Build Coastguard Worker    punpckhwd             m5, m1                     ;packed  out10,  out14
1916*c0909341SAndroid Build Coastguard Worker    mova                  m1, [coeffq+16*7]
    ; Rows 6 and 7 are recycled as spill slots for two negated pairs.
1917*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*6], m3
1918*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*7], m7
1919*c0909341SAndroid Build Coastguard Worker    punpckhwd             m3, m2, m1                 ;packed  -out9,  -out13
1920*c0909341SAndroid Build Coastguard Worker    punpcklwd             m1, m2                     ;packed   out2,   out6
1921*c0909341SAndroid Build Coastguard Worker
1922*c0909341SAndroid Build Coastguard Worker    mova                  m7, [o(pw_16384)]
1923*c0909341SAndroid Build Coastguard Worker
    ; Expects m7 = rounding multiplier. Positive-signed registers are scaled
    ; by m7; the negated ones (row 6, m3, m6, row 7) by m2 = 0 - m7.
    ; On exit m2 holds the scaled row-7 value and row 6 is rewritten, which
    ; is the register/memory layout read by idct_16x4's .pass1_end3.
1924*c0909341SAndroid Build Coastguard Worker.pass1_end:
1925*c0909341SAndroid Build Coastguard Worker    REPX    {pmulhrsw x, m7}, m0, m1, m4, m5
1926*c0909341SAndroid Build Coastguard Worker    pxor                  m2, m2
1927*c0909341SAndroid Build Coastguard Worker    psubw                 m2, m7
1928*c0909341SAndroid Build Coastguard Worker    mova                  m7, [coeffq+16*6]
1929*c0909341SAndroid Build Coastguard Worker    REPX    {pmulhrsw x, m2}, m7, m3, m6
1930*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m2, [coeffq+16*7]
1931*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*6], m7
1932*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_16x4_internal_8bpc).pass1_end3
1933*c0909341SAndroid Build Coastguard Worker
1934*c0909341SAndroid Build Coastguard Worker.pass2:
1935*c0909341SAndroid Build Coastguard Worker    lea                 tx2q, [o(m(iadst_8x4_internal_8bpc).pass2)]
1936*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_16x4_internal_8bpc).pass2_end
1937*c0909341SAndroid Build Coastguard Worker
1938*c0909341SAndroid Build Coastguard WorkerALIGN function_align
1939*c0909341SAndroid Build Coastguard Workercglobal_label .main
1940*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*6], m0
1941*c0909341SAndroid Build Coastguard Worker    pshufd                m0, m1, q1032
1942*c0909341SAndroid Build Coastguard Worker    pshufd                m2, m2, q1032
1943*c0909341SAndroid Build Coastguard Worker    punpckhwd             m1, m6, m0                 ;packed in13,  in2
1944*c0909341SAndroid Build Coastguard Worker    punpcklwd             m0, m6                     ;packed  in3, in12
1945*c0909341SAndroid Build Coastguard Worker    punpckhwd             m7, m5, m2                 ;packed in11,  in4
1946*c0909341SAndroid Build Coastguard Worker    punpcklwd             m2, m5                     ;packed  in5, in10
1947*c0909341SAndroid Build Coastguard Worker    mova                  m6, [o(pd_2048)]
1948*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK         1, 5, 6,  995, 3973       ;low:t2   high:t3
1949*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK         7, 5, 6, 1751, 3703       ;low:t4   high:t5
1950*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK         2, 5, 6, 3513, 2106       ;low:t10  high:t11
1951*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK         0, 5, 6, 3857, 1380       ;low:t12  high:t13
1952*c0909341SAndroid Build Coastguard Worker    psubsw                m5, m1, m2                 ;low:t10a high:t11a
1953*c0909341SAndroid Build Coastguard Worker    paddsw                m1, m2                     ;low:t2a  high:t3a
1954*c0909341SAndroid Build Coastguard Worker    psubsw                m2, m7, m0                 ;low:t12a high:t13a
1955*c0909341SAndroid Build Coastguard Worker    paddsw                m7, m0                     ;low:t4a  high:t5a
1956*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m0, m5
1957*c0909341SAndroid Build Coastguard Worker    punpckhwd             m0, m5                     ;packed t10a, t11a
1958*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m5, m2
1959*c0909341SAndroid Build Coastguard Worker    punpckhwd             m2, m5                     ;packed t13a, t12a
1960*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK         0, 5, 6, 3406, 2276       ;low:t10  high:t11
1961*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK         2, 5, 6, 4017,  799, 1    ;low:t12  high:t13
1962*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*4], m1
1963*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*5], m7
1964*c0909341SAndroid Build Coastguard Worker    mova                  m1, [coeffq+16*6]
1965*c0909341SAndroid Build Coastguard Worker    mova                  m7, [coeffq+16*7]
1966*c0909341SAndroid Build Coastguard Worker    pshufd                m1, m1, q1032
1967*c0909341SAndroid Build Coastguard Worker    pshufd                m3, m3, q1032
1968*c0909341SAndroid Build Coastguard Worker    punpckhwd             m5, m7, m1                 ;packed in15,  in0
1969*c0909341SAndroid Build Coastguard Worker    punpcklwd             m1, m7                     ;packed  in1, in14
1970*c0909341SAndroid Build Coastguard Worker    punpckhwd             m7, m4, m3                 ;packed  in9,  in6
1971*c0909341SAndroid Build Coastguard Worker    punpcklwd             m3, m4                     ;packed  in7,  in8
1972*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK         5, 4, 6,  201, 4091       ;low:t0    high:t1
1973*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK         7, 4, 6, 2440, 3290       ;low:t6    high:t7
1974*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK         3, 4, 6, 3035, 2751       ;low:t8    high:t9
1975*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK         1, 4, 6, 4052,  601       ;low:t14   high:t15
1976*c0909341SAndroid Build Coastguard Worker    psubsw                m4, m5, m3                 ;low:t8a   high:t9a
1977*c0909341SAndroid Build Coastguard Worker    paddsw                m5, m3                     ;low:t0a   high:t1a
1978*c0909341SAndroid Build Coastguard Worker    psubsw                m3, m7, m1                 ;low:t14a  high:t15a
1979*c0909341SAndroid Build Coastguard Worker    paddsw                m7, m1                     ;low:t6a   high:t7a
1980*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m1, m4
1981*c0909341SAndroid Build Coastguard Worker    punpckhwd             m1, m4                     ;packed  t8a,  t9a
1982*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m4, m3
1983*c0909341SAndroid Build Coastguard Worker    punpckhwd             m3, m4                     ;packed t15a, t14a
1984*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK         1, 4, 6,  799, 4017       ;low:t8    high:t9
1985*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK         3, 4, 6, 2276, 3406, 1    ;low:t14   high:t15
1986*c0909341SAndroid Build Coastguard Worker    paddsw                m4, m1, m2                 ;low:t12a  high:t13a
1987*c0909341SAndroid Build Coastguard Worker    psubsw                m1, m2                     ;low:t8a   high:t9a
1988*c0909341SAndroid Build Coastguard Worker    psubsw                m2, m0, m3                 ;low:t14a  high:t15a
1989*c0909341SAndroid Build Coastguard Worker    paddsw                m0, m3                     ;low:t10a  high:t11a
1990*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m3, m1
1991*c0909341SAndroid Build Coastguard Worker    punpckhwd             m3, m1                     ;packed t12a, t13a
1992*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m1, m2
1993*c0909341SAndroid Build Coastguard Worker    punpckhwd             m2, m1                     ;packed t15a, t14a
1994*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK         3, 1, 6, 1567, 3784       ;low:t12   high:t13
1995*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK         2, 1, 6, 3784, 1567, 1    ;low:t14   high:t15
1996*c0909341SAndroid Build Coastguard Worker    psubsw                m1, m3, m2                 ;low:t14a  high:t15a
1997*c0909341SAndroid Build Coastguard Worker    paddsw                m3, m2                     ;low:out2  high:-out13
1998*c0909341SAndroid Build Coastguard Worker    psubsw                m2, m4, m0                 ;low:t10   high:t11
1999*c0909341SAndroid Build Coastguard Worker    paddsw                m0, m4                     ;low:-out1 high:out14
2000*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*6], m0
2001*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*7], m3
2002*c0909341SAndroid Build Coastguard Worker    mova                  m0, [coeffq+16*4]
2003*c0909341SAndroid Build Coastguard Worker    mova                  m3, [coeffq+16*5]
2004*c0909341SAndroid Build Coastguard Worker    psubsw                m4, m5, m3                 ;low:t4    high:t5
2005*c0909341SAndroid Build Coastguard Worker    paddsw                m5, m3                     ;low:t0    high:t1
2006*c0909341SAndroid Build Coastguard Worker    psubsw                m3, m0, m7                 ;low:t6    high:t7
2007*c0909341SAndroid Build Coastguard Worker    paddsw                m0, m7                     ;low:t2    high:t3
2008*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m7, m4
2009*c0909341SAndroid Build Coastguard Worker    punpckhwd             m7, m4                     ;packed t4, t5
2010*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m4, m3
2011*c0909341SAndroid Build Coastguard Worker    punpckhwd             m3, m4                     ;packed t7, t6
2012*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK         7, 4, 6, 1567, 3784       ;low:t4a   high:t5a
2013*c0909341SAndroid Build Coastguard Worker    ITX_MUL2X_PACK         3, 4, 6, 3784, 1567, 1    ;low:t6a   high:t7a
2014*c0909341SAndroid Build Coastguard Worker    psubsw                m4, m5, m0                 ;low:t2a   high:t3a
2015*c0909341SAndroid Build Coastguard Worker    paddsw                m0, m5                     ;low:out0  high:-out15
2016*c0909341SAndroid Build Coastguard Worker    psubsw                m5, m7, m3                 ;low:t6    high:t7
2017*c0909341SAndroid Build Coastguard Worker    paddsw                m3, m7                     ;low:-out3 high:out12
2018*c0909341SAndroid Build Coastguard Worker    ret
; .main_pass1_end: final multiply stage of the 16-point inverse ADST, pass 1.
; Takes the intermediates left in m1 (t14a/t15a), m2 (t10/t11), m4 (t2a/t3a)
; and m5 (t6/t7) and produces the eight middle outputs (out4..out11, some
; negated) with 32-bit precision: pmaddwd, add m6, arithmetic shift by 12,
; then saturating pack back to words.
;   in : m0, m3 = finished outputs (preserved), m1/m2/m4/m5 as above,
;        m6 = rounding term added before the >>12
;        (presumably pd_2048, set up by the caller -- TODO confirm)
;   out: m2, m4, m7, m5 = packed results as annotated below
;   scratch: [coeffq+16*4] and [coeffq+16*5] (used to spill m0 and m3)
2019*c0909341SAndroid Build Coastguard WorkerALIGN function_align
2020*c0909341SAndroid Build Coastguard Worker.main_pass1_end:
2021*c0909341SAndroid Build Coastguard Worker    mova                  m7, [o(deint_shuf1)]
    ; m0/m3 are needed as constant registers below; park them in the
    ; coefficient buffer and reload them just before returning.
2022*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*4], m0
2023*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*5], m3
2024*c0909341SAndroid Build Coastguard Worker    mova                  m0, [o(pw_2896_m2896)]
2025*c0909341SAndroid Build Coastguard Worker    mova                  m3, [o(pw_2896_2896)]
    ; Reorder the words of each input with the deint_shuf1 byte shuffle so
    ; that pmaddwd multiplies the intended word pairs.
2026*c0909341SAndroid Build Coastguard Worker    pshufb                m1, m7                     ;t14a t15a
2027*c0909341SAndroid Build Coastguard Worker    pshufb                m2, m7                     ;t10  t11
2028*c0909341SAndroid Build Coastguard Worker    pshufb                m4, m7                     ;t2a  t3a
2029*c0909341SAndroid Build Coastguard Worker    pshufb                m5, m7                     ;t6   t7
    ; From here on m7 is reused as a temporary for the second product.
2030*c0909341SAndroid Build Coastguard Worker    pmaddwd               m7, m0, m2
2031*c0909341SAndroid Build Coastguard Worker    pmaddwd               m2, m3
2032*c0909341SAndroid Build Coastguard Worker    paddd                 m7, m6
2033*c0909341SAndroid Build Coastguard Worker    paddd                 m2, m6
2034*c0909341SAndroid Build Coastguard Worker    psrad                 m7, 12
2035*c0909341SAndroid Build Coastguard Worker    psrad                 m2, 12
2036*c0909341SAndroid Build Coastguard Worker    packssdw              m2, m7                     ;low:out6  high:-out9
2037*c0909341SAndroid Build Coastguard Worker    pmaddwd               m7, m0, m4
2038*c0909341SAndroid Build Coastguard Worker    pmaddwd               m4, m3
2039*c0909341SAndroid Build Coastguard Worker    paddd                 m7, m6
2040*c0909341SAndroid Build Coastguard Worker    paddd                 m4, m6
2041*c0909341SAndroid Build Coastguard Worker    psrad                 m7, 12
2042*c0909341SAndroid Build Coastguard Worker    psrad                 m4, 12
2043*c0909341SAndroid Build Coastguard Worker    packssdw              m4, m7                     ;low:-out7 high:out8
    ; For the last two inputs the constant roles are swapped (m3 first,
    ; m0 second), which swaps which half of the packed result is negated.
2044*c0909341SAndroid Build Coastguard Worker    pmaddwd               m7, m3, m5
2045*c0909341SAndroid Build Coastguard Worker    pmaddwd               m5, m0
2046*c0909341SAndroid Build Coastguard Worker    paddd                 m7, m6
2047*c0909341SAndroid Build Coastguard Worker    paddd                 m5, m6
2048*c0909341SAndroid Build Coastguard Worker    psrad                 m7, 12
2049*c0909341SAndroid Build Coastguard Worker    psrad                 m5, 12
2050*c0909341SAndroid Build Coastguard Worker    packssdw              m7, m5                     ;low:out4  high:-out11
2051*c0909341SAndroid Build Coastguard Worker    pmaddwd               m5, m3, m1
2052*c0909341SAndroid Build Coastguard Worker    pmaddwd               m1, m0
2053*c0909341SAndroid Build Coastguard Worker    paddd                 m5, m6
2054*c0909341SAndroid Build Coastguard Worker    paddd                 m1, m6
2055*c0909341SAndroid Build Coastguard Worker    psrad                 m5, 12
2056*c0909341SAndroid Build Coastguard Worker    psrad                 m1, 12
2057*c0909341SAndroid Build Coastguard Worker    packssdw              m5, m1                     ;low:-out5 high:out10
    ; Restore the two outputs that were spilled on entry.
2058*c0909341SAndroid Build Coastguard Worker    mova                  m0, [coeffq+16*4]
2059*c0909341SAndroid Build Coastguard Worker    mova                  m3, [coeffq+16*5]
2060*c0909341SAndroid Build Coastguard Worker    ret
; .main_pass2_end: final stage of the 16-point inverse ADST, pass 2 variant.
; Same inputs as .main_pass1_end (m1, m2, m4, m5), but the butterflies are
; done in 16-bit: saturating add/sub followed by pmulhrsw with pw_2896x8.
; Results are returned in m2, m4, m5, m7 (see the annotations on the last
; four instructions). m6 and m7 are overwritten; m0 and m3 are not touched.
2061*c0909341SAndroid Build Coastguard Workercglobal_label .main_pass2_end
2062*c0909341SAndroid Build Coastguard Worker    mova                  m7, [o(pw_2896x8)]
    ; Regroup the 64-bit halves so one psubsw/paddsw pair handles two
    ; butterflies at once.
2063*c0909341SAndroid Build Coastguard Worker    punpckhqdq            m6, m2, m1                 ;low:t11   high:t15a
2064*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m2, m1                     ;low:t10   high:t14a
2065*c0909341SAndroid Build Coastguard Worker    psubsw                m1, m2, m6
2066*c0909341SAndroid Build Coastguard Worker    paddsw                m2, m6
2067*c0909341SAndroid Build Coastguard Worker    punpckhqdq            m6, m4, m5                 ;low:t3a   high:t7
2068*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m4, m5                     ;low:t2a   high:t6
2069*c0909341SAndroid Build Coastguard Worker    psubsw                m5, m4, m6
2070*c0909341SAndroid Build Coastguard Worker    paddsw                m4, m6
    ; Scale every sum/difference by the pw_2896x8 constant (rounded high
    ; multiply).
2071*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m1, m7                     ;low:-out9 high:out10
2072*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m2, m7                     ;low:out6  high:-out5
2073*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m5, m7                     ;low:out8  high:-out11
2074*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m4, m7                     ;low:-out7 high:out4
    ; Regroup the halves again so the register layout matches what
    ; .main_pass1_end produces.
2075*c0909341SAndroid Build Coastguard Worker    punpckhqdq            m7, m4, m5                 ;low:out4  high:-out11
2076*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m4, m5                     ;low:-out7 high:out8
2077*c0909341SAndroid Build Coastguard Worker    punpckhqdq            m5, m2, m1                 ;low:-out5 high:out10
2078*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m2, m1                     ;low:out6  high:-out9
2079*c0909341SAndroid Build Coastguard Worker    ret
2080*c0909341SAndroid Build Coastguard Worker
2081*c0909341SAndroid Build Coastguard Worker
2082*c0909341SAndroid Build Coastguard Worker
; 16x4 inverse transforms whose first pass is a flipped ADST.
; INV_TXFM_16X4_FN is defined outside this section; presumably each line
; below emits the public entry point for one (pass 1, pass 2) type pair.
2083*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN flipadst, dct
2084*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN flipadst, adst
2085*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN flipadst, flipadst
2086*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN flipadst, identity
2087*c0909341SAndroid Build Coastguard Worker
; Pass 1 reuses the plain ADST kernels (.main + .main_pass1_end) and differs
; only in how the results are interleaved afterwards: the word unpacks below
; pair the outputs in reversed order, as the existing per-line annotations
; show. Pass 2 is a thin trampoline.
2088*c0909341SAndroid Build Coastguard Workercglobal iflipadst_16x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
2089*c0909341SAndroid Build Coastguard Worker    LOAD_7ROWS        coeffq, 16
2090*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x4_internal_8bpc).main
2091*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x4_internal_8bpc).main_pass1_end
2092*c0909341SAndroid Build Coastguard Worker
2093*c0909341SAndroid Build Coastguard Worker    punpcklwd             m6, m7, m0                 ;packed  out11,  out15
2094*c0909341SAndroid Build Coastguard Worker    punpckhwd             m0, m7                     ;packed  -out0,  -out4
2095*c0909341SAndroid Build Coastguard Worker    punpckhwd             m7, m3, m4                 ;packed   out3,   out7
2096*c0909341SAndroid Build Coastguard Worker    punpcklwd             m4, m3                     ;packed  -out8, -out12
    ; .main left two result rows in [coeffq+16*6] and [coeffq+16*7]; fetch
    ; them through m1 and reuse the same slots to spill m3 and m7.
2097*c0909341SAndroid Build Coastguard Worker    mova                  m1, [coeffq+16*6]
2098*c0909341SAndroid Build Coastguard Worker    punpckhwd             m3, m1, m5                 ;packed   out1,   out5
2099*c0909341SAndroid Build Coastguard Worker    punpcklwd             m5, m1                     ;packed -out10, -out14
2100*c0909341SAndroid Build Coastguard Worker    mova                  m1, [coeffq+16*7]
2101*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*6], m3
2102*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*7], m7
2103*c0909341SAndroid Build Coastguard Worker    punpcklwd             m3, m2, m1                 ;packed   out9,  out13
2104*c0909341SAndroid Build Coastguard Worker    punpckhwd             m1, m2                     ;packed  -out2,  -out6
2105*c0909341SAndroid Build Coastguard Worker
    ; Hand over to the shared ADST pass-1 tail with pw_m16384 in m7.
    ; NOTE(review): presumably the tail multiplies by m7, so the negative
    ; constant both scales and flips the sign -- confirm at .pass1_end.
2106*c0909341SAndroid Build Coastguard Worker    mova                  m7, [o(pw_m16384)]
2107*c0909341SAndroid Build Coastguard Worker    jmp   m(iadst_16x4_internal_8bpc).pass1_end
2108*c0909341SAndroid Build Coastguard Worker
    ; Pass 2: point tx2q at the 8x4 flipped-ADST pass 2 and let the shared
    ; 16x4 tail in idct_16x4 drive it.
2109*c0909341SAndroid Build Coastguard Worker.pass2:
2110*c0909341SAndroid Build Coastguard Worker    lea                 tx2q, [o(m(iflipadst_8x4_internal_8bpc).pass2)]
2111*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_16x4_internal_8bpc).pass2_end
2112*c0909341SAndroid Build Coastguard Worker
2113*c0909341SAndroid Build Coastguard Worker
; 16x4 inverse transforms whose first pass is the identity transform.
; INV_TXFM_16X4_FN is defined outside this section; presumably each line
; below emits the public entry point for one (pass 1, pass 2) type pair.
2114*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN identity, dct
2115*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN identity, adst
2116*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN identity, flipadst
2117*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN identity, identity
2118*c0909341SAndroid Build Coastguard Worker
; Pass 1 applies the same per-word scaling to all eight 16-byte rows at
; coeffq+16*0 .. coeffq+16*7:
;     row += pmulhrsw(pmulhrsw(row, pw_1697x16), pw_16384)   (saturating add)
; The rows are handled in three batches (5/6/7, then 2/3/4, then 0/1), and
; the first batch is written back to the coefficient buffer to free
; registers. Afterwards the rows are interleaved word-wise and control goes
; to the shared idct_16x4 pass-1 tail.
2119*c0909341SAndroid Build Coastguard Workercglobal iidentity_16x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
2120*c0909341SAndroid Build Coastguard Worker    mova                  m1, [coeffq+16*6]
2121*c0909341SAndroid Build Coastguard Worker    mova                  m0, [coeffq+16*5]
2122*c0909341SAndroid Build Coastguard Worker    mova                  m2, [coeffq+16*7]
    ; m6/m7 keep the two scaling constants for the whole of pass 1
    ; (m6 is consumed by the very last multiply at the bottom).
2123*c0909341SAndroid Build Coastguard Worker    mova                  m6, [o(pw_1697x16)]
2124*c0909341SAndroid Build Coastguard Worker    mova                  m7, [o(pw_16384)]
    ; Batch 1: rows 6, 5, 7.
2125*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m4, m6, m1
2126*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m3, m6, m0
2127*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m5, m6, m2
2128*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m4, m7
2129*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m3, m7
2130*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m5, m7
2131*c0909341SAndroid Build Coastguard Worker    paddsw                m1, m4
2132*c0909341SAndroid Build Coastguard Worker    paddsw                m0, m3
2133*c0909341SAndroid Build Coastguard Worker    paddsw                m5, m2
    ; Load batch 2 (rows 2, 3, 4) before spilling batch 1 back in place.
2134*c0909341SAndroid Build Coastguard Worker    mova                  m2, [coeffq+16*2]
2135*c0909341SAndroid Build Coastguard Worker    mova                  m3, [coeffq+16*3]
2136*c0909341SAndroid Build Coastguard Worker    mova                  m4, [coeffq+16*4]
2137*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*6], m1
2138*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*5], m0
2139*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*7], m5
2140*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m0, m6, m2
2141*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m1, m6, m3
2142*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m5, m6, m4
2143*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m0, m7
2144*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m1, m7
2145*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m5, m7
2146*c0909341SAndroid Build Coastguard Worker    paddsw                m2, m0
2147*c0909341SAndroid Build Coastguard Worker    paddsw                m3, m1
2148*c0909341SAndroid Build Coastguard Worker    paddsw                m4, m5
    ; Batch 3: rows 0 and 1. The constant in m6 is overwritten by its own
    ; product here because it is no longer needed afterwards.
2149*c0909341SAndroid Build Coastguard Worker    mova                  m0, [coeffq+16*0]
2150*c0909341SAndroid Build Coastguard Worker    mova                  m1, [coeffq+16*1]
2151*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m5, m6, m0
2152*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m6, m1
2153*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m5, m7
2154*c0909341SAndroid Build Coastguard Worker    pmulhrsw              m6, m7
2155*c0909341SAndroid Build Coastguard Worker    paddsw                m0, m5
2156*c0909341SAndroid Build Coastguard Worker    paddsw                m1, m6
    ; Reload the spilled rows 6 and 5; row 7 is fetched further down once
    ; m7 is free, with [coeffq+16*6] reused as a spill slot for out1/out5.
2157*c0909341SAndroid Build Coastguard Worker    mova                  m6, [coeffq+16*6]
2158*c0909341SAndroid Build Coastguard Worker    mova                  m5, [coeffq+16*5]
2159*c0909341SAndroid Build Coastguard Worker    punpckhwd             m7, m0, m2                 ;packed out1,  out5
2160*c0909341SAndroid Build Coastguard Worker    punpcklwd             m0, m2                     ;packed out0,  out4
2161*c0909341SAndroid Build Coastguard Worker    punpckhwd             m2, m1, m3                 ;packed out3,  out7
2162*c0909341SAndroid Build Coastguard Worker    punpcklwd             m1, m3                     ;packed out2,  out6
2163*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*6], m7
2164*c0909341SAndroid Build Coastguard Worker    mova                  m7, [coeffq+16*7]
2165*c0909341SAndroid Build Coastguard Worker    punpckhwd             m3, m4, m6                 ;packed out9,  out13
2166*c0909341SAndroid Build Coastguard Worker    punpcklwd             m4, m6                     ;packed out8,  out12
2167*c0909341SAndroid Build Coastguard Worker    punpckhwd             m6, m5, m7                 ;packed out11, out15
2168*c0909341SAndroid Build Coastguard Worker    punpcklwd             m5, m7                     ;packed out10, out14
2169*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_16x4_internal_8bpc).pass1_end3
2170*c0909341SAndroid Build Coastguard Worker
    ; Pass 2: point tx2q at the 8x4 identity pass 2 and let the shared
    ; 16x4 tail in idct_16x4 drive it.
2171*c0909341SAndroid Build Coastguard Worker.pass2:
2172*c0909341SAndroid Build Coastguard Worker    lea                 tx2q, [o(m(iidentity_8x4_internal_8bpc).pass2)]
2173*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_16x4_internal_8bpc).pass2_end
2174*c0909341SAndroid Build Coastguard Worker
2175*c0909341SAndroid Build Coastguard Worker
; SAVE_8ROWS: store registers m0..m7 to eight consecutive rows in memory.
;   %1 = base address of the destination
;   %2 = distance in bytes between consecutive rows
; Uses aligned stores (mova), so every row address must satisfy mova's
; alignment requirement.
2176*c0909341SAndroid Build Coastguard Worker%macro SAVE_8ROWS 2  ;dst, stride
2177*c0909341SAndroid Build Coastguard Worker    mova                 [%1+%2*0], m0
2178*c0909341SAndroid Build Coastguard Worker    mova                 [%1+%2*1], m1
2179*c0909341SAndroid Build Coastguard Worker    mova                 [%1+%2*2], m2
2180*c0909341SAndroid Build Coastguard Worker    mova                 [%1+%2*3], m3
2181*c0909341SAndroid Build Coastguard Worker    mova                 [%1+%2*4], m4
2182*c0909341SAndroid Build Coastguard Worker    mova                 [%1+%2*5], m5
2183*c0909341SAndroid Build Coastguard Worker    mova                 [%1+%2*6], m6
2184*c0909341SAndroid Build Coastguard Worker    mova                 [%1+%2*7], m7
2185*c0909341SAndroid Build Coastguard Worker%endmacro
2186*c0909341SAndroid Build Coastguard Worker
; INV_TXFM_8X16_FN: instantiate one 8x16 inverse-transform entry point.
;   %1 = first-pass transform type, %2 = second-pass transform type
; The generic part is delegated to INV_TXFM_FN (defined outside this
; section; the meaning of its "8" and "16*16" arguments is not visible
; here). For the dct_dct pair only, an extra fast path is appended that
; derives a single value from the first coefficient and reuses the 8x8
; dct_dct loop to apply it.
2187*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_8X16_FN 2 ; type1, type2
2188*c0909341SAndroid Build Coastguard Worker    INV_TXFM_FN          %1, %2, 8x16, 8, 16*16
2189*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct
    ; Replicate the first coefficient word into all eight word lanes of m0.
2190*c0909341SAndroid Build Coastguard Worker    pshuflw              m0, [coeffq], q0000
2191*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m0
2192*c0909341SAndroid Build Coastguard Worker    mova                 m1, [o(pw_2896x8)]
2193*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m1
2194*c0909341SAndroid Build Coastguard Worker    mova                 m2, [o(pw_16384)]
    ; Overwrite the first coefficient dword with eobd.
    ; NOTE(review): presumably eob is 0 whenever this path runs, so this
    ; clears the DC coefficient -- confirm against INV_TXFM_FN.
2195*c0909341SAndroid Build Coastguard Worker    mov            [coeffq], eobd
    ; Remaining scaling: two more multiplies by pw_2896x8, one by pw_16384
    ; and one by pw_2048 (the latter derived in-register from pw_16384).
2196*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m1
2197*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m2
2198*c0909341SAndroid Build Coastguard Worker    psrlw                m2, 3              ; pw_2048
2199*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m1
2200*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m2
    ; r3d = 4 is presumably the iteration count consumed by the 8x8 loop
    ; (TODO confirm there); tx2q holds the address to continue at when
    ; that loop is done.
2201*c0909341SAndroid Build Coastguard Worker    mov                 r3d, 4
2202*c0909341SAndroid Build Coastguard Worker    lea                tx2q, [o(m(inv_txfm_add_dct_dct_8x16_8bpc).end)]
2203*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_8x8_8bpc).loop
2204*c0909341SAndroid Build Coastguard Worker.end:
2205*c0909341SAndroid Build Coastguard Worker    RET
2206*c0909341SAndroid Build Coastguard Worker%endif
2207*c0909341SAndroid Build Coastguard Worker%endmacro
2208*c0909341SAndroid Build Coastguard Worker
; 8x16 inverse transforms whose first pass is a DCT.
2209*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN dct, dct
2210*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN dct, adst
2211*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN dct, flipadst
2212*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN dct, identity
2213*c0909341SAndroid Build Coastguard Worker
; idct_8x16_internal_8bpc
; Pass 1 runs an 8x8 first-pass routine twice, once per half of the
; coefficient buffer; the routine's address is kept in r3 so the adst and
; flipadst 8x16 variants can enter at .pass1 with their own 8x8 routine.
; Pass 2 computes a 16-point DCT and writes it out as two groups of eight
; rows through the 8x8 ".end" code.
; Stack use visible here: [rsp+gprsize+16*11] holds the saved tx2q during
; pass 1; [rsp+gprsize+16*3 ...] holds the first half of the pass-2 result.
2214*c0909341SAndroid Build Coastguard Workercglobal idct_8x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
2215*c0909341SAndroid Build Coastguard Worker    lea                    r3, [o(m(idct_8x8_internal_8bpc).pass1)]
2216*c0909341SAndroid Build Coastguard Worker
    ; First half: the rows at coeffq+16*1 with a 32-byte stride. tx2q is
    ; parked on the stack and temporarily replaced with .pass1_end, which
    ; is presumably where the 8x8 pass-1 code jumps when it is finished.
    ; (The meaning of LOAD_8ROWS' third argument is not visible here.)
2217*c0909341SAndroid Build Coastguard Worker.pass1:
2218*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    coeffq+16*1, 32, 1
2219*c0909341SAndroid Build Coastguard Worker    mov   [rsp+gprsize+16*11], tx2q
2220*c0909341SAndroid Build Coastguard Worker    lea                  tx2q, [o(m(idct_8x16_internal_8bpc).pass1_end)]
2221*c0909341SAndroid Build Coastguard Worker    jmp                    r3
2222*c0909341SAndroid Build Coastguard Worker
    ; Write the first half back in place, load the other half (rows at
    ; coeffq+16*0, same stride), restore the real tx2q and run the same
    ; 8x8 routine again.
2223*c0909341SAndroid Build Coastguard Worker.pass1_end:
2224*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+16*1, 32
2225*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    coeffq+16*0, 32, 1
2226*c0909341SAndroid Build Coastguard Worker    mov                  tx2q, [rsp+gprsize+16*11]
2227*c0909341SAndroid Build Coastguard Worker    jmp                    r3
2228*c0909341SAndroid Build Coastguard Worker
2229*c0909341SAndroid Build Coastguard Worker.pass2:
2230*c0909341SAndroid Build Coastguard Worker    lea                  tx2q, [o(m(idct_8x16_internal_8bpc).end)]
2231*c0909341SAndroid Build Coastguard Worker
    ; Split the 16 inputs by parity: m1/m3/m5/m7 are spilled to the
    ; coefficient buffer for the second kernel, while m0/m2/m4/m6 are
    ; compacted into m0..m3 and joined by four rows loaded from the buffer.
2232*c0909341SAndroid Build Coastguard Worker.pass2_pre:
2233*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*2 ], m1
2234*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*6 ], m3
2235*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*10], m5
2236*c0909341SAndroid Build Coastguard Worker    mova       [coeffq+16*14], m7
2237*c0909341SAndroid Build Coastguard Worker    mova                   m1, m2
2238*c0909341SAndroid Build Coastguard Worker    mova                   m2, m4
2239*c0909341SAndroid Build Coastguard Worker    mova                   m3, m6
2240*c0909341SAndroid Build Coastguard Worker    mova                   m4, [coeffq+16*1 ]
2241*c0909341SAndroid Build Coastguard Worker    mova                   m5, [coeffq+16*5 ]
2242*c0909341SAndroid Build Coastguard Worker    mova                   m6, [coeffq+16*9 ]
2243*c0909341SAndroid Build Coastguard Worker    mova                   m7, [coeffq+16*13]
2244*c0909341SAndroid Build Coastguard Worker
2245*c0909341SAndroid Build Coastguard Worker.pass2_main:
2246*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_8bpc).main
2247*c0909341SAndroid Build Coastguard Worker
    ; Keep the 8-point result on the stack, then feed the remaining eight
    ; inputs to the 16x8 kernel, which presumably combines them with the
    ; values just saved (confirm in idct_16x8 .main).
2248*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS   rsp+gprsize+16*3, 16
2249*c0909341SAndroid Build Coastguard Worker    mova                   m0, [coeffq+16*2 ]
2250*c0909341SAndroid Build Coastguard Worker    mova                   m1, [coeffq+16*6 ]
2251*c0909341SAndroid Build Coastguard Worker    mova                   m2, [coeffq+16*10]
2252*c0909341SAndroid Build Coastguard Worker    mova                   m3, [coeffq+16*14]
2253*c0909341SAndroid Build Coastguard Worker    mova                   m4, [coeffq+16*3 ]
2254*c0909341SAndroid Build Coastguard Worker    mova                   m5, [coeffq+16*7 ]
2255*c0909341SAndroid Build Coastguard Worker    mova                   m6, [coeffq+16*11]
2256*c0909341SAndroid Build Coastguard Worker    mova                   m7, [coeffq+16*15]
2257*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
2258*c0909341SAndroid Build Coastguard Worker
    ; Output the group held in registers at dst + 8*stride first; the
    ; original dst is remembered in r3 for the other group.
2259*c0909341SAndroid Build Coastguard Worker    mov                    r3, dstq
2260*c0909341SAndroid Build Coastguard Worker    lea                  dstq, [dstq+strideq*8]
2261*c0909341SAndroid Build Coastguard Worker    jmp  m(idct_8x8_internal_8bpc).end
2262*c0909341SAndroid Build Coastguard Worker
    ; Continuation reached through tx2q: reload the group kept on the
    ; stack and output it at the original dst. m7 is placed in the stack
    ; slot at +16*0, presumably because the 8x8 .end code expects its
    ; eighth row there (TODO confirm).
2263*c0909341SAndroid Build Coastguard Worker.end:
2264*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*3, 16
2265*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*0], m7
2266*c0909341SAndroid Build Coastguard Worker    lea                  tx2q, [o(m(idct_8x16_internal_8bpc).end1)]
2267*c0909341SAndroid Build Coastguard Worker    mov                  dstq, r3
2268*c0909341SAndroid Build Coastguard Worker    jmp  m(idct_8x8_internal_8bpc).end
2269*c0909341SAndroid Build Coastguard Worker
    ; Final continuation: zero all 16 rows (16 bytes each) of the
    ; coefficient buffer and return.
2270*c0909341SAndroid Build Coastguard Worker.end1:
2271*c0909341SAndroid Build Coastguard Worker    pxor                   m7, m7
2272*c0909341SAndroid Build Coastguard Worker    REPX  {mova [coeffq+16*x], m7}, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15
2273*c0909341SAndroid Build Coastguard Worker    ret
2274*c0909341SAndroid Build Coastguard Worker
; 8x16 inverse transforms whose first pass is an ADST.
2275*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN adst, dct
2276*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN adst, adst
2277*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN adst, flipadst
2278*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN adst, identity
2279*c0909341SAndroid Build Coastguard Worker
; iadst_8x16_internal_8bpc
; Pass 1 shares the two-half driver in idct_8x16 (.pass1), with r3 pointing
; at the 8x8 ADST first pass. Pass 2 gathers the 16 inputs into registers
; and stack slots +16*3 .. +16*10, runs the 16-point ADST kernels from
; iadst_16x8, and writes the result as two groups of eight rows through the
; 8x8 ADST ".end" code.
2280*c0909341SAndroid Build Coastguard Workercglobal iadst_8x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
2281*c0909341SAndroid Build Coastguard Worker    lea                    r3, [o(m(iadst_8x8_internal_8bpc).pass1)]
2282*c0909341SAndroid Build Coastguard Worker    jmp  m(idct_8x16_internal_8bpc).pass1
2283*c0909341SAndroid Build Coastguard Worker
2284*c0909341SAndroid Build Coastguard Worker.pass2:
2285*c0909341SAndroid Build Coastguard Worker    lea                  tx2q, [o(m(iadst_8x16_internal_8bpc).end)]
2286*c0909341SAndroid Build Coastguard Worker
    ; Spill m0, m1, m6, m7 to stack slots 7, 8, 5, 6 and shift m2..m5 down
    ; into m0..m3.
2287*c0909341SAndroid Build Coastguard Worker.pass2_pre:
2288*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*7], m0
2289*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*8], m1
2290*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*5], m6
2291*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*6], m7
2292*c0909341SAndroid Build Coastguard Worker    mova                    m0, m2
2293*c0909341SAndroid Build Coastguard Worker    mova                    m1, m3
2294*c0909341SAndroid Build Coastguard Worker    mova                    m2, m4
2295*c0909341SAndroid Build Coastguard Worker    mova                    m3, m5
2296*c0909341SAndroid Build Coastguard Worker
    ; Copy coefficient rows 1, 3, 13, 15 into stack slots 3, 4, 9, 10
    ; (32*5 is the same offset as 16*10), then load rows 5, 7, 9, 11 into
    ; m4..m7.
2297*c0909341SAndroid Build Coastguard Worker.pass2_main:
2298*c0909341SAndroid Build Coastguard Worker    mova                    m4, [coeffq+16*1 ]
2299*c0909341SAndroid Build Coastguard Worker    mova                    m5, [coeffq+16*3 ]
2300*c0909341SAndroid Build Coastguard Worker    mova                    m6, [coeffq+16*13]
2301*c0909341SAndroid Build Coastguard Worker    mova                    m7, [coeffq+16*15]
2302*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*3], m4
2303*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*4], m5
2304*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*9], m6
2305*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+32*5], m7
2306*c0909341SAndroid Build Coastguard Worker    mova                    m4, [coeffq+16*5 ]
2307*c0909341SAndroid Build Coastguard Worker    mova                    m5, [coeffq+16*7 ]
2308*c0909341SAndroid Build Coastguard Worker    mova                    m6, [coeffq+16*9 ]
2309*c0909341SAndroid Build Coastguard Worker    mova                    m7, [coeffq+16*11]
2310*c0909341SAndroid Build Coastguard Worker
2311*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_8bpc).main
2312*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_8bpc).main_pass2_end
2313*c0909341SAndroid Build Coastguard Worker
    ; Output the group held in registers at dst + 8*stride first; the
    ; original dst is remembered in r3 for the other group.
2314*c0909341SAndroid Build Coastguard Worker    mov                    r3, dstq
2315*c0909341SAndroid Build Coastguard Worker    lea                  dstq, [dstq+strideq*8]
2316*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_8x8_internal_8bpc).end
2317*c0909341SAndroid Build Coastguard Worker
    ; Continuation reached through tx2q: reload the group kept on the
    ; stack, output it at the original dst, and finish in idct_8x16's
    ; .end1 (which is used as the final continuation here as well).
2318*c0909341SAndroid Build Coastguard Worker.end:
2319*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*3, 16
2320*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*0], m7
2321*c0909341SAndroid Build Coastguard Worker    lea                  tx2q, [o(m(idct_8x16_internal_8bpc).end1)]
2322*c0909341SAndroid Build Coastguard Worker    mov                  dstq, r3
2323*c0909341SAndroid Build Coastguard Worker    jmp  m(iadst_8x8_internal_8bpc).end
2324*c0909341SAndroid Build Coastguard Worker
2325*c0909341SAndroid Build Coastguard Worker
; Instantiate the 8x16 wrappers whose first-pass type is flipadst, one per
; second-pass type. NOTE(review): INV_TXFM_8X16_FN is defined outside this
; view; presumably it emits the public inv_txfm_add_<type1>_<type2>_8x16 entry.
2326*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN flipadst, dct
2327*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN flipadst, adst
2328*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN flipadst, flipadst
2329*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN flipadst, identity
2330*c0909341SAndroid Build Coastguard Worker
; iflipadst_8x16_internal_8bpc
; Pass 1 is delegated to the shared idct_8x16 .pass1 code, with r3 holding the
; address of the 8x8 flipadst pass-1 routine.
; NOTE(review): how .pass1 consumes r3 is not visible here - confirm.
; Pass 2 (.pass2 / .pass2_pre / .pass2_main / .end) runs the 16-point ADST
; core from iadst_16x8 and finishes through iflipadst_8x8 .end twice.
2331*c0909341SAndroid Build Coastguard Workercglobal iflipadst_8x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
2332*c0909341SAndroid Build Coastguard Worker    lea                    r3, [o(m(iflipadst_8x8_internal_8bpc).pass1)]
2333*c0909341SAndroid Build Coastguard Worker    jmp  m(idct_8x16_internal_8bpc).pass1
2334*c0909341SAndroid Build Coastguard Worker
2335*c0909341SAndroid Build Coastguard Worker.pass2:
; tx2q = continuation taken after the first iflipadst_8x8 .end call;
; r3 = dst + 8*stride, which .end below installs as dstq for the second call.
2336*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(iflipadst_8x16_internal_8bpc).end)]
2337*c0909341SAndroid Build Coastguard Worker    lea                     r3, [dstq+strideq*8]
2338*c0909341SAndroid Build Coastguard Worker
2339*c0909341SAndroid Build Coastguard Worker.pass2_pre:
; Spill m0, m1, m6, m7 to stack slots 7, 8, 5, 6 and slide m2..m5 down into
; m0..m3, freeing m4..m7 for the loads in .pass2_main.
2340*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*7], m0
2341*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*8], m1
2342*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*5], m6
2343*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*6], m7
2344*c0909341SAndroid Build Coastguard Worker    mova                    m0, m2
2345*c0909341SAndroid Build Coastguard Worker    mova                    m1, m3
2346*c0909341SAndroid Build Coastguard Worker    mova                    m2, m4
2347*c0909341SAndroid Build Coastguard Worker    mova                    m3, m5
2348*c0909341SAndroid Build Coastguard Worker
2349*c0909341SAndroid Build Coastguard Worker.pass2_main:
; Copy coefficient rows 1, 3, 13, 15 into stack slots 3, 4, 9, 10
; (32*5 == 16*10), then load rows 5, 7, 9, 11 into m4..m7.
2350*c0909341SAndroid Build Coastguard Worker    mova                    m4, [coeffq+16*1 ]
2351*c0909341SAndroid Build Coastguard Worker    mova                    m5, [coeffq+16*3 ]
2352*c0909341SAndroid Build Coastguard Worker    mova                    m6, [coeffq+16*13]
2353*c0909341SAndroid Build Coastguard Worker    mova                    m7, [coeffq+16*15]
2354*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*3], m4
2355*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*4], m5
2356*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*9], m6
2357*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+32*5], m7
2358*c0909341SAndroid Build Coastguard Worker    mova                    m4, [coeffq+16*5 ]
2359*c0909341SAndroid Build Coastguard Worker    mova                    m5, [coeffq+16*7 ]
2360*c0909341SAndroid Build Coastguard Worker    mova                    m6, [coeffq+16*9 ]
2361*c0909341SAndroid Build Coastguard Worker    mova                    m7, [coeffq+16*11]
2362*c0909341SAndroid Build Coastguard Worker
; Run the 16-point ADST core and its pass-2 tail, then hand off to the 8x8
; flipadst ending code, which continues at the address held in tx2q.
; NOTE(review): that continuation happens outside this view - confirm.
2363*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_8bpc).main
2364*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_8bpc).main_pass2_end
2365*c0909341SAndroid Build Coastguard Worker    jmp  m(iflipadst_8x8_internal_8bpc).end
2366*c0909341SAndroid Build Coastguard Worker
2367*c0909341SAndroid Build Coastguard Worker.end:
; Second half: reload eight rows from stack slots 3..10, park m7 in slot 0,
; switch the continuation to the shared idct_8x16 .end1 and point dstq at the
; address saved in r3 before re-entering iflipadst_8x8 .end.
; NOTE(review): LOAD_8ROWS is defined elsewhere; presumably it fills m0..m7.
2368*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    rsp+gprsize+16*3, 16
2369*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
2370*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(idct_8x16_internal_8bpc).end1)]
2371*c0909341SAndroid Build Coastguard Worker    mov                   dstq, r3
2372*c0909341SAndroid Build Coastguard Worker    jmp  m(iflipadst_8x8_internal_8bpc).end
2373*c0909341SAndroid Build Coastguard Worker
2374*c0909341SAndroid Build Coastguard Worker
; Instantiate the 8x16 wrappers whose first-pass type is identity.
; NOTE(review): INV_TXFM_8X16_FN is defined outside this view.
2375*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN identity, dct
2376*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN identity, adst
2377*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN identity, flipadst
2378*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN identity, identity
2379*c0909341SAndroid Build Coastguard Worker
; iidentity_8x16_internal_8bpc
; Pass 1 processes the coefficient rows in two groups of eight (starting at
; coeffq+16*1 and coeffq+16*0, both with a 32-byte step), sending each group
; through idct_8x8 .pass1_end3. The caller's tx2q is kept in r3 meanwhile.
; Pass 2 scales the rows with IDTX16 and pw_2048, then finishes through
; idct_8x8 .end3.
; NOTE(review): the third LOAD_8ROWS argument (1) is not explained by the code
; visible here; presumably it requests an extra scaling on load - confirm.
2380*c0909341SAndroid Build Coastguard Workercglobal iidentity_8x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
2381*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    coeffq+16*1, 32, 1
; Save the pass-2 target in r3 and make idct_8x8 .pass1_end3 return to
; .pass1_end instead.
2382*c0909341SAndroid Build Coastguard Worker    mov                    r3, tx2q
2383*c0909341SAndroid Build Coastguard Worker    lea                  tx2q, [o(.pass1_end)]
2384*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*1], m6
2385*c0909341SAndroid Build Coastguard Worker    jmp  m(idct_8x8_internal_8bpc).pass1_end3
2386*c0909341SAndroid Build Coastguard Worker
2387*c0909341SAndroid Build Coastguard Worker.pass1_end:
; Write the first group back over its source rows, load the second group and
; restore the caller's tx2q so the next .pass1_end3 continues into pass 2.
2388*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+16*1, 32
2389*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    coeffq+16*0, 32, 1
2390*c0909341SAndroid Build Coastguard Worker    mov                  tx2q, r3
2391*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*1], m6
2392*c0909341SAndroid Build Coastguard Worker    jmp  m(idct_8x8_internal_8bpc).pass1_end3
2393*c0909341SAndroid Build Coastguard Worker
2394*c0909341SAndroid Build Coastguard Worker.pass2:
; First pass-2 round: continue at .end1 afterwards. Falls through into .end.
2395*c0909341SAndroid Build Coastguard Worker    lea                  tx2q, [o(.end1)]
2396*c0909341SAndroid Build Coastguard Worker
2397*c0909341SAndroid Build Coastguard Worker.end:
; Spill m7 and m6 so they can serve as constant and temporary: m7 holds
; pw_1697x16 and m6 is the scratch register for IDTX16 on m0..m5.
; NOTE(review): IDTX16 is defined elsewhere; its exact arithmetic is not
; visible in this chunk.
2398*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*0], m7
2399*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*1], m6
2400*c0909341SAndroid Build Coastguard Worker    mova                   m7, [o(pw_1697x16)]
2401*c0909341SAndroid Build Coastguard Worker    REPX     {IDTX16 x, 6, 7}, 0, 1, 2, 3, 4, 5
; Reload the original m6, park the processed m5 in slot 2 and use m5 as the
; scratch register while processing m6.
2402*c0909341SAndroid Build Coastguard Worker    mova                   m6, [rsp+gprsize+16*1]
2403*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*2], m5
2404*c0909341SAndroid Build Coastguard Worker    IDTX16                  6, 5, 7
; The original m7 (slot 0) is processed in m5; m7 doubles as temporary and
; constant here, so the constant is not preserved past this point.
2405*c0909341SAndroid Build Coastguard Worker    mova                   m5, [rsp+gprsize+16*0]
2406*c0909341SAndroid Build Coastguard Worker    IDTX16                  5, 7, 7
; Multiply every row by pw_2048 with pmulhrsw. The row parked in slot 2 is
; multiplied directly into m7, which also consumes the constant.
2407*c0909341SAndroid Build Coastguard Worker    mova                   m7, [o(pw_2048)]
2408*c0909341SAndroid Build Coastguard Worker    REPX     {pmulhrsw x, m7}, m0, m1, m2, m3, m4, m5, m6
2409*c0909341SAndroid Build Coastguard Worker    pmulhrsw               m7, [rsp+gprsize+16*2]
; At this point m5 holds the result for the original m7 and m7 holds the
; result for the original m5. Slots 0, 1, 2 receive m5, m6, m7 respectively.
2410*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*0], m5
2411*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*1], m6
2412*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*2], m7
2413*c0909341SAndroid Build Coastguard Worker    jmp  m(idct_8x8_internal_8bpc).end3
2414*c0909341SAndroid Build Coastguard Worker
2415*c0909341SAndroid Build Coastguard Worker.end1:
; Second pass-2 round: load the rows saved at coeffq+16*1, set the
; continuation to the shared idct_8x16 .end1, advance dstq by two rows and
; rerun .end.
2416*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    coeffq+16*1, 32
2417*c0909341SAndroid Build Coastguard Worker    lea                  tx2q, [o(m(idct_8x16_internal_8bpc).end1)]
2418*c0909341SAndroid Build Coastguard Worker    lea                  dstq, [dstq+strideq*2]
2419*c0909341SAndroid Build Coastguard Worker    jmp .end
2420*c0909341SAndroid Build Coastguard Worker
2421*c0909341SAndroid Build Coastguard Worker
; INV_TXFM_16X8_FN type1, type2
; Expands INV_TXFM_FN for the 16x8 block size with the extra arguments
; 8 and 16*16. NOTE(review): INV_TXFM_FN is defined elsewhere; presumably
; these are the xmm register count and the stack size - confirm.
; For the dct_dct pairing only, it also emits a DC-only path that reuses the
; 16x4 .dconly code and returns through a local .end label.
2422*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_16X8_FN 2 ; type1, type2
2423*c0909341SAndroid Build Coastguard Worker    INV_TXFM_FN          %1, %2, 16x8, 8, 16*16
2424*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct
; m0 = pmulhrsw(pw_2896x8, [coeffq]), scaled by pw_2896x8 a second time
; further down. m2 is loaded with pw_16384 for the shared code.
2425*c0909341SAndroid Build Coastguard Worker    movd                 m1, [o(pw_2896x8)]
2426*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m1, [coeffq]
2427*c0909341SAndroid Build Coastguard Worker    movd                 m2, [o(pw_16384)]
; Overwrite the first coefficient dword with eobd.
; NOTE(review): presumably eob is 0 on this path, so this clears the DC
; coefficient - confirm against INV_TXFM_FN.
2428*c0909341SAndroid Build Coastguard Worker    mov            [coeffq], eobd
2429*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m1
; r2d = 4 is passed to the shared .dconly code.
; NOTE(review): presumably an iteration count - confirm in 16x4 .dconly.
; tx2q holds the address of the .end label below.
2430*c0909341SAndroid Build Coastguard Worker    mov                 r2d, 4
2431*c0909341SAndroid Build Coastguard Worker    lea                tx2q, [o(m(inv_txfm_add_dct_dct_16x8_8bpc).end)]
2432*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_16x4_8bpc).dconly
2433*c0909341SAndroid Build Coastguard Worker.end:
2434*c0909341SAndroid Build Coastguard Worker    RET
2435*c0909341SAndroid Build Coastguard Worker%endif
2436*c0909341SAndroid Build Coastguard Worker%endmacro
2437*c0909341SAndroid Build Coastguard Worker
; Instantiate the 16x8 wrappers whose first-pass type is dct.
2438*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN dct, dct
2439*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN dct, adst
2440*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN dct, flipadst
2441*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN dct, identity
2442*c0909341SAndroid Build Coastguard Worker
; idct_16x8_internal_8bpc
; Pass 1: run the 8-point DCT core on the rows at coeffq+16*0 (32-byte step),
; save seven result rows to stack slots 3..9, then run .main on the rows at
; coeffq+16*1. The two 8-row halves of the result go through idct_8x8
; .pass1_end one after the other. The caller's tx2q is kept in r3 meanwhile.
; Pass 2: two rounds of idct_8x8 .pass2_main, the second one at dst+8.
; NOTE(review): LOAD_8ROWS / SAVE_7ROWS / SAVE_8ROWS are defined elsewhere.
2443*c0909341SAndroid Build Coastguard Workercglobal idct_16x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
2444*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    coeffq+16*0, 32, 1
2445*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_8bpc).main
2446*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS   rsp+gprsize+16*3, 16
2447*c0909341SAndroid Build Coastguard Worker
2448*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    coeffq+16*1, 32, 1
2449*c0909341SAndroid Build Coastguard Worker    call  .main
; Keep the pass-2 target in r3 and return from the shared pass-1 ending to
; .pass1_end below.
2450*c0909341SAndroid Build Coastguard Worker    mov                    r3, tx2q
2451*c0909341SAndroid Build Coastguard Worker    lea                  tx2q, [o(.pass1_end)]
2452*c0909341SAndroid Build Coastguard Worker    jmp  m(idct_8x8_internal_8bpc).pass1_end
2453*c0909341SAndroid Build Coastguard Worker
2454*c0909341SAndroid Build Coastguard Worker.pass1_end:
; Store the finished half to coeffq+16*1, reload the other half from stack
; slots 3..10, park m7 in slot 0 and restore the caller's tx2q so the next
; .pass1_end continues into pass 2.
2455*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+16*1, 32
2456*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*3, 16
2457*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*0], m7
2458*c0909341SAndroid Build Coastguard Worker    mov                  tx2q, r3
2459*c0909341SAndroid Build Coastguard Worker    jmp  m(idct_8x8_internal_8bpc).pass1_end
2460*c0909341SAndroid Build Coastguard Worker
2461*c0909341SAndroid Build Coastguard Worker.pass2:
; First round: continue at .end afterwards. r3 = dst + 8 bytes, which .end
; installs as dstq for the second round.
2462*c0909341SAndroid Build Coastguard Worker    lea                  tx2q, [o(.end)]
2463*c0909341SAndroid Build Coastguard Worker    lea                    r3, [dstq+8]
2464*c0909341SAndroid Build Coastguard Worker    jmp  m(idct_8x8_internal_8bpc).pass2_main
2465*c0909341SAndroid Build Coastguard Worker
2466*c0909341SAndroid Build Coastguard Worker.end:
; Second round: load the rows saved at coeffq+16*1, set the continuation to
; the shared idct_8x16 .end1 and move dstq to the address saved in r3.
2467*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    coeffq+16*1, 32
2468*c0909341SAndroid Build Coastguard Worker    lea                  tx2q, [o(m(idct_8x16_internal_8bpc).end1)]
2469*c0909341SAndroid Build Coastguard Worker    mov                  dstq, r3
2470*c0909341SAndroid Build Coastguard Worker    jmp  m(idct_8x8_internal_8bpc).pass2_main
2471*c0909341SAndroid Build Coastguard Worker
2472*c0909341SAndroid Build Coastguard Worker
; idct_16x8_internal_8bpc.main
; Odd-half butterflies of the 16-point inverse DCT, combined with eight values
; already sitting in stack slots 0 and 3..9. The routine is entered with
; `call`, so every slot is addressed as rsp+gprsize*2 (one gprsize more than
; the rsp+gprsize used by its callers).
; In:  m0..m7 = the eight inputs of the odd half;
;      slot 0 and slots 3..9 = values paired with out7/out8 (slot 0) and with
;      out0/out15 .. out6/out9 (slots 3..9).
;      NOTE(review): presumably the even-half results of idct_8x8 .main.
; Out: per the inline comments, m0..m7 = out8..out15 (out15 is also stored in
;      slot 0), slots 3..9 = out0..out6, slot 10 (32*5) = out7.
; Slots 1, 2 and 10 are used as scratch along the way.
2473*c0909341SAndroid Build Coastguard WorkerALIGN function_align
2474*c0909341SAndroid Build Coastguard Workercglobal_label .main
; Spill m2, m6, m5 to free registers for the first rotations.
2475*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*1], m2
2476*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*2], m6
2477*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+32*5], m5
2478*c0909341SAndroid Build Coastguard Worker
; m6 holds pd_2048 and is passed to every ITX_MULSUB_2W below.
; NOTE(review): presumably the rounding term - the macro is defined elsewhere.
2479*c0909341SAndroid Build Coastguard Worker    mova                   m6, [o(pd_2048)]
2480*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W           0, 7, 2, 5, 6,  401, 4076   ;t8a, t15a
2481*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W           4, 3, 2, 5, 6, 3166, 2598   ;t9a, t14a
2482*c0909341SAndroid Build Coastguard Worker    psubsw                 m2, m0, m4                   ;t9
2483*c0909341SAndroid Build Coastguard Worker    paddsw                 m0, m4                       ;t8
2484*c0909341SAndroid Build Coastguard Worker    psubsw                 m4, m7, m3                   ;t14
2485*c0909341SAndroid Build Coastguard Worker    paddsw                 m7, m3                       ;t15
2486*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W           4, 2, 3, 5, 6, 1567, 3784   ;t9a, t14a
; Swap the spilled inputs back in while parking t14a (slot 1), t9a (slot 10)
; and t15 (slot 2).
2487*c0909341SAndroid Build Coastguard Worker    mova                   m3, [rsp+gprsize*2+16*1]
2488*c0909341SAndroid Build Coastguard Worker    mova                   m5, [rsp+gprsize*2+32*5]
2489*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*1], m2
2490*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+32*5], m4
2491*c0909341SAndroid Build Coastguard Worker    mova                   m2, [rsp+gprsize*2+16*2]
2492*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*2], m7
2493*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W           3, 5, 7, 4, 6, 1931, 3612   ;t10a, t13a
2494*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W           2, 1, 7, 4, 6, 3920, 1189   ;t11a, t12a
2495*c0909341SAndroid Build Coastguard Worker    psubsw                 m4, m2, m3                   ;t10
2496*c0909341SAndroid Build Coastguard Worker    paddsw                 m2, m3                       ;t11
2497*c0909341SAndroid Build Coastguard Worker    psubsw                 m3, m1, m5                   ;t13
2498*c0909341SAndroid Build Coastguard Worker    paddsw                 m1, m5                       ;t12
2499*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W           3, 4, 7, 5, 6, m3784, 1567  ;t10a, t13a
2500*c0909341SAndroid Build Coastguard Worker    mova                   m7, [rsp+gprsize*2+32*5]
2501*c0909341SAndroid Build Coastguard Worker    psubsw                 m6, m0, m2                   ;t11a
2502*c0909341SAndroid Build Coastguard Worker    paddsw                 m0, m2                       ;t8a
2503*c0909341SAndroid Build Coastguard Worker    paddsw                 m2, m7, m3                   ;t9
2504*c0909341SAndroid Build Coastguard Worker    psubsw                 m7, m3                       ;t10
; From here on each odd-half term is combined with the value found in a stack
; slot: the difference stays in a register, the sum is written back to a slot.
2505*c0909341SAndroid Build Coastguard Worker    mova                   m5, [rsp+gprsize*2+16*0]
2506*c0909341SAndroid Build Coastguard Worker    psubsw                 m3, m5, m0                   ;out8
2507*c0909341SAndroid Build Coastguard Worker    paddsw                 m0, m5                       ;out7
2508*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+32*5], m0
2509*c0909341SAndroid Build Coastguard Worker    mova                   m5, [rsp+gprsize*2+16*9]
2510*c0909341SAndroid Build Coastguard Worker    psubsw                 m0, m5, m2                   ;out9
2511*c0909341SAndroid Build Coastguard Worker    paddsw                 m2, m5                       ;out6
2512*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*0], m0
2513*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*9], m2
2514*c0909341SAndroid Build Coastguard Worker    mova                   m0, [rsp+gprsize*2+16*1]
2515*c0909341SAndroid Build Coastguard Worker    mova                   m2, [rsp+gprsize*2+16*2]
2516*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*1], m3
2517*c0909341SAndroid Build Coastguard Worker    psubsw                 m5, m0, m4                   ;t13
2518*c0909341SAndroid Build Coastguard Worker    paddsw                 m0, m4                       ;t14
; m6 now carries t11a, so pd_2048 is reloaded into m3 for the last rotations.
2519*c0909341SAndroid Build Coastguard Worker    mova                   m3, [o(pd_2048)]
2520*c0909341SAndroid Build Coastguard Worker    psubsw                 m4, m2, m1                   ;t12a
2521*c0909341SAndroid Build Coastguard Worker    paddsw                 m1, m2                       ;t15a
2522*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*2], m1
2523*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W           5, 7, 1, 2, 3, 2896, 2896   ;t10a, t13a
2524*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W           4, 6, 1, 2, 3, 2896, 2896   ;t11,  t12
2525*c0909341SAndroid Build Coastguard Worker    mova                   m3, [rsp+gprsize*2+16*8]
2526*c0909341SAndroid Build Coastguard Worker    psubsw                 m2, m3, m5                   ;out10
2527*c0909341SAndroid Build Coastguard Worker    paddsw                 m3, m5                       ;out5
2528*c0909341SAndroid Build Coastguard Worker    mova                   m5, [rsp+gprsize*2+16*7]
2529*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*8], m3
2530*c0909341SAndroid Build Coastguard Worker    psubsw                 m3, m5, m4                   ;out11
2531*c0909341SAndroid Build Coastguard Worker    paddsw                 m5, m4                       ;out4
2532*c0909341SAndroid Build Coastguard Worker    mova                   m4, [rsp+gprsize*2+16*6]
2533*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*7], m5
2534*c0909341SAndroid Build Coastguard Worker    paddsw                 m5, m4, m6                   ;out3
2535*c0909341SAndroid Build Coastguard Worker    psubsw                 m4, m6                       ;out12
2536*c0909341SAndroid Build Coastguard Worker    mova                   m6, [rsp+gprsize*2+16*5]
2537*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*6], m5
2538*c0909341SAndroid Build Coastguard Worker    psubsw                 m5, m6, m7                   ;out13
2539*c0909341SAndroid Build Coastguard Worker    paddsw                 m6, m7                       ;out2
2540*c0909341SAndroid Build Coastguard Worker    mova                   m7, [rsp+gprsize*2+16*4]
2541*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*5], m6
2542*c0909341SAndroid Build Coastguard Worker    psubsw                 m6, m7, m0                   ;out14
2543*c0909341SAndroid Build Coastguard Worker    paddsw                 m7, m0                       ;out1
2544*c0909341SAndroid Build Coastguard Worker    mova                   m1, [rsp+gprsize*2+16*2]
2545*c0909341SAndroid Build Coastguard Worker    mova                   m0, [rsp+gprsize*2+16*3]
2546*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*4], m7
2547*c0909341SAndroid Build Coastguard Worker    psubsw                 m7, m0, m1                   ;out15
2548*c0909341SAndroid Build Coastguard Worker    paddsw                 m0, m1                       ;out0
2549*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*3], m0
; Reload out9 (parked in slot 0) and out8 (parked in slot 1) into m1 and m0,
; then leave out15 in slot 0.
2550*c0909341SAndroid Build Coastguard Worker    mova                   m1, [rsp+gprsize*2+16*0]
2551*c0909341SAndroid Build Coastguard Worker    mova                   m0, [rsp+gprsize*2+16*1]
2552*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*0], m7
2553*c0909341SAndroid Build Coastguard Worker    ret
2554*c0909341SAndroid Build Coastguard Worker
; Instantiate the 16x8 wrappers whose first-pass type is adst.
2555*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN adst, dct
2556*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN adst, adst
2557*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN adst, flipadst
2558*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN adst, identity
2559*c0909341SAndroid Build Coastguard Worker
; iadst_16x8_internal_8bpc
; Pass 1: multiply all sixteen coefficient rows by pw_2896x8 (pmulhrsw),
; distribute them between stack slots and registers in the layout .main
; expects, run .main and .main_pass1_end, then send the two 8-row halves
; through iadst_8x8 .pass1_end. The caller's tx2q is kept in r3 meanwhile.
; Pass 2: two rounds of iadst_8x8 .pass2_main, the second one at dst+8.
2560*c0909341SAndroid Build Coastguard Workercglobal iadst_16x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
2561*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_2896x8)]
; Rows 0, 1, 14, 15 go to slots 7, 8, 9, 10 (32*5 == 16*10).
2562*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m7, [coeffq+16*0 ]
2563*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, m7, [coeffq+16*1 ]
2564*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m7, [coeffq+16*14]
2565*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m7, [coeffq+16*15]
2566*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*7], m0
2567*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*8], m1
2568*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*9], m2
2569*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+32*5], m3
; Rows 8, 9 go to slots 3, 4 and rows 6, 7 go to slots 5, 6.
2570*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m7, [coeffq+16*6 ]
2571*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, m7, [coeffq+16*7 ]
2572*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m7, [coeffq+16*8 ]
2573*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m7, [coeffq+16*9 ]
2574*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*3], m2
2575*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*4], m3
2576*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*5], m0
2577*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*6], m1
; Rows 2, 3, 4, 5, 10, 11, 12, 13 stay in m0..m7. The last multiply
; overwrites the constant held in m7.
2578*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m7, [coeffq+16*2 ]
2579*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, m7, [coeffq+16*3 ]
2580*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m7, [coeffq+16*4 ]
2581*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m7, [coeffq+16*5 ]
2582*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m4, m7, [coeffq+16*10]
2583*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m5, m7, [coeffq+16*11]
2584*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m6, m7, [coeffq+16*12]
2585*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m7,     [coeffq+16*13]
2586*c0909341SAndroid Build Coastguard Worker
2587*c0909341SAndroid Build Coastguard Worker    call .main
2588*c0909341SAndroid Build Coastguard Worker    call .main_pass1_end
; Keep the pass-2 target in r3 and return from the shared pass-1 ending to
; .pass1_end below.
2589*c0909341SAndroid Build Coastguard Worker    mov                    r3, tx2q
2590*c0909341SAndroid Build Coastguard Worker    lea                  tx2q, [o(.pass1_end)]
2591*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_8x8_internal_8bpc).pass1_end
2592*c0909341SAndroid Build Coastguard Worker
2593*c0909341SAndroid Build Coastguard Worker.pass1_end:
; Store the finished half to coeffq+16*1, reload the other half from stack
; slots 3..10, park m7 in slot 0 and restore the caller's tx2q so the next
; .pass1_end continues into pass 2.
2594*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+16*1, 32
2595*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*3, 16
2596*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*0], m7
2597*c0909341SAndroid Build Coastguard Worker    mov                  tx2q, r3
2598*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_8x8_internal_8bpc).pass1_end
2599*c0909341SAndroid Build Coastguard Worker
2600*c0909341SAndroid Build Coastguard Worker.pass2:
; First round: continue at .end afterwards. r3 = dst + 8 bytes, which .end
; installs as dstq for the second round.
2601*c0909341SAndroid Build Coastguard Worker    lea                  tx2q, [o(.end)]
2602*c0909341SAndroid Build Coastguard Worker    lea                    r3, [dstq+8]
2603*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_8x8_internal_8bpc).pass2_main
2604*c0909341SAndroid Build Coastguard Worker
2605*c0909341SAndroid Build Coastguard Worker.end:
; Second round: load the rows saved at coeffq+16*1, set the continuation to
; the shared idct_8x16 .end1 and move dstq to the address saved in r3.
2606*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    coeffq+16*1, 32
2607*c0909341SAndroid Build Coastguard Worker    lea                  tx2q, [o(m(idct_8x16_internal_8bpc).end1)]
2608*c0909341SAndroid Build Coastguard Worker    mov                  dstq, r3
2609*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_8x8_internal_8bpc).pass2_main
2610*c0909341SAndroid Build Coastguard Worker
2611*c0909341SAndroid Build Coastguard WorkerALIGN function_align
2612*c0909341SAndroid Build Coastguard Workercglobal_label .main
2613*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*0], m1
2614*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*1], m2
2615*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*2], m6
2616*c0909341SAndroid Build Coastguard Worker
2617*c0909341SAndroid Build Coastguard Worker    mova                    m6, [o(pd_2048)]
2618*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            7, 0, 1, 2, 6,  995, 3973   ;t3,  t2
2619*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            3, 4, 1, 2, 6, 3513, 2106   ;t11, t10
2620*c0909341SAndroid Build Coastguard Worker    psubsw                  m1, m0, m4                   ;t10a
2621*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m4                       ;t2a
2622*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m7, m3                   ;t11a
2623*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m7                       ;t3a
2624*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            1, 4, 7, 2, 6, 3406, 2276   ;t11, t10
2625*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*0]     ;in3
2626*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize*2+16*1]     ;in4
2627*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*0], m1                       ;t11
2628*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*1], m4                       ;t10
2629*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*2]     ;in12
2630*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*2], m0                       ;t2a
2631*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 7, 0, 4, 6, 1751, 3703   ;t5,  t4
2632*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            2, 1, 0, 4, 6, 3857, 1380   ;t13, t12
2633*c0909341SAndroid Build Coastguard Worker    psubsw                  m0, m7, m1                   ;t12a
2634*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m7                       ;t4a
2635*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m5, m2                   ;t13a
2636*c0909341SAndroid Build Coastguard Worker    paddsw                  m5, m2                       ;t5a
2637*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            4, 0, 7, 2, 6, 4017,  799   ;t12, t13
2638*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*8]     ;in1
2639*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize*2+16*9]     ;in14
2640*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*8], m4                       ;t12
2641*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*9], m0                       ;t13
2642*c0909341SAndroid Build Coastguard Worker    mova                    m4, [rsp+gprsize*2+16*4]     ;in9
2643*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*5]     ;in6
2644*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*4], m1                       ;t4a
2645*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*5], m5                       ;t5a
2646*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            2, 7, 1, 5, 6, 4052,  601   ;t15, t14
2647*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            4, 0, 1, 5, 6, 2440, 3290   ;t7,  t6
2648*c0909341SAndroid Build Coastguard Worker    psubsw                  m1, m0, m7                   ;t14a
2649*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m7                       ;t6a
2650*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m4, m2                   ;t15a
2651*c0909341SAndroid Build Coastguard Worker    paddsw                  m4, m2                       ;t7a
2652*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 1, 7, 2, 6, 2276, 3406   ;t14, t15
2653*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*2]     ;t2a
2654*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*2], m5                       ;t14
2655*c0909341SAndroid Build Coastguard Worker    psubsw                  m7, m2, m0                   ;t6
2656*c0909341SAndroid Build Coastguard Worker    paddsw                  m2, m0                       ;t2
2657*c0909341SAndroid Build Coastguard Worker    psubsw                  m0, m3, m4                   ;t7
2658*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m4                       ;t3
2659*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            0, 7, 4, 5, 6, 3784, 1567   ;t6a, t7a
2660*c0909341SAndroid Build Coastguard Worker    mova                    m4, [rsp+gprsize*2+16*7]     ;in0
2661*c0909341SAndroid Build Coastguard Worker    mova                    m5, [rsp+gprsize*2+32*5]     ;in15
2662*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*7], m3                       ;t3
2663*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+32*5], m1                       ;t15
2664*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*6]     ;in7
2665*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*3]     ;in8
2666*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*6], m7                       ;t7a
2667*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*3], m0                       ;t6a
2668*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 0, 7, 6,  201, 4091   ;t1,  t0
2669*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            1, 3, 0, 7, 6, 3035, 2751   ;t9,  t8
2670*c0909341SAndroid Build Coastguard Worker    psubsw                  m0, m4, m3                   ;t8a
2671*c0909341SAndroid Build Coastguard Worker    paddsw                  m4, m3                       ;t0a
2672*c0909341SAndroid Build Coastguard Worker    psubsw                  m3, m5, m1                   ;t9a
2673*c0909341SAndroid Build Coastguard Worker    paddsw                  m5, m1                       ;t1a
2674*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            0, 3, 1, 7, 6,  799, 4017   ;t9,  t8
2675*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*4]     ;t4a
2676*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize*2+16*5]     ;t5a
2677*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*4], m3                       ;t8
2678*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*5], m0                       ;t9
2679*c0909341SAndroid Build Coastguard Worker    psubsw                  m0, m4, m1                   ;t4
2680*c0909341SAndroid Build Coastguard Worker    paddsw                  m4, m1                       ;t0
2681*c0909341SAndroid Build Coastguard Worker    psubsw                  m3, m5, m7                   ;t5
2682*c0909341SAndroid Build Coastguard Worker    paddsw                  m5, m7                       ;t1
2683*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            0, 3, 1, 7, 6, 1567, 3784   ;t5a, t4a
2684*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize*2+16*3]     ;t6a
2685*c0909341SAndroid Build Coastguard Worker    psubsw                  m1, m4, m2                   ;t2a
2686*c0909341SAndroid Build Coastguard Worker    paddsw                  m4, m2                       ;out0
2687*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*3], m4                       ;out0
2688*c0909341SAndroid Build Coastguard Worker    mova                    m4, [rsp+gprsize*2+16*6]     ;t7a
2689*c0909341SAndroid Build Coastguard Worker    psubsw                  m2, m3, m7                   ;t6
2690*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m7                       ;-out3
2691*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*6], m3                       ;-out3
2692*c0909341SAndroid Build Coastguard Worker    psubsw                  m3, m0, m4                   ;t7
2693*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m4                       ;out12
2694*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*12], m3
2695*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*7]     ;t3
2696*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16* 7], m2                       ;out4
2697*c0909341SAndroid Build Coastguard Worker    psubsw                  m2, m5, m3                   ;t3a
2698*c0909341SAndroid Build Coastguard Worker    paddsw                  m5, m3                       ;-out15
2699*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*11], m2
2700*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+32*5]     ;t15
2701*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*10], m1                       ;-out7
2702*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*0]     ;t11
2703*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*0 ], m5                       ;-out15
2704*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*1]     ;t10
2705*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*1 ], m4                       ;-out11
2706*c0909341SAndroid Build Coastguard Worker    mova                    m4, [rsp+gprsize*2+16*2]     ;t14
2707*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*2 ], m0                       ;out12
2708*c0909341SAndroid Build Coastguard Worker    psubsw                  m0, m3, m4                   ;t14a
2709*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m4                       ;t10a
2710*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1, m2                   ;t15a
2711*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m2                       ;t11a
2712*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 0, 2, 4, 6, 3784, 1567   ;t14, t15
2713*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*4]     ;t8
2714*c0909341SAndroid Build Coastguard Worker    mova                    m4, [rsp+gprsize*2+16*5]     ;t9
2715*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*4], m3                       ;t10a
2716*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*5], m1                       ;t11a
2717*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*8]     ;t12
2718*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*9]     ;t13
2719*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*8], m5                       ;t14
2720*c0909341SAndroid Build Coastguard Worker    mova  [rsp+gprsize*2+16*9], m0                       ;t15
2721*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m2, m3                   ;t12a
2722*c0909341SAndroid Build Coastguard Worker    paddsw                  m2, m3                       ;t8a
2723*c0909341SAndroid Build Coastguard Worker    psubsw                  m0, m4, m1                   ;t13a
2724*c0909341SAndroid Build Coastguard Worker    paddsw                  m4, m1                       ;t9a
2725*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 0, 1, 3, 6, 1567, 3784   ;t13, t12
2726*c0909341SAndroid Build Coastguard Worker    mova                    m6, [rsp+gprsize*2+16*4]     ;t10a
2727*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*5]     ;t11a
2728*c0909341SAndroid Build Coastguard Worker    psubsw                  m3, m2, m6                   ;t10
2729*c0909341SAndroid Build Coastguard Worker    paddsw                  m2, m6                       ;-out1
2730*c0909341SAndroid Build Coastguard Worker    paddsw                  m6, m4, m1                   ;out14
2731*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m1                       ;t11
2732*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*14], m4
2733*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16* 4], m2                       ;-out1
2734*c0909341SAndroid Build Coastguard Worker    mova                    m4, [rsp+gprsize*2+16*8]     ;t14
2735*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*9]     ;t15
2736*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16* 9], m3                       ;out6
2737*c0909341SAndroid Build Coastguard Worker    psubsw                  m3, m0, m4                   ;t14a
2738*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m4                       ;out2
2739*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m5, m2                   ;t15a
2740*c0909341SAndroid Build Coastguard Worker    paddsw                  m5, m2                       ;-out13
2741*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16* 5], m0                       ;out2
2742*c0909341SAndroid Build Coastguard Worker    ret
; .main_pass1_end -- pass-1 tail of m(iadst_16x8_internal_8bpc), called right
; after .main (see the call pair in iflipadst_16x8_internal_8bpc).
;
; Turns four pairs of intermediates left by .main into eight outputs. For each
; pair the words are interleaved, multiplied-and-added (pmaddwd) against
; pw_2896_2896 and pw_2896_m2896, biased by pd_2048, shifted right by 12 and
; packed back to words with signed saturation. Going by the constant names this
; is (a+b)*2896 and (a-b)*2896 with rounding -- constant values are defined
; outside this chunk, TODO confirm.
;
; Stack offsets use gprsize*2 here, while the caller addresses the same scratch
; slots with gprsize; presumably the extra gprsize is the return address pushed
; by `call` (TODO confirm).
;
; In  (labels as left by .main): m3/m4 = t14a/t15a, m5 = -out13, m6 = out14,
;     slot 14 = t11, slot 9 = t10, slots 7/12 = t6/t7, slots 10/11 = t2a/t3a,
;     slot 2 = out12.
; Out: m0 = out8, m1 = -out9, m2 = out10, m3 = -out11, m4 = out12,
;     m5 = -out13, m6 = out14; slots 7/8/9/10 = out4/-out5/out6/-out7.
;     m7 is left holding pd_2048.
; REPX is defined outside this chunk; it appears to apply the braced
; instruction to every listed register.
2743*c0909341SAndroid Build Coastguard WorkerALIGN function_align
2744*c0909341SAndroid Build Coastguard Worker.main_pass1_end:
    ; Fetch t11 and park -out13/out14 on the stack so m5/m6 can hold constants.
2745*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*14]
2746*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*14], m5
2747*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*15], m6
2748*c0909341SAndroid Build Coastguard Worker    mova                    m5, [o(pw_2896_2896)]
2749*c0909341SAndroid Build Coastguard Worker    mova                    m6, [o(pw_2896_m2896)]
2750*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pd_2048)]
    ; Pair 1: (m3, m4). m4:m1 = low/high halves of the pw_2896_2896 product,
    ; m2:m3 = low/high halves of the pw_2896_m2896 product.
2751*c0909341SAndroid Build Coastguard Worker    punpcklwd               m2, m3, m4
2752*c0909341SAndroid Build Coastguard Worker    punpckhwd               m3, m4
2753*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m4, m5, m2
2754*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m2, m6
2755*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m1, m5, m3
2756*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m3, m6
2757*c0909341SAndroid Build Coastguard Worker    REPX         {paddd x, m7}, m4, m2, m1, m3
2758*c0909341SAndroid Build Coastguard Worker    REPX         {psrad x, 12}, m4, m1, m2, m3
2759*c0909341SAndroid Build Coastguard Worker    packssdw                m4, m1                       ;-out5
2760*c0909341SAndroid Build Coastguard Worker    packssdw                m2, m3                       ;out10
2761*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16* 8], m4
    ; Pair 2: (slot 9, m0). out6 overwrites slot 9; -out9 stays in m1.
2762*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16* 9]
2763*c0909341SAndroid Build Coastguard Worker    punpcklwd               m1, m3, m0
2764*c0909341SAndroid Build Coastguard Worker    punpckhwd               m3, m0
2765*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m0, m5, m1
2766*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m1, m6
2767*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m4, m5, m3
2768*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m3, m6
2769*c0909341SAndroid Build Coastguard Worker    REPX         {paddd x, m7}, m0, m1, m4, m3
2770*c0909341SAndroid Build Coastguard Worker    REPX         {psrad x, 12}, m0, m4, m1, m3
2771*c0909341SAndroid Build Coastguard Worker    packssdw                m0, m4                       ;out6
2772*c0909341SAndroid Build Coastguard Worker    packssdw                m1, m3                       ;-out9
2773*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16* 9], m0
    ; Pair 3: (slot 7, slot 12). The two-operand pmaddwd on m5 below overwrites
    ; the pw_2896_2896 copy held in m5, which is why pair 4 reads that constant
    ; from memory instead.
2774*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16* 7]
2775*c0909341SAndroid Build Coastguard Worker    mova                    m4, [rsp+gprsize*2+16*12]
2776*c0909341SAndroid Build Coastguard Worker    punpcklwd               m3, m0, m4
2777*c0909341SAndroid Build Coastguard Worker    punpckhwd               m0, m4
2778*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m4, m5, m3
2779*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m3, m6
2780*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m5, m0
2781*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m0, m6
2782*c0909341SAndroid Build Coastguard Worker    REPX         {paddd x, m7}, m4, m3, m5, m0
2783*c0909341SAndroid Build Coastguard Worker    REPX         {psrad x, 12}, m4, m5, m3, m0
2784*c0909341SAndroid Build Coastguard Worker    packssdw                m4, m5                       ;out4
2785*c0909341SAndroid Build Coastguard Worker    packssdw                m3, m0                       ;-out11
2786*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16* 7], m4
    ; Pair 4: (slot 10, slot 11). m6 (pw_2896_m2896) is consumed in place by
    ; its last use; it is reloaded with out14 before returning.
2787*c0909341SAndroid Build Coastguard Worker    mova                    m4, [rsp+gprsize*2+16*10]
2788*c0909341SAndroid Build Coastguard Worker    mova                    m5, [rsp+gprsize*2+16*11]
2789*c0909341SAndroid Build Coastguard Worker    punpcklwd               m0, m4, m5
2790*c0909341SAndroid Build Coastguard Worker    punpckhwd               m4, m5
2791*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m5, m0, [o(pw_2896_2896)]
2792*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m0, m6
2793*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m6, m4
2794*c0909341SAndroid Build Coastguard Worker    pmaddwd                 m4, [o(pw_2896_2896)]
2795*c0909341SAndroid Build Coastguard Worker    REPX         {paddd x, m7}, m5, m0, m6, m4
2796*c0909341SAndroid Build Coastguard Worker    REPX         {psrad x, 12}, m0, m6, m5, m4
2797*c0909341SAndroid Build Coastguard Worker    packssdw                m0, m6                       ;out8
2798*c0909341SAndroid Build Coastguard Worker    packssdw                m5, m4                       ;-out7
2799*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*10], m5
    ; Restore the three outputs that were parked on the stack.
2800*c0909341SAndroid Build Coastguard Worker    mova                    m4, [rsp+gprsize*2+16* 2]    ;out12
2801*c0909341SAndroid Build Coastguard Worker    mova                    m5, [rsp+gprsize*2+16*14]    ;-out13
2802*c0909341SAndroid Build Coastguard Worker    mova                    m6, [rsp+gprsize*2+16*15]    ;out14
2803*c0909341SAndroid Build Coastguard Worker    ret
; .main_pass2_end -- pass-2 counterpart of .main_pass1_end.
;
; Handles the same four pairs, but entirely in 16 bits: saturating add/sub
; (paddsw/psubsw) followed by a rounded high multiply (pmulhrsw) with
; pw_2896x8. The constant is defined outside this chunk; by its name it is
; presumably 2896*8 in every word lane -- TODO confirm.
;
; In : m3/m4 = first pair; slots 9/14, 7/12 and 10/11 = remaining pairs;
;      slot 2 = out12.
; Out: m0 = out8, m1 = -out9, m2 = out10, m3 = -out11, m4 = out12;
;      slots 7/8/9/10 = out4/-out5/out6/-out7; m7 = pw_2896x8.
;      m5 and m6 are not written by this routine.
; cglobal_label is a macro defined outside this chunk; presumably it makes the
; label visible to other translation units -- TODO confirm.
2804*c0909341SAndroid Build Coastguard WorkerALIGN function_align
2805*c0909341SAndroid Build Coastguard Workercglobal_label .main_pass2_end
2806*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_2896x8)]
    ; Pair (slot 9, slot 14): sum -> out6 (written back to slot 9), diff -> -out9.
2807*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16* 9]
2808*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*14]
2809*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m1, m2
2810*c0909341SAndroid Build Coastguard Worker    psubsw                  m1, m2
2811*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m7                       ;out6
2812*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, m7                       ;-out9
2813*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16* 9], m0
    ; Pair (m3, m4): diff -> out10 (kept in m2), sum -> -out5 (stored to slot 8).
2814*c0909341SAndroid Build Coastguard Worker    psubsw                  m2, m3, m4
2815*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m4
2816*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m7                       ;out10
2817*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m7                       ;-out5
2818*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16* 8], m3
    ; Pair (slot 7, slot 12): sum -> out4 (written back to slot 7), diff -> -out11.
2819*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16* 7]
2820*c0909341SAndroid Build Coastguard Worker    mova                    m4, [rsp+gprsize*2+16*12]
2821*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m3, m4
2822*c0909341SAndroid Build Coastguard Worker    psubsw                  m3, m4
2823*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m7                       ;out4
2824*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m7                       ;-out11
2825*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16* 7], m0
    ; Pair (slot 10, slot 11): slot 11 is used as a memory operand twice rather
    ; than being loaded into a register.
2826*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*10]
2827*c0909341SAndroid Build Coastguard Worker    paddsw                  m4, m0, [rsp+gprsize*2+16*11]
2828*c0909341SAndroid Build Coastguard Worker    psubsw                  m0, [rsp+gprsize*2+16*11]
2829*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m4, m7                       ;-out7
2830*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m7                       ;out8
2831*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*10], m4
2832*c0909341SAndroid Build Coastguard Worker    mova                    m4, [rsp+gprsize*2+16*2 ]    ;out12
2833*c0909341SAndroid Build Coastguard Worker    ret
2834*c0909341SAndroid Build Coastguard Worker
; 16x8 transforms whose first type is flipadst, paired with each of the four
; second types. INV_TXFM_16X8_FN is defined outside this chunk.
2835*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN flipadst, dct
2836*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN flipadst, adst
2837*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN flipadst, flipadst
2838*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN flipadst, identity
2839*c0909341SAndroid Build Coastguard Worker
; iflipadst_16x8_internal_8bpc
; Named registers: dst, stride, coeff, eob, tx2.
;
; Pass 1 reads the sixteen 16-byte coefficient rows at coeffq, multiplies each
; by pw_2896x8 (pmulhrsw), and hands them to the shared iadst_16x8 .main /
; .main_pass1_end pair. Control then moves through routines of
; iflipadst_8x8_internal_8bpc by jumping to them with a continuation address
; in tx2q; those routines are outside this chunk and presumably finish with a
; jump through tx2q -- TODO confirm. The original tx2q is kept in r3 meanwhile.
2840*c0909341SAndroid Build Coastguard Workercglobal iflipadst_16x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
2841*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_2896x8)]
    ; Rows 0, 1, 14, 15 -> stack slots 7, 8, 9, 10 (32*5 == 16*10).
2842*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m7, [coeffq+16*0 ]
2843*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, m7, [coeffq+16*1 ]
2844*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m7, [coeffq+16*14]
2845*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m7, [coeffq+16*15]
2846*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*7], m0
2847*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*8], m1
2848*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*9], m2
2849*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+32*5], m3
    ; Rows 8, 9 -> slots 3, 4; rows 6, 7 -> slots 5, 6.
2850*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m7, [coeffq+16*6 ]
2851*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, m7, [coeffq+16*7 ]
2852*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m7, [coeffq+16*8 ]
2853*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m7, [coeffq+16*9 ]
2854*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*3], m2
2855*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*4], m3
2856*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*5], m0
2857*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*6], m1
    ; Rows 2-5 and 10-13 stay in m0-m7; the last multiply overwrites the
    ; constant held in m7.
2858*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m7, [coeffq+16*2 ]
2859*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, m7, [coeffq+16*3 ]
2860*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m7, [coeffq+16*4 ]
2861*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m7, [coeffq+16*5 ]
2862*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m4, m7, [coeffq+16*10]
2863*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m5, m7, [coeffq+16*11]
2864*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m6, m7, [coeffq+16*12]
2865*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m7,     [coeffq+16*13]
2866*c0909341SAndroid Build Coastguard Worker
2867*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_8bpc).main
2868*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_8bpc).main_pass1_end
2869*c0909341SAndroid Build Coastguard Worker
    ; m7 <- slot 0, then m0-m7 are written to coeffq and a second set of eight
    ; rows is read from stack slots 3.. (SAVE_8ROWS/LOAD_8ROWS are defined
    ; outside this chunk; presumably base address + per-row stride).
2870*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*0]
2871*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS     coeffq+16*0, 32
2872*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    rsp+gprsize+16*3, 16
2873*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
    ; Keep the caller's tx2q in r3 and continue at .pass1_end afterwards.
2874*c0909341SAndroid Build Coastguard Worker    mov                     r3, tx2q
2875*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end)]
2876*c0909341SAndroid Build Coastguard Worker    jmp m(iflipadst_8x8_internal_8bpc).pass1_end
2877*c0909341SAndroid Build Coastguard Worker
; Second trip through the 8x8 pass-1 tail: store the rows just produced at
; coeffq+16*1 (stride 32), reload the rows saved earlier at coeffq+16*0, and
; restore the original tx2q so the next continuation is the caller's.
2878*c0909341SAndroid Build Coastguard Worker.pass1_end:
2879*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS     coeffq+16*1, 32
2880*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*0, 32
2881*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
2882*c0909341SAndroid Build Coastguard Worker    mov                   tx2q, r3
2883*c0909341SAndroid Build Coastguard Worker    jmp m(iflipadst_8x8_internal_8bpc).pass1_end
2884*c0909341SAndroid Build Coastguard Worker
; Pass 2 runs the 8x8 flipadst pass-2 routine twice. r3 remembers dstq+8 for
; the second run; .end is the continuation after the first run.
2885*c0909341SAndroid Build Coastguard Worker.pass2:
2886*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end)]
2887*c0909341SAndroid Build Coastguard Worker    lea                     r3, [dstq+8]
2888*c0909341SAndroid Build Coastguard Worker    jmp m(iflipadst_8x8_internal_8bpc).pass2_main
2889*c0909341SAndroid Build Coastguard Worker
; Second run: rows from coeffq+16*1 (stride 32), destination dstq+8, with
; m(idct_8x16_internal_8bpc).end1 as the final continuation.
2890*c0909341SAndroid Build Coastguard Worker.end:
2891*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*1, 32
2892*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(idct_8x16_internal_8bpc).end1)]
2893*c0909341SAndroid Build Coastguard Worker    mov                   dstq, r3
2894*c0909341SAndroid Build Coastguard Worker    jmp m(iflipadst_8x8_internal_8bpc).pass2_main
2895*c0909341SAndroid Build Coastguard Worker
2896*c0909341SAndroid Build Coastguard Worker
; 16x8 transforms whose first type is identity, paired with each of the four
; second types. INV_TXFM_16X8_FN is defined outside this chunk.
2897*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN identity, dct
2898*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN identity, adst
2899*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN identity, flipadst
2900*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN identity, identity
2901*c0909341SAndroid Build Coastguard Worker
; iidentity_16x8_internal_8bpc
; Named registers: dst, stride, coeff, eob, tx2.
;
; Pass 1 runs .pass1 twice, eight coefficient rows at a time. Each row x is
; first multiplied by pw_2896x8 and then replaced by
;     x + pmulhrsw(pmulhrsw(x, pw_1697x16), pw_16384)
; using a saturating add. The constants are defined outside this chunk.
; coeffq is moved down by 8*16 bytes on every .pass1 entry, so the first trip
; covers rows 8-15 and the second rows 0-7.
2902*c0909341SAndroid Build Coastguard Workercglobal iidentity_16x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    ; Point coeffq past row 15 and pre-load rows 9, 11, 13, 15.
2903*c0909341SAndroid Build Coastguard Worker    add                coeffq, 16*16
2904*c0909341SAndroid Build Coastguard Worker    mova                   m4, [coeffq-16*7]
2905*c0909341SAndroid Build Coastguard Worker    mova                   m5, [coeffq-16*5]
2906*c0909341SAndroid Build Coastguard Worker    mova                   m6, [coeffq-16*3]
2907*c0909341SAndroid Build Coastguard Worker    mova                   m7, [coeffq-16*1]
    ; Keep the caller's tx2q in r3; the first trip continues at .pass1_end.
2908*c0909341SAndroid Build Coastguard Worker    mov                    r3, tx2q
2909*c0909341SAndroid Build Coastguard Worker    lea                  tx2q, [o(.pass1_end)]
2910*c0909341SAndroid Build Coastguard Worker
; In : m4-m7 = the odd rows of the current group of eight (unscaled);
;      the even rows are read from coeffq+16*{0,2,4,6} after the adjustment.
; Out: results "0"-"5" and "7" in m0-m5 and m7; result "6" is left in stack
;      slot 1 -- presumably picked up there by the routine jumped to, TODO
;      confirm. The trailing "; n" comments give the result index.
2911*c0909341SAndroid Build Coastguard Worker.pass1:
2912*c0909341SAndroid Build Coastguard Worker    mova                   m0, [o(pw_2896x8)]
2913*c0909341SAndroid Build Coastguard Worker    mova                   m2, [o(pw_1697x16)]
2914*c0909341SAndroid Build Coastguard Worker    mova                   m3, [o(pw_16384)]
2915*c0909341SAndroid Build Coastguard Worker    sub                coeffq, 8*16
2916*c0909341SAndroid Build Coastguard Worker    REPX     {pmulhrsw x, m0}, m4, m5, m6, m7
2917*c0909341SAndroid Build Coastguard Worker    pmulhrsw               m1, m2, m4
2918*c0909341SAndroid Build Coastguard Worker    pmulhrsw               m1, m3
2919*c0909341SAndroid Build Coastguard Worker    paddsw                 m1, m4 ; 1
2920*c0909341SAndroid Build Coastguard Worker    pmulhrsw               m4, m2, m5
2921*c0909341SAndroid Build Coastguard Worker    pmulhrsw               m4, m3
2922*c0909341SAndroid Build Coastguard Worker    paddsw                 m4, m5 ; 3
2923*c0909341SAndroid Build Coastguard Worker    pmulhrsw               m5, m2, m6
2924*c0909341SAndroid Build Coastguard Worker    pmulhrsw               m5, m3
2925*c0909341SAndroid Build Coastguard Worker    paddsw                 m5, m6 ; 5
2926*c0909341SAndroid Build Coastguard Worker    pmulhrsw               m6, m2, m7
2927*c0909341SAndroid Build Coastguard Worker    pmulhrsw               m6, m3
2928*c0909341SAndroid Build Coastguard Worker    paddsw                 m7, m6 ; 7
2929*c0909341SAndroid Build Coastguard Worker    pmulhrsw               m6, m0, [coeffq+16*6]
    ; Result "3" is parked in slot 0 so m4 can serve as a temporary; it is
    ; reloaded into m3 just before the jump.
2930*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*0], m4
2931*c0909341SAndroid Build Coastguard Worker    pmulhrsw               m4, m2, m6
2932*c0909341SAndroid Build Coastguard Worker    pmulhrsw               m4, m3
2933*c0909341SAndroid Build Coastguard Worker    paddsw                 m6, m4 ; 6
2934*c0909341SAndroid Build Coastguard Worker    pmulhrsw               m4, m0, [coeffq+16*4]
2935*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*1], m6
2936*c0909341SAndroid Build Coastguard Worker    pmulhrsw               m6, m2, m4
2937*c0909341SAndroid Build Coastguard Worker    pmulhrsw               m6, m3
2938*c0909341SAndroid Build Coastguard Worker    paddsw                 m4, m6 ; 4
2939*c0909341SAndroid Build Coastguard Worker    pmulhrsw               m6, m0, [coeffq+16*2]
2940*c0909341SAndroid Build Coastguard Worker    pmulhrsw               m0,     [coeffq+16*0]
    ; m2 held pw_1697x16 and is overwritten here, so the last row takes that
    ; constant from memory.
2941*c0909341SAndroid Build Coastguard Worker    pmulhrsw               m2, m6
2942*c0909341SAndroid Build Coastguard Worker    pmulhrsw               m2, m3
2943*c0909341SAndroid Build Coastguard Worker    paddsw                 m2, m6 ; 2
2944*c0909341SAndroid Build Coastguard Worker    pmulhrsw               m6, m0, [o(pw_1697x16)]
2945*c0909341SAndroid Build Coastguard Worker    pmulhrsw               m6, m3
2946*c0909341SAndroid Build Coastguard Worker    mova                   m3, [rsp+gprsize+16*0]
2947*c0909341SAndroid Build Coastguard Worker    paddsw                 m0, m6
2948*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end3
2949*c0909341SAndroid Build Coastguard Worker
; Continuation after the first .pass1 trip. m4-m7 go to coeffq+16*{1,3,5,7};
; the next four inputs are read from coeffq-16*{7,5,3,1} BEFORE m0-m3 are
; written to those same addresses. tx2q is then restored from r3 so the second
; trip continues at the caller's target.
2950*c0909341SAndroid Build Coastguard Worker.pass1_end:
2951*c0909341SAndroid Build Coastguard Worker    mova        [coeffq+16*1], m4
2952*c0909341SAndroid Build Coastguard Worker    mova        [coeffq+16*3], m5
2953*c0909341SAndroid Build Coastguard Worker    mova        [coeffq+16*5], m6
2954*c0909341SAndroid Build Coastguard Worker    mova        [coeffq+16*7], m7
2955*c0909341SAndroid Build Coastguard Worker    mova                   m4, [coeffq-16*7]
2956*c0909341SAndroid Build Coastguard Worker    mova                   m5, [coeffq-16*5]
2957*c0909341SAndroid Build Coastguard Worker    mova                   m6, [coeffq-16*3]
2958*c0909341SAndroid Build Coastguard Worker    mova                   m7, [coeffq-16*1]
2959*c0909341SAndroid Build Coastguard Worker    mova        [coeffq-16*7], m0
2960*c0909341SAndroid Build Coastguard Worker    mova        [coeffq-16*5], m1
2961*c0909341SAndroid Build Coastguard Worker    mova        [coeffq-16*3], m2
2962*c0909341SAndroid Build Coastguard Worker    mova        [coeffq-16*1], m3
2963*c0909341SAndroid Build Coastguard Worker    mov                  tx2q, r3
2964*c0909341SAndroid Build Coastguard Worker    jmp .pass1
2965*c0909341SAndroid Build Coastguard Worker
; Pass 2 runs m(iidentity_8x8_internal_8bpc).end twice. r3 remembers dstq+8
; for the second run; .end is the continuation after the first run.
2966*c0909341SAndroid Build Coastguard Worker.pass2:
2967*c0909341SAndroid Build Coastguard Worker    lea                  tx2q, [o(.end)]
2968*c0909341SAndroid Build Coastguard Worker    lea                    r3, [dstq+8]
2969*c0909341SAndroid Build Coastguard Worker    jmp  m(iidentity_8x8_internal_8bpc).end
2970*c0909341SAndroid Build Coastguard Worker
; Second run: rows from coeffq+16*1 (stride 32), destination dstq+8, with
; m(idct_8x16_internal_8bpc).end1 as the final continuation.
2971*c0909341SAndroid Build Coastguard Worker.end:
2972*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    coeffq+16*1, 32
2973*c0909341SAndroid Build Coastguard Worker    lea                  tx2q, [o(m(idct_8x16_internal_8bpc).end1)]
2974*c0909341SAndroid Build Coastguard Worker    mov                  dstq, r3
2975*c0909341SAndroid Build Coastguard Worker    jmp  m(iidentity_8x8_internal_8bpc).end
2976*c0909341SAndroid Build Coastguard Worker
2977*c0909341SAndroid Build Coastguard Worker
; INV_TXFM_16X16_FN type1, type2
; Instantiates INV_TXFM_FN (defined outside this chunk) for the 16x16 size with
; the extra arguments 8 and 16*16. For the dct_dct combination only, it also
; emits the code below, which prepares m0/m1/m2, r2d and tx2q and then jumps to
; the .dconly label of the 16x4 dct_dct function; .end / RET is the
; continuation handed over in tx2q.
2978*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_16X16_FN 2 ; type1, type2
2979*c0909341SAndroid Build Coastguard Worker    INV_TXFM_FN          %1, %2, 16x16, 8, 16*16
2980*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct
    ; movd loads only the low 32 bits of each constant.
2981*c0909341SAndroid Build Coastguard Worker    movd                   m1, [o(pw_2896x8)]
2982*c0909341SAndroid Build Coastguard Worker    pmulhrsw               m0, m1, [coeffq]
2983*c0909341SAndroid Build Coastguard Worker    movd                   m2, [o(pw_8192)]
    ; Overwrites the first dword of the coefficient buffer with eobd.
    ; NOTE(review): presumably eob is zero on this path, making this a clear of
    ; the DC coefficient -- confirm against INV_TXFM_FN.
2984*c0909341SAndroid Build Coastguard Worker    mov              [coeffq], eobd
2985*c0909341SAndroid Build Coastguard Worker    mov                   r2d, 8
2986*c0909341SAndroid Build Coastguard Worker    lea                  tx2q, [o(m(inv_txfm_add_dct_dct_16x16_8bpc).end)]
2987*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_16x4_8bpc).dconly
2988*c0909341SAndroid Build Coastguard Worker.end:
2989*c0909341SAndroid Build Coastguard Worker    RET
2990*c0909341SAndroid Build Coastguard Worker%endif
2991*c0909341SAndroid Build Coastguard Worker%endmacro
2992*c0909341SAndroid Build Coastguard Worker
; 16x16 transforms whose first type is dct, paired with each of the four
; second types.
2993*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN dct, dct
2994*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN dct, adst
2995*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN dct, flipadst
2996*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN dct, identity
2997*c0909341SAndroid Build Coastguard Worker
; idct_16x16_internal_8bpc
; Named registers: dst, stride, coeff, eob, tx2.
;
; Pass 1 is done in two halves. Each half calls m(idct_8x8_internal_8bpc).main
; on one set of eight rows, parks seven result rows on the stack, calls
; m(idct_16x8_internal_8bpc).main on a second set, and then goes through
; m(idct_8x8_internal_8bpc).pass1_end1 twice with m7 = pw_8192. Sequencing is
; by continuation address in tx2q:
;     entry -> .pass1_end -> .pass1_end1 -> .pass1_end2 -> caller's tx2q (in r3)
; LOAD_8ROWS / SAVE_8ROWS / SAVE_7ROWS are defined outside this chunk;
; presumably they take a base address and a per-row byte stride.
2998*c0909341SAndroid Build Coastguard Workercglobal idct_16x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    ; First half: rows starting at coeffq+16*1 and coeffq+16*3, stride 64.
2999*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*1, 64
3000*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
3001*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
3002*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*3, 64
3003*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
    ; Keep the caller's tx2q in r3 until .pass1_end2 restores it.
3004*c0909341SAndroid Build Coastguard Worker    mov                     r3, tx2q
3005*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end)]
3006*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
3007*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
3008*c0909341SAndroid Build Coastguard Worker
; Store the rows just produced at coeffq+16*17 (stride 32), then feed the rows
; parked on the stack through the same tail.
3009*c0909341SAndroid Build Coastguard Worker.pass1_end:
3010*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+16*17, 32
3011*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    rsp+gprsize+16*3, 16
3012*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
3013*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end1)]
3014*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
3015*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
3016*c0909341SAndroid Build Coastguard Worker
; Store at coeffq+16*1 (stride 32), then start the second half on the rows at
; coeffq+16*0 and coeffq+16*2, stride 64.
3017*c0909341SAndroid Build Coastguard Worker.pass1_end1:
3018*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS     coeffq+16*1, 32
3019*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*0, 64
3020*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
3021*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
3022*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*2, 64
3023*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
3024*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end2)]
3025*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
3026*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
3027*c0909341SAndroid Build Coastguard Worker
; Store at coeffq+16*16 (stride 32), run the stack rows through the tail one
; last time and hand control back to the caller's tx2q.
3028*c0909341SAndroid Build Coastguard Worker.pass1_end2:
3029*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+16*16, 32
3030*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    rsp+gprsize+16*3, 16
3031*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
3032*c0909341SAndroid Build Coastguard Worker    mov                   tx2q, r3
3033*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
3034*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
3035*c0909341SAndroid Build Coastguard Worker
; Pass 2 reuses the idct_8x16 pass-2 code, with .end as its continuation.
3036*c0909341SAndroid Build Coastguard Worker.pass2:
3037*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end)]
3038*c0909341SAndroid Build Coastguard Worker    jmp  m(idct_8x16_internal_8bpc).pass2_pre
3039*c0909341SAndroid Build Coastguard Worker
; NOTE(review): r3 is read here without being set in .pass2 above; presumably
; the idct_8x16 pass-2 code leaves a destination pointer in it -- confirm.
; r3 is then re-pointed at dstq+8 for use in .end1.
3040*c0909341SAndroid Build Coastguard Worker.end:
3041*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    rsp+gprsize+16*3, 16
3042*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
3043*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end1)]
3044*c0909341SAndroid Build Coastguard Worker    mov                   dstq, r3
3045*c0909341SAndroid Build Coastguard Worker    lea                     r3, [dstq+8]
3046*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).end
3047*c0909341SAndroid Build Coastguard Worker
; Zero the first sixteen 16-byte rows of the coefficient buffer, advance coeffq
; by 32*8 bytes to the rows behind them, switch to the destination saved in r3,
; and run the idct_8x16 pass-2 code again with its own .end as the final
; continuation.
3048*c0909341SAndroid Build Coastguard Worker.end1:
3049*c0909341SAndroid Build Coastguard Worker    pxor                    m7, m7
3050*c0909341SAndroid Build Coastguard Worker    REPX   {mova [coeffq+16*x], m7}, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15
3051*c0909341SAndroid Build Coastguard Worker
3052*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 32*8
3053*c0909341SAndroid Build Coastguard Worker    mov                   dstq, r3
3054*c0909341SAndroid Build Coastguard Worker
    ; Rows 0, 4, 8, 12 -> m0-m3 and rows 1, 5, 9, 13 -> m4-m7, relative to the
    ; advanced coeffq.
3055*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*0 ]
3056*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*4 ]
3057*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*8 ]
3058*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*12]
3059*c0909341SAndroid Build Coastguard Worker    mova                    m4, [coeffq+16*1 ]
3060*c0909341SAndroid Build Coastguard Worker    mova                    m5, [coeffq+16*5 ]
3061*c0909341SAndroid Build Coastguard Worker    mova                    m6, [coeffq+16*9 ]
3062*c0909341SAndroid Build Coastguard Worker    mova                    m7, [coeffq+16*13]
3063*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(idct_8x16_internal_8bpc).end)]
3064*c0909341SAndroid Build Coastguard Worker    jmp  m(idct_8x16_internal_8bpc).pass2_main
3065*c0909341SAndroid Build Coastguard Worker
3066*c0909341SAndroid Build Coastguard Worker
; ITX_16X16_ADST_LOAD_ODD_COEFS (no arguments)
; Distributes the sixteen odd-numbered 16-byte rows at coeffq (rows 1, 3, ...,
; 31) between stack slots and registers, without scaling them:
;     rows  1,  3, 29, 31 -> stack slots 7, 8, 9, 10 (32*5 == 16*10)
;     rows 17, 19         -> stack slots 3, 4
;     rows 13, 15         -> stack slots 5, 6
;     rows  5,  7,  9, 11 -> m0-m3
;     rows 21, 23, 25, 27 -> m4-m7
; Stack slots are addressed relative to rsp+gprsize. Overwrites m0-m7.
3067*c0909341SAndroid Build Coastguard Worker%macro ITX_16X16_ADST_LOAD_ODD_COEFS 0
3068*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*1 ]
3069*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*3 ]
3070*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*29]
3071*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*31]
3072*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*7], m0
3073*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*8], m1
3074*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*9], m2
3075*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+32*5], m3
    ; m0/m1 carry rows 13/15 and m2/m3 rows 17/19; the stores place the later
    ; pair in the lower slots.
3076*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*13]
3077*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*15]
3078*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*17]
3079*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*19]
3080*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*3], m2
3081*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*4], m3
3082*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*5], m0
3083*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*6], m1
    ; The remaining eight rows stay in registers.
3084*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*5 ]
3085*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*7 ]
3086*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*9 ]
3087*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*11]
3088*c0909341SAndroid Build Coastguard Worker    mova                    m4, [coeffq+16*21]
3089*c0909341SAndroid Build Coastguard Worker    mova                    m5, [coeffq+16*23]
3090*c0909341SAndroid Build Coastguard Worker    mova                    m6, [coeffq+16*25]
3091*c0909341SAndroid Build Coastguard Worker    mova                    m7, [coeffq+16*27]
3092*c0909341SAndroid Build Coastguard Worker%endmacro
3093*c0909341SAndroid Build Coastguard Worker
; ITX_16X16_ADST_LOAD_EVEN_COEFS (no arguments)
; Even-row counterpart of ITX_16X16_ADST_LOAD_ODD_COEFS: reads the sixteen
; even-numbered 16-byte rows (0, 2, ..., 30) at coeffq and distributes them
; with the same stack/register layout:
;   rows 16, 18          -> [rsp+gprsize+16*3], [rsp+gprsize+16*4]
;   rows 12, 14          -> [rsp+gprsize+16*5], [rsp+gprsize+16*6]
;   rows  0,  2, 28, 30  -> [rsp+gprsize+16*7] .. [rsp+gprsize+16*10]
;   rows  4,  6,  8, 10, 20, 22, 24, 26 -> m0 .. m7
; Overwrites m0-m7; coeffq itself is not modified.
3094*c0909341SAndroid Build Coastguard Worker%macro ITX_16X16_ADST_LOAD_EVEN_COEFS 0
3095*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*0 ]
3096*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*2 ]
3097*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*28]
3098*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*30]
3099*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*7], m0
3100*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*8], m1
3101*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*9], m2
    ; 32*5 == 16*10, i.e. the slot immediately after 16*9
3102*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+32*5], m3
3103*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*12]
3104*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*14]
3105*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*16]
3106*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*18]
    ; rows 16/18 go to the lower slots (3, 4), rows 12/14 to slots 5, 6
3107*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*3], m2
3108*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*4], m3
3109*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*5], m0
3110*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*6], m1
    ; the remaining eight rows stay live in m0-m7 on exit
3111*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*4 ]
3112*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*6 ]
3113*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*8 ]
3114*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*10]
3115*c0909341SAndroid Build Coastguard Worker    mova                    m4, [coeffq+16*20]
3116*c0909341SAndroid Build Coastguard Worker    mova                    m5, [coeffq+16*22]
3117*c0909341SAndroid Build Coastguard Worker    mova                    m6, [coeffq+16*24]
3118*c0909341SAndroid Build Coastguard Worker    mova                    m7, [coeffq+16*26]
3119*c0909341SAndroid Build Coastguard Worker%endmacro
3120*c0909341SAndroid Build Coastguard Worker
; 16x16 inverse transform with ADST as the first pass.
; NOTE(review): INV_TXFM_16X16_FN is defined outside this chunk; presumably
; each line below instantiates the public entry point for that
; (first, second) transform pair -- confirm against the macro.
3121*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN adst, dct
3122*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN adst, adst
3123*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN adst, flipadst
3124*c0909341SAndroid Build Coastguard Worker
; iadst_16x16_internal_8bpc
; Uses the named registers dstq, strideq, coeffq, eobd and tx2q, plus r3 and
; stack slots at [rsp+gprsize+16*n].
; Control flow is continuation style: tx2q is loaded with a label address
; before each jump into a shared 8x8 tail. NOTE(review): those tails are
; outside this chunk; they presumably finish by jumping through tx2q.
; Pass 1 runs the 16-point ADST twice (odd rows, then even rows); each run
; yields two groups of eight registers that are sent through
; m(iadst_8x8_internal_8bpc).pass1_end1. The first three groups are written
; back to coeffq+16*17, +16*1 and +16*16 (stride 32); the fourth is handed to
; the caller-supplied tx2q target in registers.
3125*c0909341SAndroid Build Coastguard Workercglobal iadst_16x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
3126*c0909341SAndroid Build Coastguard Worker    ITX_16X16_ADST_LOAD_ODD_COEFS
3127*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_8bpc).main
3128*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_8bpc).main_pass1_end
3129*c0909341SAndroid Build Coastguard Worker
    ; keep the caller's tx2q in r3 while tx2q is reused for local continuations
3130*c0909341SAndroid Build Coastguard Worker    mov                     r3, tx2q
3131*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end)]
3132*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
3133*c0909341SAndroid Build Coastguard Worker    jmp  m(iadst_8x8_internal_8bpc).pass1_end1
3134*c0909341SAndroid Build Coastguard Worker
3135*c0909341SAndroid Build Coastguard Worker.pass1_end:
    ; NOTE(review): SAVE_8ROWS/LOAD_8ROWS are defined elsewhere; presumably
    ; they store/load m0-m7 at base + i*stride for i = 0..7.
3136*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+16*17, 32
3137*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    rsp+gprsize+16*3, 16
    ; m7 is about to be overwritten with the constant, so park it in slot 0
3138*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
3139*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end1)]
3140*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
3141*c0909341SAndroid Build Coastguard Worker    jmp  m(iadst_8x8_internal_8bpc).pass1_end1
3142*c0909341SAndroid Build Coastguard Worker
3143*c0909341SAndroid Build Coastguard Worker.pass1_end1:
3144*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS     coeffq+16*1, 32
    ; second ADST run, this time over the even-numbered rows
3145*c0909341SAndroid Build Coastguard Worker    ITX_16X16_ADST_LOAD_EVEN_COEFS
3146*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_8bpc).main
3147*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_8bpc).main_pass1_end
3148*c0909341SAndroid Build Coastguard Worker
3149*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end2)]
3150*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
3151*c0909341SAndroid Build Coastguard Worker    jmp  m(iadst_8x8_internal_8bpc).pass1_end1
3152*c0909341SAndroid Build Coastguard Worker
3153*c0909341SAndroid Build Coastguard Worker.pass1_end2:
3154*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+16*16, 32
3155*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    rsp+gprsize+16*3, 16
3156*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
    ; last group: restore the caller's tx2q so the tail continues there
3157*c0909341SAndroid Build Coastguard Worker    mov                   tx2q, r3
3158*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
3159*c0909341SAndroid Build Coastguard Worker    jmp  m(iadst_8x8_internal_8bpc).pass1_end1
3160*c0909341SAndroid Build Coastguard Worker
; Pass 2: two 8-wide column halves, each delegated to the 8x16 ADST code.
3161*c0909341SAndroid Build Coastguard Worker.pass2:
3162*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end)]
3163*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_8x16_internal_8bpc).pass2_pre
3164*c0909341SAndroid Build Coastguard Worker
3165*c0909341SAndroid Build Coastguard Worker.end:
3166*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    rsp+gprsize+16*3, 16
3167*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
3168*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end1)]
    ; NOTE(review): r3 is not written between .pass2 and here in this
    ; function, so it is presumably set by the 8x16 pass-2 code to the
    ; destination pointer for this half -- confirm. r3 is then advanced by
    ; 8 bytes for the second half (consumed at .end1).
3169*c0909341SAndroid Build Coastguard Worker    mov                   dstq, r3
3170*c0909341SAndroid Build Coastguard Worker    lea                     r3, [dstq+8]
3171*c0909341SAndroid Build Coastguard Worker    jmp  m(iadst_8x8_internal_8bpc).end
3172*c0909341SAndroid Build Coastguard Worker
3173*c0909341SAndroid Build Coastguard Worker.end1:
    ; zero the 16 rows (16*16 = 256 bytes) of coefficients just consumed
    ; (REPX presumably repeats the store once per listed index)
3174*c0909341SAndroid Build Coastguard Worker    pxor                    m7, m7
3175*c0909341SAndroid Build Coastguard Worker    REPX   {mova [coeffq+16*x], m7}, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15
3176*c0909341SAndroid Build Coastguard Worker
    ; step to the next 256 bytes of coefficients and to dst+8
3177*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 32*8
3178*c0909341SAndroid Build Coastguard Worker    mov                   dstq, r3
3179*c0909341SAndroid Build Coastguard Worker
    ; load rows 0, 2, ..., 14: rows 0/2 -> stack slots 7/8, rows 12/14 ->
    ; slots 5/6, rows 4..10 stay in m0-m3
3180*c0909341SAndroid Build Coastguard Worker    mova                    m4, [coeffq+16*0 ]
3181*c0909341SAndroid Build Coastguard Worker    mova                    m5, [coeffq+16*2 ]
3182*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*4 ]
3183*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*6 ]
3184*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*8 ]
3185*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*10]
3186*c0909341SAndroid Build Coastguard Worker    mova                    m6, [coeffq+16*12]
3187*c0909341SAndroid Build Coastguard Worker    mova                    m7, [coeffq+16*14]
3188*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*7], m4
3189*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*8], m5
3190*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*5], m6
3191*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*6], m7
    ; the second half is finished entirely by the 8x16 code (its own .end)
3192*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(iadst_8x16_internal_8bpc).end)]
3193*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_8x16_internal_8bpc).pass2_main
3194*c0909341SAndroid Build Coastguard Worker
3195*c0909341SAndroid Build Coastguard Worker
; 16x16 inverse transform with flipped ADST as the first pass.
; NOTE(review): INV_TXFM_16X16_FN is defined outside this chunk; presumably
; each line below instantiates the public entry point for that
; (first, second) transform pair -- confirm against the macro.
3196*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN flipadst, dct
3197*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN flipadst, adst
3198*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN flipadst, flipadst
3199*c0909341SAndroid Build Coastguard Worker
; iflipadst_16x16_internal_8bpc
; Same overall structure as iadst_16x16_internal_8bpc: it reuses the
; m(iadst_16x8_internal_8bpc) main routines, but finishes each group of eight
; registers through m(iflipadst_8x8_internal_8bpc) with pw_m8192 instead of
; pw_8192. The first two groups are stored to coeffq+16*1 and then
; coeffq+16*17 (the reverse of the iadst order).
; Control flow is continuation style via tx2q. NOTE(review): the shared tails
; are outside this chunk; they presumably finish by jumping through tx2q.
3200*c0909341SAndroid Build Coastguard Workercglobal iflipadst_16x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
3201*c0909341SAndroid Build Coastguard Worker    ITX_16X16_ADST_LOAD_ODD_COEFS
3202*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_8bpc).main
3203*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_8bpc).main_pass1_end
3204*c0909341SAndroid Build Coastguard Worker
    ; keep the caller's tx2q in r3 while tx2q is reused for local continuations
3205*c0909341SAndroid Build Coastguard Worker    mov                     r3, tx2q
3206*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end)]
3207*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_m8192)]
3208*c0909341SAndroid Build Coastguard Worker    jmp  m(iflipadst_8x8_internal_8bpc).pass1_end1
3209*c0909341SAndroid Build Coastguard Worker
3210*c0909341SAndroid Build Coastguard Worker.pass1_end:
    ; NOTE(review): SAVE_8ROWS/LOAD_8ROWS are defined elsewhere; presumably
    ; they store/load m0-m7 at base + i*stride for i = 0..7.
3211*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS     coeffq+16*1, 32
3212*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    rsp+gprsize+16*3, 16
    ; m7 is about to be overwritten with the constant, so park it in slot 0
3213*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
3214*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end1)]
3215*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_m8192)]
3216*c0909341SAndroid Build Coastguard Worker    jmp  m(iflipadst_8x8_internal_8bpc).pass1_end1
3217*c0909341SAndroid Build Coastguard Worker
3218*c0909341SAndroid Build Coastguard Worker.pass1_end1:
3219*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+16*17, 32
    ; second ADST run, this time over the even-numbered rows
3220*c0909341SAndroid Build Coastguard Worker    ITX_16X16_ADST_LOAD_EVEN_COEFS
3221*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_8bpc).main
3222*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_8bpc).main_pass1_end
3223*c0909341SAndroid Build Coastguard Worker
    ; Unlike the iadst version, the group held in registers is not finished
    ; first: it is parked at coeffq+16*0 (m7 reloaded from slot 0 beforehand)
    ; and the group held on the stack goes through the tail first.
3224*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*0]
3225*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS     coeffq+16*0, 32
3226*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    rsp+gprsize+16*3, 16
3227*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
3228*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end2)]
3229*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_m8192)]
3230*c0909341SAndroid Build Coastguard Worker    jmp  m(iflipadst_8x8_internal_8bpc).pass1_end1
3231*c0909341SAndroid Build Coastguard Worker
3232*c0909341SAndroid Build Coastguard Worker.pass1_end2:
3233*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+16*16, 32
    ; fetch the group parked at coeffq+16*0 above and finish it last, with
    ; the caller's tx2q restored so the tail continues there
3234*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    coeffq+16* 0, 32
3235*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
3236*c0909341SAndroid Build Coastguard Worker    mov                   tx2q, r3
3237*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_m8192)]
3238*c0909341SAndroid Build Coastguard Worker    jmp m(iflipadst_8x8_internal_8bpc).pass1_end1
3239*c0909341SAndroid Build Coastguard Worker
; Pass 2: two 8-wide column halves, each delegated to the 8x16 flipadst code.
3240*c0909341SAndroid Build Coastguard Worker.pass2:
3241*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end)]
    ; r3 = destination of the second half (dst + 8 bytes), consumed at .end1
3242*c0909341SAndroid Build Coastguard Worker    lea                     r3, [dstq+8]
3243*c0909341SAndroid Build Coastguard Worker    jmp m(iflipadst_8x16_internal_8bpc).pass2_pre
3244*c0909341SAndroid Build Coastguard Worker
3245*c0909341SAndroid Build Coastguard Worker.end:
3246*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    rsp+gprsize+16*3, 16
3247*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
3248*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end1)]
    ; advance dst by two strides before writing the next eight rows
    ; NOTE(review): assumes the preceding write left dstq six rows further
    ; down -- confirm against the 8x8 .end code
3249*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [dstq+strideq*2]
3250*c0909341SAndroid Build Coastguard Worker    jmp  m(iflipadst_8x8_internal_8bpc).end
3251*c0909341SAndroid Build Coastguard Worker
3252*c0909341SAndroid Build Coastguard Worker.end1:
    ; zero the 16 rows (16*16 = 256 bytes) of coefficients just consumed
    ; (REPX presumably repeats the store once per listed index)
3253*c0909341SAndroid Build Coastguard Worker    pxor                    m7, m7
3254*c0909341SAndroid Build Coastguard Worker    REPX   {mova [coeffq+16*x], m7}, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15
3255*c0909341SAndroid Build Coastguard Worker
    ; step to the next 256 bytes of coefficients
3256*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 32*8
3257*c0909341SAndroid Build Coastguard Worker
    ; load rows 0, 2, ..., 14: rows 0/2 -> stack slots 7/8, rows 12/14 ->
    ; slots 5/6, rows 4..10 stay in m0-m3
3258*c0909341SAndroid Build Coastguard Worker    mova                    m4, [coeffq+16*0 ]
3259*c0909341SAndroid Build Coastguard Worker    mova                    m5, [coeffq+16*2 ]
3260*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*4 ]
3261*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*6 ]
3262*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*8 ]
3263*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*10]
3264*c0909341SAndroid Build Coastguard Worker    mova                    m6, [coeffq+16*12]
3265*c0909341SAndroid Build Coastguard Worker    mova                    m7, [coeffq+16*14]
3266*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*7], m4
3267*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*8], m5
3268*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*5], m6
3269*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*6], m7
3270*c0909341SAndroid Build Coastguard Worker
3271*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end2)]
    ; switch to the second half's destination saved in r3 at .pass2
3272*c0909341SAndroid Build Coastguard Worker    mov                   dstq, r3
3273*c0909341SAndroid Build Coastguard Worker    jmp m(iflipadst_8x16_internal_8bpc).pass2_main
3274*c0909341SAndroid Build Coastguard Worker
3275*c0909341SAndroid Build Coastguard Worker.end2:
3276*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    rsp+gprsize+16*3, 16
3277*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
    ; final write: continue at the shared 8x16 idct epilogue afterwards
3278*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(idct_8x16_internal_8bpc).end1)]
3279*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [dstq+strideq*2]
3280*c0909341SAndroid Build Coastguard Worker    jmp  m(iflipadst_8x8_internal_8bpc).end
3281*c0909341SAndroid Build Coastguard Worker
3282*c0909341SAndroid Build Coastguard Worker
; IDTX16B src/dst, tmp, const
;   %1 = index of the register scaled in place
;   %2 = index of a scratch register (overwritten)
;   %3 = index of the register holding pw_1697x16
; Per 16-bit lane: tmp = pmulhrsw(const, src); tmp >>= 1 (arithmetic);
; src = pavgw(src, tmp), i.e. a rounded average of src and the halved product.
; %2 may alias %3 (as in "IDTX16B 7, 6, 6"); the constant register is then
; destroyed, so this form is only safe on the last use of the constant.
; NOTE(review): the three-operand pmulhrsw form relies on the assembler macro
; layer to expand it for SSE targets -- confirm against x86inc.
3283*c0909341SAndroid Build Coastguard Worker%macro IDTX16B 3 ; src/dst, tmp, pw_1697x16
3284*c0909341SAndroid Build Coastguard Worker    pmulhrsw            m%2, m%3, m%1
3285*c0909341SAndroid Build Coastguard Worker    psraw               m%2, 1
3286*c0909341SAndroid Build Coastguard Worker    pavgw               m%1, m%2
3287*c0909341SAndroid Build Coastguard Worker%endmacro
3288*c0909341SAndroid Build Coastguard Worker
; 16x16 inverse transform with identity as the first pass.
; NOTE(review): INV_TXFM_16X16_FN is defined outside this chunk; presumably
; each line below instantiates the public entry point for that
; (first, second) transform pair -- confirm against the macro.
3289*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN identity, dct
3290*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN identity, identity
3291*c0909341SAndroid Build Coastguard Worker
; iidentity_16x16_internal_8bpc
; Pass 1 processes four groups of eight rows (stride 32) starting at coeffq
; offsets 16*17, 16*16, 16*1 and 16*0, in that order. Each group is scaled
; with IDTX16B and handed to m(idct_8x8_internal_8bpc).pass1_end3.
; Control flow is continuation style via tx2q. NOTE(review): the shared tails
; are outside this chunk; they presumably finish by jumping through tx2q.
; The first three results are stored back in place; the fourth continues at
; the caller-supplied tx2q (saved in r3) with its result in registers.
3292*c0909341SAndroid Build Coastguard Workercglobal iidentity_16x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
3293*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 16*17
3294*c0909341SAndroid Build Coastguard Worker    mov                     r3, tx2q
3295*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end)]
3296*c0909341SAndroid Build Coastguard Worker
3297*c0909341SAndroid Build Coastguard Worker.pass1:
    ; m6 = scaling constant, m5 = scratch for the first six rows
3298*c0909341SAndroid Build Coastguard Worker    mova                    m6, [o(pw_1697x16)]
3299*c0909341SAndroid Build Coastguard Worker    mova                    m7, [coeffq+32*6]
3300*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+32*0]
3301*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+32*1]
3302*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+32*2]
3303*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+32*3]
3304*c0909341SAndroid Build Coastguard Worker    mova                    m4, [coeffq+32*4]
3305*c0909341SAndroid Build Coastguard Worker    REPX     {IDTX16B x, 5, 6}, 7, 0, 1, 2, 3, 4
    ; row 6 (held in m7) is scaled and parked in stack slot 1 so that m7 can
    ; serve as scratch for row 5 and then hold row 7
3306*c0909341SAndroid Build Coastguard Worker    mova                    m5, [coeffq+32*5]
3307*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*1], m7
3308*c0909341SAndroid Build Coastguard Worker    IDTX16B                  5, 7, 6
3309*c0909341SAndroid Build Coastguard Worker    mova                    m7, [coeffq+32*7]
    ; last use of the constant: m6 doubles as scratch and is destroyed
    ; (it is reloaded at the top of .pass1 on the next round)
3310*c0909341SAndroid Build Coastguard Worker    IDTX16B                  7, 6, 6
3311*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end3
3312*c0909341SAndroid Build Coastguard Worker
3313*c0909341SAndroid Build Coastguard Worker.pass1_end:
    ; NOTE(review): SAVE_8ROWS/LOAD_8ROWS are defined elsewhere; presumably
    ; they store/load m0-m7 at base + i*stride for i = 0..7.
3314*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS          coeffq, 32
    ; 16*17 - 16 = offset 16*16 from the original coeffq
3315*c0909341SAndroid Build Coastguard Worker    sub                 coeffq, 16
3316*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end1)]
3317*c0909341SAndroid Build Coastguard Worker    jmp .pass1
3318*c0909341SAndroid Build Coastguard Worker
3319*c0909341SAndroid Build Coastguard Worker.pass1_end1:
3320*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS          coeffq, 32
    ; 16*16 - 15*16 = offset 16*1 from the original coeffq
3321*c0909341SAndroid Build Coastguard Worker    sub                 coeffq, 15*16
3322*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end2)]
3323*c0909341SAndroid Build Coastguard Worker    jmp .pass1
3324*c0909341SAndroid Build Coastguard Worker
3325*c0909341SAndroid Build Coastguard Worker.pass1_end2:
3326*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS          coeffq, 32
    ; back to the original coeffq; the last group continues at the caller's
    ; tx2q with its result left in registers
3327*c0909341SAndroid Build Coastguard Worker    sub                 coeffq, 16
3328*c0909341SAndroid Build Coastguard Worker    mov                   tx2q, r3
3329*c0909341SAndroid Build Coastguard Worker    jmp .pass1
3330*c0909341SAndroid Build Coastguard Worker
; Pass 2: .end scales and writes one group of eight rows and is re-entered
; four times through the tx2q chain .end1 -> .end2 -> .end3 -> external.
; NOTE(review): .pass2 loads nothing before falling into .end, so m0-m7
; presumably already hold the first group on entry -- confirm with the caller.
3331*c0909341SAndroid Build Coastguard Worker.pass2:
    ; r3 = destination of the right half (dst + 8 bytes), consumed at .end2
3332*c0909341SAndroid Build Coastguard Worker    lea                     r3, [dstq+8]
3333*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end1)]
3334*c0909341SAndroid Build Coastguard Worker
3335*c0909341SAndroid Build Coastguard Worker.end:
    ; free m7 (constant) and m4 (scratch) by spilling rows 7 and 4
3336*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
3337*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*1], m4
3338*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_1697x16)]
    ; NOTE(review): IDTX16 is defined outside this chunk; presumably the
    ; pass-2 identity scaling with the same (src/dst, tmp, const) argument
    ; order as IDTX16B -- confirm.
3339*c0909341SAndroid Build Coastguard Worker    REPX      {IDTX16 x, 4, 7}, 5, 6, 0, 1, 2, 3
3340*c0909341SAndroid Build Coastguard Worker    mova                    m4, [o(pw_2048)]
3341*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m5, m4
3342*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m6, m4
    ; finished row 5 -> slot 2; fetch spilled row 4 into m5; finished row 6
    ; -> slot 1
3343*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*2], m5
3344*c0909341SAndroid Build Coastguard Worker    mova                    m5, [rsp+gprsize+16*1]
3345*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*1], m6
3346*c0909341SAndroid Build Coastguard Worker    IDTX16                   5, 6, 7
    ; fetch spilled row 7 into m6; the constant in m7 is also passed as the
    ; scratch register here (its last use)
3347*c0909341SAndroid Build Coastguard Worker    mova                    m6, [rsp+gprsize+16*0]
3348*c0909341SAndroid Build Coastguard Worker    IDTX16                   6, 7, 7
3349*c0909341SAndroid Build Coastguard Worker    REPX      {pmulhrsw x, m4}, m0, m1, m2, m3, m6
    ; m4 = row 4 (held in m5) * pw_2048, so row 4 ends up back in m4
3350*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m4, m5
    ; finished row 7 -> slot 0
3351*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m6
3352*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).end3
3353*c0909341SAndroid Build Coastguard Worker
3354*c0909341SAndroid Build Coastguard Worker.end1:
3355*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*1, 32
3356*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end2)]
    ; NOTE(review): assumes the preceding write left dstq six rows further
    ; down -- confirm against m(idct_8x8_internal_8bpc).end3
3357*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [dstq+strideq*2]
3358*c0909341SAndroid Build Coastguard Worker    jmp .end
3359*c0909341SAndroid Build Coastguard Worker
3360*c0909341SAndroid Build Coastguard Worker.end2:
    ; zero the 16 rows (16*16 = 256 bytes) of coefficients just consumed
    ; (REPX presumably repeats the store once per listed index)
3361*c0909341SAndroid Build Coastguard Worker    pxor                    m7, m7
3362*c0909341SAndroid Build Coastguard Worker    REPX   {mova [coeffq+16*x], m7}, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15
3363*c0909341SAndroid Build Coastguard Worker
    ; step to the next 256 bytes of coefficients and to the right half of dst
3364*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 32*8
3365*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS          coeffq, 32
3366*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end3)]
3367*c0909341SAndroid Build Coastguard Worker    mov                   dstq, r3
3368*c0909341SAndroid Build Coastguard Worker    jmp .end
3369*c0909341SAndroid Build Coastguard Worker
3370*c0909341SAndroid Build Coastguard Worker.end3:
3371*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*1, 32
    ; final group: continue at the shared 8x16 idct epilogue afterwards
3372*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(idct_8x16_internal_8bpc).end1)]
3373*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [dstq+strideq*2]
3374*c0909341SAndroid Build Coastguard Worker    jmp .end
3375*c0909341SAndroid Build Coastguard Worker
3376*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_dct_dct_8x32_8bpc
; Entry point for the 8x32 DCT/DCT inverse transform + add.
; Named arguments: dst, stride, coeff, eob (tx2 is an extra named register).
; NOTE(review): per the usual x86inc cglobal convention the numeric fields
; mean 4 arguments, 6 GPRs, 8 vector registers and 16*36 bytes of stack --
; confirm against the cglobal definition used by this file.
; eob == 0 takes the DC-only shortcut (.dconly); otherwise the work is done by
; m(idct_8x32_internal_8bpc).
3377*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_8x32_8bpc, 4, 6, 8, 16*36, dst, stride, coeff, eob, tx2
3378*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32
    ; 32-bit builds: r5 = $$ (start of the current section), presumably the
    ; base register used by the o() addressing macro -- confirm
3379*c0909341SAndroid Build Coastguard Worker    LEA                     r5, $$
3380*c0909341SAndroid Build Coastguard Worker%endif
3381*c0909341SAndroid Build Coastguard Worker    test                  eobd, eobd
3382*c0909341SAndroid Build Coastguard Worker    jz .dconly
3383*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x32_internal_8bpc)
3384*c0909341SAndroid Build Coastguard Worker    RET
3385*c0909341SAndroid Build Coastguard Worker
3386*c0909341SAndroid Build Coastguard Worker.dconly:
    ; Only the low word of each register matters until the broadcast below;
    ; movd loads just the low 32 bits of each constant.
3387*c0909341SAndroid Build Coastguard Worker    movd                 m1, [o(pw_2896x8)]
3388*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m1, [coeffq]
3389*c0909341SAndroid Build Coastguard Worker    movd                 m2, [o(pw_8192)]
    ; eobd is zero on this path (jz above), so this clears the DC coefficient
3390*c0909341SAndroid Build Coastguard Worker    mov            [coeffq], eobd
3391*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m2
3392*c0909341SAndroid Build Coastguard Worker    psrlw                m2, 2            ;pw_2048
3393*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m1
3394*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m2
    ; broadcast the low word of m0 to all eight 16-bit lanes
3395*c0909341SAndroid Build Coastguard Worker    pshuflw              m0, m0, q0000
3396*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m0
    ; NOTE(review): r3d = 8 is presumably the iteration count of the shared
    ; 8x8 add loop, which presumably returns through tx2q -- confirm against
    ; m(inv_txfm_add_dct_dct_8x8_8bpc).loop
3397*c0909341SAndroid Build Coastguard Worker    mov                 r3d, 8
3398*c0909341SAndroid Build Coastguard Worker    lea                tx2q, [o(.end)]
3399*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_8x8_8bpc).loop
3400*c0909341SAndroid Build Coastguard Worker
3401*c0909341SAndroid Build Coastguard Worker.end:
3402*c0909341SAndroid Build Coastguard Worker    RET
3403*c0909341SAndroid Build Coastguard Worker
3404*c0909341SAndroid Build Coastguard Worker
3405*c0909341SAndroid Build Coastguard Worker
3406*c0909341SAndroid Build Coastguard Workercglobal idct_8x32_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
3407*c0909341SAndroid Build Coastguard Worker    cmp                   eobd, 106
3408*c0909341SAndroid Build Coastguard Worker    jle .fast
3409*c0909341SAndroid Build Coastguard Worker
3410*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*3, 64
3411*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
3412*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
3413*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1)]
3414*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
3415*c0909341SAndroid Build Coastguard Worker
3416*c0909341SAndroid Build Coastguard Worker.pass1:
3417*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*9 ], m0                        ;in24
3418*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*10], m4                        ;in28
3419*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*17], m2                        ;in26
3420*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*18], m6                        ;in30
3421*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*31], m1                        ;in25
3422*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*30], m3                        ;in27
3423*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*27], m5                        ;in29
3424*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*34], m7                        ;in31
3425*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*2, 64
3426*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
3427*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
3428*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_1)]
3429*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
3430*c0909341SAndroid Build Coastguard Worker
3431*c0909341SAndroid Build Coastguard Worker.pass1_1:
3432*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*7 ], m0                        ;in16
3433*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*8 ], m4                        ;in20
3434*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*15], m2                        ;in18
3435*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*16], m6                        ;in22
3436*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*33], m1                        ;in17
3437*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*28], m3                        ;in19
3438*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*29], m5                        ;in21
3439*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*32], m7                        ;in23
3440*c0909341SAndroid Build Coastguard Worker
3441*c0909341SAndroid Build Coastguard Worker.fast:
3442*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*1, 64
3443*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
3444*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
3445*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end)]
3446*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
3447*c0909341SAndroid Build Coastguard Worker
3448*c0909341SAndroid Build Coastguard Worker.pass1_end:
3449*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*5 ], m0                        ;in8
3450*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*6 ], m4                        ;in12
3451*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*13], m2                        ;in10
3452*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*14], m6                        ;in14
3453*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*21], m1                        ;in9
3454*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*24], m3                        ;in11
3455*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*25], m5                        ;in13
3456*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*20], m7                        ;in15
3457*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*0, 64
3458*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
3459*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
3460*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end1)]
3461*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
3462*c0909341SAndroid Build Coastguard Worker
; .pass1_end1
; Continuation reached through tx2q: the code just above loads tx2q with this
; label's address and then jumps to m(idct_8x8_internal_8bpc).pass1_end1.
;
; Work done here:
;   1. Spill m2/m6 (in2/in6) and m1/m3/m5/m7 (in1/in3/in5/in7) to 16-byte
;      stack slots addressed as rsp+gprsize+16*n, and bring in4/in8/in12 into
;      m1/m2/m3. m0 is left exactly as it arrived.
;   2. Branch on eob (signed compare): eob > 106 goes to .full, otherwise the
;      reduced path below runs with m4-m7 forced to zero.
;   3. Reduced path: two even-half passes (8x8 .main, then 16x8 .main), each
;      followed by a store of the results, then .main_fast, then .pass2.
;
; NOTE(review): SAVE_7ROWS / SAVE_8ROWS are macros defined outside this part
; of the file; from their names and arguments they presumably store 7 / 8
; registers to consecutive slots starting at the given address -- confirm.
; NOTE(review): the origin of the eob threshold 106 is not visible here.
3463*c0909341SAndroid Build Coastguard Worker.pass1_end1:
3464*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*11], m2                        ;in2
3465*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*12], m6                        ;in6
3466*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*19], m1                        ;in1
3467*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*26], m3                        ;in3
3468*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*23], m5                        ;in5
3469*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*22], m7                        ;in7
3470*c0909341SAndroid Build Coastguard Worker    mova                    m1, m4                        ;in4
3471*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize+16*5 ]       ;in8
3472*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize+16*6 ]       ;in12
3473*c0909341SAndroid Build Coastguard Worker
; eob > 106: take the path that loads all inputs instead of zeroing them.
3474*c0909341SAndroid Build Coastguard Worker    cmp                   eobd, 106
3475*c0909341SAndroid Build Coastguard Worker    jg .full
3476*c0909341SAndroid Build Coastguard Worker
; Reduced path, first pass: m0-m3 carry data, m4-m7 are cleared.
3477*c0909341SAndroid Build Coastguard Worker    pxor                    m4, m4
3478*c0909341SAndroid Build Coastguard Worker    REPX          {mova x, m4}, m5, m6, m7
3479*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
3480*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS   rsp+gprsize+16*3 , 16
; Second pass: inputs come from slots 11-14 (slots 11/12 were written above
; as in2/in6; slots 13/14 are filled outside this view), m4-m7 cleared again.
3481*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize+16*11]
3482*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize+16*12]
3483*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize+16*13]
3484*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize+16*14]
3485*c0909341SAndroid Build Coastguard Worker    pxor                    m4, m4
3486*c0909341SAndroid Build Coastguard Worker    REPX          {mova x, m4}, m5, m6, m7
3487*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
; m7 is refilled from slot 0 before the 8-row store; presumably the callee
; parks one of its results there -- confirm against idct_16x8 .main.
3488*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*0]
3489*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS   rsp+gprsize+16*11, 16
3490*c0909341SAndroid Build Coastguard Worker
; Odd half using the 8-input variant, then continue with the shared tail.
3491*c0909341SAndroid Build Coastguard Worker    call .main_fast
3492*c0909341SAndroid Build Coastguard Worker    jmp  .pass2
3493*c0909341SAndroid Build Coastguard Worker
; .full
; Taken from .pass1_end1 when eob > 106. Same sequence as the reduced path
; there, except that nothing is zeroed: m4-m7 are loaded from slots 7-10
; (in16/in20/in24/in28) for the first pass, all eight registers are loaded
; from slots 11-18 for the second pass, and .main (the variant that reads
; every odd-input slot) is called instead of .main_fast.
; Execution falls through into .pass2 after the call returns.
;
; NOTE(review): LOAD_8ROWS / SAVE_7ROWS / SAVE_8ROWS are macros defined
; outside this part of the file; the slot ranges quoted above assume they
; move consecutive 16-byte rows starting at the given address -- confirm.
3494*c0909341SAndroid Build Coastguard Worker.full:
3495*c0909341SAndroid Build Coastguard Worker    mova                    m4, [rsp+gprsize+16*7 ]       ;in16
3496*c0909341SAndroid Build Coastguard Worker    mova                    m5, [rsp+gprsize+16*8 ]       ;in20
3497*c0909341SAndroid Build Coastguard Worker    mova                    m6, [rsp+gprsize+16*9 ]       ;in24
3498*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*10]       ;in28
3499*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
3500*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS   rsp+gprsize+16*3 , 16
3501*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*11, 16
3502*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
; Refill m7 from slot 0 before storing eight rows (same pattern as the
; reduced path; presumably the callee leaves a result there -- confirm).
3503*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*0]
3504*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS   rsp+gprsize+16*11, 16
3505*c0909341SAndroid Build Coastguard Worker    call .main
3506*c0909341SAndroid Build Coastguard Worker
; .pass2 / .end / .end1 ... .end6
; Output stage, written as a chain of continuations. Each step loads tx2q
; with the address of the next step and jumps to
; m(idct_8x8_internal_8bpc).end, which is expected to come back through tx2q
; (NOTE(review): that routine is outside this view -- confirm it ends with a
; jump through tx2q and check how far it advances dstq itself).
;
;   .pass2  r3   = .end6, the final continuation of the chain
;   .end    slot 0 = m7 (frees m7), tx2q = .end2
;   .end1   zero the coefficient buffer: 32 stores of 16 bytes at
;           coeffq+16*0 .. coeffq+16*31 (512 bytes), then jump through tx2q
;   .end2   first group: uses the registers as they are
;   .end3   second group: rows reloaded from slot 11 onward
;   .end4   third group:  rows reloaded from slot 19 onward
;   .end5   fourth group: rows reloaded from slot 27 onward; tx2q = r3
;   .end6   return to the caller
;
; Before groups two to four m7 is saved to slot 0 and dstq is advanced by
; two strides. .end and .end1 are separate labels, so other code can
; presumably enter the chain there with its own r3 / tx2q -- confirm.
3507*c0909341SAndroid Build Coastguard Worker.pass2:
3508*c0909341SAndroid Build Coastguard Worker    lea                     r3, [o(.end6)]
3509*c0909341SAndroid Build Coastguard Worker
3510*c0909341SAndroid Build Coastguard Worker.end:
3511*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*0 ], m7
3512*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end2)]
3513*c0909341SAndroid Build Coastguard Worker
3514*c0909341SAndroid Build Coastguard Worker.end1:
; m7 is used as the zero source; its previous value was saved to slot 0 above.
3515*c0909341SAndroid Build Coastguard Worker    pxor                    m7, m7
3516*c0909341SAndroid Build Coastguard Worker    REPX   {mova [coeffq+16*x], m7}, 0,  1,  2,  3,  4,  5,  6,  7,  \
3517*c0909341SAndroid Build Coastguard Worker                                     8,  9,  10, 11, 12, 13, 14, 15, \
3518*c0909341SAndroid Build Coastguard Worker                                     16, 17, 18, 19, 20, 21, 22, 23, \
3519*c0909341SAndroid Build Coastguard Worker                                     24, 25, 26, 27, 28, 29, 30, 31
3520*c0909341SAndroid Build Coastguard Worker
3521*c0909341SAndroid Build Coastguard Worker    jmp                   tx2q
3522*c0909341SAndroid Build Coastguard Worker
3523*c0909341SAndroid Build Coastguard Worker.end2:
3524*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end3)]
3525*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).end
3526*c0909341SAndroid Build Coastguard Worker
3527*c0909341SAndroid Build Coastguard Worker.end3:
3528*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*11, 16
3529*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*0 ], m7
3530*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [dstq+strideq*2]
3531*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end4)]
3532*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).end
3533*c0909341SAndroid Build Coastguard Worker
3534*c0909341SAndroid Build Coastguard Worker.end4:
3535*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*19, 16
3536*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*0 ], m7
3537*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [dstq+strideq*2]
3538*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end5)]
3539*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).end
3540*c0909341SAndroid Build Coastguard Worker
3541*c0909341SAndroid Build Coastguard Worker.end5:
3542*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*27, 16
3543*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*0 ], m7
3544*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [dstq+strideq*2]
; Last group: hand over the continuation that was chosen in r3.
3545*c0909341SAndroid Build Coastguard Worker    mov                   tx2q, r3
3546*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).end
3547*c0909341SAndroid Build Coastguard Worker
3548*c0909341SAndroid Build Coastguard Worker.end6:
3549*c0909341SAndroid Build Coastguard Worker    ret
3550*c0909341SAndroid Build Coastguard Worker
3551*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .main_veryfast
; Smallest-input entry to the odd half (t16..t31) of the transform: only four
; input rows are read -- slots 19, 22, 23 and 26, i.e. in1, in7, in5 and in3
; according to the inline comments.
;
; Because each first-stage pair has a single input, there is no add/subtract
; step: one product is stored directly as the "sum" term (t16, t31, t19, t28,
; t20, t27) and the same two products are then rotated by ITX_MULSUB_2W to
; give the "a" terms (t17a/t30a, t18a/t29a, t21a/t26a).
;
; On exit (jmp .main2):
;   slots 19-24 and 29-34  hold the first-stage terms named in the comments
;   m0 = m3 = 0, m1 = in3 * pw_4052x8, m2 = in3 * pw_m601x8 (pmulhrsw)
;   m7 = pd_2048
;
; Stack slots are addressed with gprsize*2 here, versus gprsize in the code
; that calls the sibling entries; presumably the extra gprsize is the return
; address pushed by `call`, so slot n is the same memory in both -- confirm.
; NOTE(review): pmulhrsw returns (a*b + 0x4000) >> 15 per word; the "x8"
; constant names suggest coefficient*8, i.e. a rounded multiply by
; coefficient/4096 -- confirm against the constant table.
; NOTE(review): ITX_MULSUB_2W is defined outside this view; argument roles
; (two in/out registers, two temporaries, rounding register, two
; coefficients) are inferred from its uses here.
3552*c0909341SAndroid Build Coastguard Workercglobal_label .main_veryfast
3553*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*19]     ;in1
3554*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m0, [o(pw_4091x8)]        ;t30,t31
3555*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, [o(pw_201x8)]             ;t16,t17
3556*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pd_2048)]
3557*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*19], m0                        ;t16
3558*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*34], m3                        ;t31
3559*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            3, 0, 1, 2, 7,  799, 4017    ;t17a, t30a
3560*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*20], m3                        ;t17a
3561*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*33], m0                        ;t30a
3562*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*22]     ;in7
3563*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m1, [o(pw_3857x8)]        ;t28,t29
3564*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, [o(pw_m1380x8)]           ;t18,t19
; Slot 22 is overwritten only after in7 has been read from it above.
3565*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*22], m1                        ;t19
3566*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*31], m2                        ;t28
3567*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            2, 1, 0, 3, 7, m4017, 799    ;t18a, t29a
3568*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*21], m2                        ;t18a
3569*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*32], m1                        ;t29a
3570*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*23]     ;in5
3571*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m0, [o(pw_3973x8)]        ;t26, t27
3572*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, [o(pw_995x8)]             ;t20, t21
3573*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*23], m0                        ;t20
3574*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*30], m3                        ;t27
3575*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            3, 0, 1, 2, 7, 3406, 2276    ;t21a, t26a
3576*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*24], m3                        ;t21a
3577*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*29], m0                        ;t26a
3578*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*26]     ;in3
; m0 and m3 stand in for the two products of the missing partner input:
; they are zero, so the add/subtract at .main2 passes m2 / m1 through.
3579*c0909341SAndroid Build Coastguard Worker    pxor                    m0, m0
3580*c0909341SAndroid Build Coastguard Worker    mova                    m3, m0
3581*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, m2, [o(pw_4052x8)]
3582*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, [o(pw_m601x8)]
3583*c0909341SAndroid Build Coastguard Worker    jmp .main2
3584*c0909341SAndroid Build Coastguard Worker
3585*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .main_fast
; Reduced-input entry to the odd half (t16..t31) of the transform. Reads
; eight input rows from slots 19-26 (in1, in15, in9, in7, in5, in11, in13,
; in3 per the inline comments); slots 29-34 are only written, never read,
; which matches the "bottom half is zero" note on the label.
;
; Inputs are handled two at a time. For the first three pairs:
;   - each input is multiplied by two constants with pmulhrsw (a one-input
;     replacement for the two-input rotation used in .main),
;   - the four products are combined with saturating add/subtract
;     (paddsw/psubsw),
;   - the two differences are rotated with ITX_MULSUB_2W,
;   - the four results overwrite stack slots (two of them the slots the
;     inputs were just read from).
; For the last pair (in13/in3) only the multiplies are done; the products
; stay in m0-m3 and the add/subtract happens at .main2.
;
; On exit (jmp .main2): m0/m3 = in13 products, m1/m2 = in3 products,
; m7 = pd_2048, slots 19-24 and 29-34 hold the terms named in the comments.
;
; gprsize*2 in the addresses: the caller uses gprsize for the same slots and
; reaches this label with `call`, so the extra gprsize presumably skips the
; pushed return address -- confirm.
; NOTE(review): ITX_MULSUB_2W and the pw_*x8 constants are defined outside
; this view; their exact semantics are inferred from names and usage.
3586*c0909341SAndroid Build Coastguard Workercglobal_label .main_fast ;bottom half is zero
3587*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*19]     ;in1
3588*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*20]     ;in15
3589*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m0, [o(pw_4091x8)]        ;t31a
3590*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, [o(pw_201x8)]             ;t16a
3591*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m1, [o(pw_3035x8)]        ;t30a
3592*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, [o(pw_m2751x8)]           ;t17a
; m7 stays pd_2048 for every ITX_MULSUB_2W below and on into .main2.
3593*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pd_2048)]
3594*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m1                    ;t17
3595*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m1                        ;t16
3596*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m3, m2                    ;t30
3597*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m2                        ;t31
3598*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 1, 2, 7,  799, 4017    ;t17a, t30a
3599*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*19], m0                        ;t16
3600*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*20], m5                        ;t17a
3601*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*33], m4                        ;t30a
3602*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*34], m3                        ;t31
; Second pair: in9 / in7.
3603*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*21]     ;in9
3604*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*22]     ;in7
3605*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m0, [o(pw_3703x8)]
3606*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, [o(pw_1751x8)]
3607*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m1, [o(pw_3857x8)]
3608*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, [o(pw_m1380x8)]
3609*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m1, m0                    ;t18
3610*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m1                        ;t19
3611*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m2, m3                    ;t29
3612*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m2                        ;t28
3613*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 1, 2, 7, m4017, 799    ;t18a, t29a
3614*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*21], m5                        ;t18a
3615*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*22], m0                        ;t19
3616*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*31], m3                        ;t28
3617*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*32], m4                        ;t29a
; Third pair: in5 / in11.
3618*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*23]     ;in5
3619*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*24]     ;in11
3620*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m0, [o(pw_3973x8)]
3621*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, [o(pw_995x8)]
3622*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m1, [o(pw_3513x8)]
3623*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, [o(pw_m2106x8)]
3624*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m1                    ;t21
3625*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m1                        ;t20
3626*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m3, m2                    ;t26
3627*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m2                        ;t27
3628*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 1, 2, 7, 3406, 2276    ;t21a, t26a
3629*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*23], m0                        ;t20
3630*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*24], m5                        ;t21a
3631*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*29], m4                        ;t26a
3632*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*30], m3                        ;t27
; Fourth pair: in13 / in3. Products are left in m0-m3 for .main2.
3633*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*25]     ;in13
3634*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*26]     ;in3
3635*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m0, [o(pw_3290x8)]
3636*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, [o(pw_2440x8)]
3637*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, m2, [o(pw_4052x8)]
3638*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, [o(pw_m601x8)]
3639*c0909341SAndroid Build Coastguard Worker    jmp .main2
3640*c0909341SAndroid Build Coastguard Worker
3641*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .main
; Full-input entry to the odd half (t16..t31) of the transform. Reads all
; sixteen input rows from slots 19-34 (in1 ... in31, odd indices, per the
; inline comments), four at a time.
;
; For each of the first three groups of four:
;   - two ITX_MULSUB_2W rotations turn the four inputs into four "a" terms,
;   - saturating add/subtract (paddsw/psubsw) combines them,
;   - a third ITX_MULSUB_2W rotates the two differences,
;   - the four results are stored back into the same four slots the group's
;     inputs were loaded from (19/20/33/34, 21/22/31/32, 23/24/29/30).
; For the fourth group (slots 25-28) only the two rotations are done; their
; results stay in m0-m3.
;
; There is no jump at the end: execution falls through into .main2, which
; expects m0-m3 as left here and m7 = pd_2048.
;
; gprsize*2 in the addresses: the caller uses gprsize for the same slots and
; reaches this label with `call`, so the extra gprsize presumably skips the
; pushed return address -- confirm.
; NOTE(review): ITX_MULSUB_2W is defined outside this view; argument roles
; (two in/out registers, two temporaries, rounding register, two
; coefficients) are inferred from its uses here.
3642*c0909341SAndroid Build Coastguard Workercglobal_label .main
3643*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pd_2048)]
3644*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*19]     ;in1
3645*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*20]     ;in15
3646*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*33]     ;in17
3647*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*34]     ;in31
3648*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            0, 3, 4, 5, 7,  201, 4091    ;t16a, t31a
3649*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            2, 1, 4, 5, 7, 3035, 2751    ;t17a, t30a
3650*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m2                    ;t17
3651*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m2                        ;t16
3652*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m3, m1                    ;t30
3653*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m1                        ;t31
3654*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 1, 2, 7,  799, 4017    ;t17a, t30a
3655*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*19], m0                        ;t16
3656*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*20], m5                        ;t17a
3657*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*33], m4                        ;t30a
3658*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*34], m3                        ;t31
; Second group: slots 21/22/31/32.
3659*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*21]     ;in9
3660*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*22]     ;in7
3661*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*31]     ;in25
3662*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*32]     ;in23
3663*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            0, 3, 4, 5, 7, 1751, 3703    ;t18a, t29a
3664*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            2, 1, 4, 5, 7, 3857, 1380    ;t19a, t28a
3665*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m2, m0                    ;t18
3666*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m2                        ;t19
3667*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1, m3                    ;t29
3668*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m1                        ;t28
3669*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 1, 2, 7, m4017, 799    ;t18a, t29a
3670*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*21], m5                        ;t18a
3671*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*22], m0                        ;t19
3672*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*31], m3                        ;t28
3673*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*32], m4                        ;t29a
; Third group: slots 23/24/29/30.
3674*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*23]     ;in5
3675*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*24]     ;in11
3676*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*29]     ;in21
3677*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*30]     ;in27
3678*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            0, 3, 4, 5, 7,  995, 3973    ;t20a, t27a
3679*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            2, 1, 4, 5, 7, 3513, 2106    ;t21a, t26a
3680*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m2                    ;t21
3681*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m2                        ;t20
3682*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m3, m1                    ;t26
3683*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m1                        ;t27
3684*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 1, 2, 7, 3406, 2276    ;t21a, t26a
3685*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*23], m0                        ;t20
3686*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*24], m5                        ;t21a
3687*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*29], m4                        ;t26a
3688*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*30], m3                        ;t27
; Fourth group: slots 25-28. Results stay in m0-m3; the add/subtract for
; this group is the first thing done at .main2.
3689*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*25]     ;in13
3690*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*26]     ;in3
3691*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*27]     ;in29
3692*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*28]     ;in19
3693*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            0, 3, 4, 5, 7, 2440, 3290    ;t22a, t25a
3694*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            2, 1, 4, 5, 7, 4052,  601    ;t23a, t24a
3695*c0909341SAndroid Build Coastguard Worker
3696*c0909341SAndroid Build Coastguard Worker.main2:
3697*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m2, m0                    ;t22
3698*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m2                        ;t23
3699*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1, m3                    ;t25
3700*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m1                        ;t24
3701*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 1, 2, 7, m2276, 3406   ;t22a, t25a
3702*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*24]     ;t21a
3703*c0909341SAndroid Build Coastguard Worker    psubsw                  m1, m5, m2                    ;t21
3704*c0909341SAndroid Build Coastguard Worker    paddsw                  m5, m2                        ;t22
3705*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*25], m5                        ;t22
3706*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*29]     ;t26a
3707*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m4, m2                    ;t26
3708*c0909341SAndroid Build Coastguard Worker    paddsw                  m4, m2                        ;t25
3709*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*28], m4                        ;t25
3710*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 1, 2, 4, 7, m3784, 1567   ;t21a, t26a
3711*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*24], m5                        ;t21a
3712*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*29], m1                        ;t26a
3713*c0909341SAndroid Build Coastguard Worker
3714*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*23]     ;t20
3715*c0909341SAndroid Build Coastguard Worker    mova                    m5, [rsp+gprsize*2+16*30]     ;t27
3716*c0909341SAndroid Build Coastguard Worker    psubsw                  m2, m0, m1                    ;t20a
3717*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m1                        ;t23a
3718*c0909341SAndroid Build Coastguard Worker    psubsw                  m6, m3, m5                    ;t27a
3719*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m5                        ;t24a
3720*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            6, 2, 1, 5, 7, m3784, 1567   ;t20, t27
3721*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*26], m0                        ;t23a
3722*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*27], m3                        ;t24a
3723*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*30], m2                        ;t27
3724*c0909341SAndroid Build Coastguard Worker
3725*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*20]     ;t17a
3726*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*21]     ;t18a
3727*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*32]     ;t29a
3728*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*33]     ;t30a
3729*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m1                    ;t18
3730*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m1                        ;t17
3731*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m3, m2                    ;t29
3732*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m2                        ;t30
3733*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 1, 2, 7, 1567, 3784    ;t18a, t29a
3734*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*20], m0                        ;t17
3735*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*21], m5                        ;t18a
3736*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*32], m4                        ;t29a
3737*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*33], m3                        ;t30
3738*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*19]     ;t16
3739*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*22]     ;t19
3740*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*31]     ;t28
3741*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*34]     ;t31
3742*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m1                    ;t19a
3743*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m1                        ;t16a
3744*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m3, m2                    ;t28a
3745*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m2                        ;t31a
3746*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 1, 2, 7, 1567, 3784    ;t19, t28
3747*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*15]     ;tmp12
3748*c0909341SAndroid Build Coastguard Worker    psubsw                  m1, m5, m6                    ;t20a
3749*c0909341SAndroid Build Coastguard Worker    paddsw                  m5, m6                        ;t19a
3750*c0909341SAndroid Build Coastguard Worker    psubsw                  m6, m2, m5                    ;out19
3751*c0909341SAndroid Build Coastguard Worker    paddsw                  m2, m5                        ;out12
3752*c0909341SAndroid Build Coastguard Worker    mova                    m5, [rsp+gprsize*2+16*30]     ;t27
3753*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*22], m6                        ;out19
3754*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*15], m2                        ;out12
3755*c0909341SAndroid Build Coastguard Worker    psubsw                  m6, m4, m5                    ;t27a
3756*c0909341SAndroid Build Coastguard Worker    paddsw                  m4, m5                        ;t28a
3757*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            6, 1, 2, 5, 7, 2896, 2896    ;t20, t27
3758*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*6 ]     ;tmp3
3759*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m2, m4                    ;out28
3760*c0909341SAndroid Build Coastguard Worker    paddsw                  m2, m4                        ;out3
3761*c0909341SAndroid Build Coastguard Worker    mova                    m4, [rsp+gprsize*2+16*14]     ;tmp11
3762*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*31], m5                        ;out28
3763*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*6 ], m2                        ;out3
3764*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m4, m6                    ;out20
3765*c0909341SAndroid Build Coastguard Worker    paddsw                  m4, m6                        ;out11
3766*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*7 ]     ;tmp4
3767*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*23], m5                        ;out20
3768*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*14], m4                        ;out11
3769*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m2, m1                    ;out27
3770*c0909341SAndroid Build Coastguard Worker    paddsw                  m2, m1                        ;out4
3771*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*26]     ;t23a
3772*c0909341SAndroid Build Coastguard Worker    mova                    m4, [rsp+gprsize*2+16*27]     ;t24a
3773*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*30], m5                        ;out27
3774*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*7 ], m2                        ;out4
3775*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m0, m1                    ;t23
3776*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m1                        ;t16
3777*c0909341SAndroid Build Coastguard Worker    psubsw                  m2, m3, m4                    ;t24
3778*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m4                        ;t31
3779*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            2, 5, 4, 6, 7, 2896, 2896    ;t23a, t24a
3780*c0909341SAndroid Build Coastguard Worker    mova                    m6, [rsp+gprsize*2+16*18]     ;tmp15
3781*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m6, m0                    ;out16
3782*c0909341SAndroid Build Coastguard Worker    paddsw                  m6, m0                        ;out15
3783*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*3 ]     ;tmp0
3784*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*11]     ;tmp8
3785*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*18], m6                        ;out15
3786*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*19], m4                        ;out16
3787*c0909341SAndroid Build Coastguard Worker    psubsw                  m6, m0, m3                    ;out31
3788*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m3                        ;out0
3789*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m1, m2                    ;out23
3790*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m2                        ;out8
3791*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*10]     ;tmp7
3792*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*34], m6                        ;out31
3793*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*11], m1                        ;out8
3794*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*26], m4                        ;out23
3795*c0909341SAndroid Build Coastguard Worker    paddsw                  m6, m3, m5                    ;out7
3796*c0909341SAndroid Build Coastguard Worker    psubsw                  m3, m5                        ;out24
3797*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*20]     ;t17
3798*c0909341SAndroid Build Coastguard Worker    mova                    m5, [rsp+gprsize*2+16*25]     ;t22
3799*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*17]     ;tmp14
3800*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*27], m3                        ;out24
3801*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m1, m5                    ;t22a
3802*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m5                        ;t17a
3803*c0909341SAndroid Build Coastguard Worker    psubsw                  m3, m2, m1                    ;out17
3804*c0909341SAndroid Build Coastguard Worker    paddsw                  m2, m1                        ;out14
3805*c0909341SAndroid Build Coastguard Worker    mova                    m5, [rsp+gprsize*2+16*28]     ;t25
3806*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*33]     ;t30
3807*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*17], m2                        ;out14
3808*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*20], m3                        ;out17
3809*c0909341SAndroid Build Coastguard Worker    psubsw                  m2, m1, m5                    ;t25a
3810*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m5                        ;t30a
3811*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            2, 4, 3, 5, 7, 2896, 2896    ;t22, t25
3812*c0909341SAndroid Build Coastguard Worker    mova                    m5, [rsp+gprsize*2+16*4 ]     ;tmp1
3813*c0909341SAndroid Build Coastguard Worker    psubsw                  m3, m5, m1                    ;out30
3814*c0909341SAndroid Build Coastguard Worker    paddsw                  m5, m1                        ;out1
3815*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*12]     ;tmp9
3816*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*33], m3                        ;out30
3817*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*4 ], m5                        ;out1
3818*c0909341SAndroid Build Coastguard Worker    psubsw                  m3, m1, m2                    ;out22
3819*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m2                        ;out9
3820*c0909341SAndroid Build Coastguard Worker    mova                    m5, [rsp+gprsize*2+16*9 ]     ;tmp6
3821*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*25], m3                        ;out22
3822*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*12], m1                        ;out9
3823*c0909341SAndroid Build Coastguard Worker    psubsw                  m3, m5, m4                    ;out25
3824*c0909341SAndroid Build Coastguard Worker    paddsw                  m5, m4                        ;out6
3825*c0909341SAndroid Build Coastguard Worker    mova                    m4, [rsp+gprsize*2+16*21]     ;t18a
3826*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*24]     ;t21a
3827*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*16]     ;tmp13
3828*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*28], m3                        ;out25
3829*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*9 ], m5                        ;out6
3830*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m4, m1                    ;t18
3831*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m1                        ;t21
3832*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m2, m3                    ;out18
3833*c0909341SAndroid Build Coastguard Worker    paddsw                  m2, m3                        ;out13
3834*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*29]     ;t26a
3835*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*32]     ;t29a
3836*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*21], m5                        ;out18
3837*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*16], m2                        ;out13
3838*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m3, m1                    ;t26
3839*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m1                        ;t29
3840*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 1, 2, 7, 2896, 2896    ;t21a, t26a
3841*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*5 ]     ;tmp2
3842*c0909341SAndroid Build Coastguard Worker    psubsw                  m1, m2, m3                    ;out29
3843*c0909341SAndroid Build Coastguard Worker    paddsw                  m2, m3                        ;out2
3844*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*13]     ;tmp10
3845*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*32], m1                        ;out29
3846*c0909341SAndroid Build Coastguard Worker    psubsw                  m7, m3, m5                    ;out21
3847*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m5                        ;out10
3848*c0909341SAndroid Build Coastguard Worker    mova                    m5, [rsp+gprsize*2+16*8 ]     ;tmp5
3849*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*24], m7                        ;out21
3850*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*13], m3                        ;out10
3851*c0909341SAndroid Build Coastguard Worker    psubsw                  m1, m5, m4                    ;out26
3852*c0909341SAndroid Build Coastguard Worker    paddsw                  m5, m4                        ;out5
3853*c0909341SAndroid Build Coastguard Worker    mova                    m7, m6                        ;out7
3854*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*6 ]     ;out3
3855*c0909341SAndroid Build Coastguard Worker    mova                    m4, [rsp+gprsize*2+16*7 ]     ;out4
3856*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*29], m1                        ;out26
3857*c0909341SAndroid Build Coastguard Worker    mova                    m6, [rsp+gprsize*2+16*9 ]     ;out6
3858*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*4 ]     ;out1
3859*c0909341SAndroid Build Coastguard Worker    ret
3860*c0909341SAndroid Build Coastguard Worker
3861*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_dct_dct_32x8_8bpc(dst, stride, coeff, eob)
; Entry point for the 32x8 DCT/DCT inverse transform + add, 8 bits per pixel.
;   eob != 0 : the work is done by idct_32x8_internal_8bpc.
;   eob == 0 : DC-only shortcut (.dconly/.body/.loop): word 0 of coeff is
;              scaled, broadcast, and added to every pixel of the block.
; cglobal requests 16*36 bytes of stack; tx2 names a scratch register that is
; used as a continuation address.
3862*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_32x8_8bpc, 4, 6, 8, 16*36, dst, stride, coeff, eob, tx2
; On 32-bit x86, r5 is loaded with the section start ($$).
; NOTE(review): presumably the base the o() constant-addressing macro is
; relative to - confirm against the o() definition.
3863*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32
3864*c0909341SAndroid Build Coastguard Worker    LEA                     r5, $$
3865*c0909341SAndroid Build Coastguard Worker%endif
; eob == 0 selects the DC-only path.
3866*c0909341SAndroid Build Coastguard Worker    test                  eobd, eobd
3867*c0909341SAndroid Build Coastguard Worker    jz .dconly
3868*c0909341SAndroid Build Coastguard Worker    call  m(idct_32x8_internal_8bpc)
3869*c0909341SAndroid Build Coastguard Worker    RET
3870*c0909341SAndroid Build Coastguard Worker
; DC-only setup:
;   m1   = pw_2896x8 constant (reused as a multiplier again in .body)
;   m0   = coeff * m1 (pmulhrsw); only word 0 survives the broadcast in .body
;   m2   = pw_8192, the first multiplier applied in .body
;   r3d  = 8, the number of rows written by .loop
;   tx2q = address jumped to once .loop has finished (.end)
3871*c0909341SAndroid Build Coastguard Worker.dconly:
3872*c0909341SAndroid Build Coastguard Worker    movd                    m1, [o(pw_2896x8)]
3873*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m1, [coeffq]
3874*c0909341SAndroid Build Coastguard Worker    movd                    m2, [o(pw_8192)]
; eobd is zero here (we arrived through jz), so this store clears the first
; four bytes of the coefficient buffer.
3875*c0909341SAndroid Build Coastguard Worker    mov               [coeffq], eobd
3876*c0909341SAndroid Build Coastguard Worker    mov                    r3d, 8
3877*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end)]
3878*c0909341SAndroid Build Coastguard Worker
; Scaling and broadcast. Inputs: m0 = DC value, m1 = pw_2896x8, m2 = first
; multiplier, r3d = row count, tx2q = continuation.
; NOTE(review): no jump to .body is visible in this chunk; presumably other
; block sizes enter here with their own m2/r3d/tx2q - confirm.
3879*c0909341SAndroid Build Coastguard Worker.body:
3880*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m2
3881*c0909341SAndroid Build Coastguard Worker    movd                    m2, [o(pw_2048)]  ;intentionally rip-relative
3882*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m1
3883*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m2
; Replicate word 0 of m0 into all eight words; m5 = 0 is used below to
; zero-extend pixel bytes to words.
3884*c0909341SAndroid Build Coastguard Worker    pshuflw                 m0, m0, q0000
3885*c0909341SAndroid Build Coastguard Worker    punpcklwd               m0, m0
3886*c0909341SAndroid Build Coastguard Worker    pxor                    m5, m5
3887*c0909341SAndroid Build Coastguard Worker
; One row per iteration: load the two vectors at dstq+16*0 and dstq+16*1,
; widen bytes to words, add the broadcast DC value, pack back to bytes with
; unsigned saturation (packuswb), store, then step dst by one stride.
3888*c0909341SAndroid Build Coastguard Worker.loop:
3889*c0909341SAndroid Build Coastguard Worker    mova                    m1, [dstq+16*0]
3890*c0909341SAndroid Build Coastguard Worker    mova                    m3, [dstq+16*1]
3891*c0909341SAndroid Build Coastguard Worker    punpckhbw               m2, m1, m5
3892*c0909341SAndroid Build Coastguard Worker    punpcklbw               m1, m5
3893*c0909341SAndroid Build Coastguard Worker    punpckhbw               m4, m3, m5
3894*c0909341SAndroid Build Coastguard Worker    punpcklbw               m3, m5
3895*c0909341SAndroid Build Coastguard Worker    paddw                   m2, m0
3896*c0909341SAndroid Build Coastguard Worker    paddw                   m1, m0
3897*c0909341SAndroid Build Coastguard Worker    paddw                   m4, m0
3898*c0909341SAndroid Build Coastguard Worker    paddw                   m3, m0
3899*c0909341SAndroid Build Coastguard Worker    packuswb                m1, m2
3900*c0909341SAndroid Build Coastguard Worker    packuswb                m3, m4
3901*c0909341SAndroid Build Coastguard Worker    mova           [dstq+16*0], m1
3902*c0909341SAndroid Build Coastguard Worker    mova           [dstq+16*1], m3
3903*c0909341SAndroid Build Coastguard Worker    add                   dstq, strideq
3904*c0909341SAndroid Build Coastguard Worker    dec                    r3d
3905*c0909341SAndroid Build Coastguard Worker    jg .loop
; All rows done: continue at the address the caller of .body left in tx2q.
3906*c0909341SAndroid Build Coastguard Worker    jmp                   tx2q
3907*c0909341SAndroid Build Coastguard Worker
; Continuation used by this function's own DC-only path.
3908*c0909341SAndroid Build Coastguard Worker.end:
3909*c0909341SAndroid Build Coastguard Worker    RET
3910*c0909341SAndroid Build Coastguard Worker
3911*c0909341SAndroid Build Coastguard Worker
; idct_32x8_internal_8bpc
; Worker for the 32x8 DCT/DCT transform; reached with `call` and left with the
; plain `ret` at .end8. It declares no arguments or registers of its own
; (cglobal ..., 0, 0, 0) and only re-uses the names dst/stride/coeff/eob/tx2,
; so it relies on the caller's register and stack setup.
;
; Flow, as far as this block shows:
;   1. Three groups of coefficient rows are loaded and either transformed with
;      the 8x8/16x8 .main helpers or parked in stack slots (odd inputs
;      in1..in15, plus in17..in31 when eob > 106).
;   2. idct_8x32_internal_8bpc .main or .main_fast combines them.
;   3. The result is written out as four 8-pixel-wide column groups through a
;      chain of continuations (.end .. .end8), each reached via `jmp tx2q`
;      from the shared 8x8 helpers.
; NOTE(review): the exact row selection done by LOAD_8ROWS/SAVE_*ROWS and the
; stack-slot contract of the .main helpers are defined outside this chunk.
3912*c0909341SAndroid Build Coastguard Workercglobal idct_32x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
; First group: rows starting at coeffq+16*0, step 64 -> 8x8 .main; seven result
; rows are kept on the stack from slot 16*3.
3913*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*0, 64
3914*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
3915*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
3916*c0909341SAndroid Build Coastguard Worker
; Second group: rows starting at coeffq+16*2, step 64 -> 16x8 .main. m7 is
; reloaded from slot 0 before all eight rows are saved from slot 16*11.
; NOTE(review): presumably .main leaves its last output in slot 0 - confirm.
3917*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*2, 64
3918*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
3919*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*0]
3920*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS   rsp+gprsize+16*11, 16
3921*c0909341SAndroid Build Coastguard Worker
; Third group: rows starting at coeffq+16*1, step 32 (in1, in3, .. in15 per
; the per-line tags). They are scattered into the stack slots that the 8x32
; .main/.main_fast helper reads.
3922*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*1, 32
3923*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*19], m0                        ;in1
3924*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*26], m1                        ;in3
3925*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*23], m2                        ;in5
3926*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*22], m3                        ;in7
3927*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*21], m4                        ;in9
3928*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*24], m5                        ;in11
3929*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*25], m6                        ;in13
3930*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*20], m7                        ;in15
3931*c0909341SAndroid Build Coastguard Worker
; eob <= 106 takes the .main_fast variant and never loads in17..in31.
3932*c0909341SAndroid Build Coastguard Worker    cmp                   eobd, 106
3933*c0909341SAndroid Build Coastguard Worker    jg  .full
3934*c0909341SAndroid Build Coastguard Worker    call m(idct_8x32_internal_8bpc).main_fast
3935*c0909341SAndroid Build Coastguard Worker    jmp .pass2
3936*c0909341SAndroid Build Coastguard Worker
; eob > 106: also stage in17..in31 (rows from coeffq+16*17, step 32) and run
; the complete .main.
3937*c0909341SAndroid Build Coastguard Worker.full:
3938*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    coeffq+16*17, 32
3939*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*33], m0                        ;in17
3940*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*28], m1                        ;in19
3941*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*29], m2                        ;in21
3942*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*32], m3                        ;in23
3943*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*31], m4                        ;in25
3944*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*30], m5                        ;in27
3945*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*27], m6                        ;in29
3946*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*34], m7                        ;in31
3947*c0909341SAndroid Build Coastguard Worker    call m(idct_8x32_internal_8bpc).main
3948*c0909341SAndroid Build Coastguard Worker
; Both paths join here. m7 is parked in slot 0 and control moves to the 8x32
; helper's .end1, which is expected to come back through tx2q (= .end).
3949*c0909341SAndroid Build Coastguard Worker.pass2:
3950*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*0 ], m7
3951*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end)]
3952*c0909341SAndroid Build Coastguard Worker    jmp  m(idct_8x32_internal_8bpc).end1
3953*c0909341SAndroid Build Coastguard Worker
; Column group 0: m7 = pw_8192 is handed to the 8x8 pass1_end1 helper, which
; continues at .end1.
3954*c0909341SAndroid Build Coastguard Worker.end:
3955*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
3956*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end1)]
3957*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
3958*c0909341SAndroid Build Coastguard Worker
; r3 remembers the destination of the next column group (dst + 8) because
; dstq itself is reassigned from r3 after every pass2_main.
3959*c0909341SAndroid Build Coastguard Worker.end1:
3960*c0909341SAndroid Build Coastguard Worker    lea                     r3, [dstq+8]
3961*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end2)]
3962*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass2_main
3963*c0909341SAndroid Build Coastguard Worker
; Column group 1: eight rows reloaded from slot 16*11; m7 goes to slot 0 so
; the register can carry pw_8192 into pass1_end1.
3964*c0909341SAndroid Build Coastguard Worker.end2:
3965*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*11, 16
3966*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*0 ], m7
3967*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
3968*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end3)]
3969*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
3970*c0909341SAndroid Build Coastguard Worker
; Point dst at the saved position and advance the saved position by 8.
3971*c0909341SAndroid Build Coastguard Worker.end3:
3972*c0909341SAndroid Build Coastguard Worker    mov                   dstq, r3
3973*c0909341SAndroid Build Coastguard Worker    add                     r3, 8
3974*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end4)]
3975*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass2_main
3976*c0909341SAndroid Build Coastguard Worker
; Column group 2: same steps as .end2, rows taken from slot 16*19.
3977*c0909341SAndroid Build Coastguard Worker.end4:
3978*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*19, 16
3979*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*0 ], m7
3980*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
3981*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end5)]
3982*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
3983*c0909341SAndroid Build Coastguard Worker
3984*c0909341SAndroid Build Coastguard Worker.end5:
3985*c0909341SAndroid Build Coastguard Worker    mov                   dstq, r3
3986*c0909341SAndroid Build Coastguard Worker    add                     r3, 8
3987*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end6)]
3988*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass2_main
3989*c0909341SAndroid Build Coastguard Worker
; Column group 3: same steps again, rows taken from slot 16*27.
3990*c0909341SAndroid Build Coastguard Worker.end6:
3991*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*27, 16
3992*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*0 ], m7
3993*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
3994*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end7)]
3995*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
3996*c0909341SAndroid Build Coastguard Worker
; Last group: r3 is not advanced any further.
3997*c0909341SAndroid Build Coastguard Worker.end7:
3998*c0909341SAndroid Build Coastguard Worker    mov                   dstq, r3
3999*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end8)]
4000*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass2_main
4001*c0909341SAndroid Build Coastguard Worker
; Bare return to whoever `call`ed this worker. Other functions in this file
; also load the address of .end8 into tx2q to use it as a plain "return"
; continuation, so the label must stay a lone ret.
4002*c0909341SAndroid Build Coastguard Worker.end8:
4003*c0909341SAndroid Build Coastguard Worker    ret
4004*c0909341SAndroid Build Coastguard Worker
4005*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_identity_identity_8x32_8bpc(dst, stride, coeff, eob)
; Identity/identity transform + add for 8x32 blocks, 8 bits per pixel.
; The block is processed in chunks of eight coefficient rows; r3d holds the
; number of chunks: 4 when eob >= 107, otherwise 2.
4006*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_8x32_8bpc, 4, 6, 8, 16*4, dst, stride, coeff, eob, tx2
; tx2d = (eob - 107 is non-negative) ? 4 : 2, then copied into r3d. r5 and tx2
; are only temporaries here; both are overwritten right below.
4007*c0909341SAndroid Build Coastguard Worker    mov                    r5d, 4
4008*c0909341SAndroid Build Coastguard Worker    mov                   tx2d, 2
4009*c0909341SAndroid Build Coastguard Worker    cmp                   eobd, 107
4010*c0909341SAndroid Build Coastguard Worker    cmovns                tx2d, r5d
4011*c0909341SAndroid Build Coastguard Worker    mov                    r3d, tx2d
4012*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32
4013*c0909341SAndroid Build Coastguard Worker    LEA                     r5, $$
4014*c0909341SAndroid Build Coastguard Worker%endif
; tx2q = address of a bare `ret` (idct_32x8_internal_8bpc.end8).
; NOTE(review): presumably the helpers called below finish with `jmp tx2q`,
; which turns that jump into a return to this loop - confirm in the helpers.
4015*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(idct_32x8_internal_8bpc).end8)]
4016*c0909341SAndroid Build Coastguard Worker.loop:
4017*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*0, 64
; Add pw_5 to all eight rows (saturating). m6 doubles as the constant
; register, so row 6 is biased straight from memory and parked in
; [rsp+16*1] first.
4018*c0909341SAndroid Build Coastguard Worker    paddsw                  m6, [o(pw_5)]
4019*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*1], m6
4020*c0909341SAndroid Build Coastguard Worker    mova                    m6, [o(pw_5)]
4021*c0909341SAndroid Build Coastguard Worker    REPX        {paddsw x, m6}, m0, m1, m2, m3, m4, m5, m7
4022*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).pass1_end3
; Arithmetic shift right by 3 on every row, completing the +5 rounding.
4023*c0909341SAndroid Build Coastguard Worker    REPX        {psraw  x, 3 }, m0, m1, m2, m3, m4, m5, m6, m7
; m5..m7 are handed to .end3 through the first three stack slots.
4024*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*2], m5
4025*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*1], m6
4026*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*0], m7
4027*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).end3
; NOTE(review): dst moves only two strides here; presumably .end3 has already
; advanced dstq by the other rows it wrote - confirm in .end3.
4028*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [dstq+strideq*2]
; Zero the eight coefficient rows just consumed, then step to the next chunk.
4029*c0909341SAndroid Build Coastguard Worker    pxor                    m7, m7
4030*c0909341SAndroid Build Coastguard Worker    REPX   {mova [coeffq+64*x], m7}, 0, 1, 2, 3, 4, 5, 6, 7
4031*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 16
4032*c0909341SAndroid Build Coastguard Worker    dec                    r3d
4033*c0909341SAndroid Build Coastguard Worker    jg .loop
4034*c0909341SAndroid Build Coastguard Worker    RET
4035*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_identity_identity_32x8_8bpc(dst, stride, coeff, eob)
; Identity/identity transform + add for 32x8 blocks, 8 bits per pixel.
; The block is processed as 8-pixel-wide column groups; r3d holds the number
; of groups: 4 when eob >= 107, otherwise 2. Each iteration consumes 16*8
; bytes of coefficients and moves dst 8 bytes to the right.
4036*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_32x8_8bpc, 4, 6, 8, 16*4, dst, stride, coeff, eob, tx2
; tx2d = (eob - 107 is non-negative) ? 4 : 2, then copied into r3d. r5 and tx2
; are only temporaries here; both are overwritten later.
4037*c0909341SAndroid Build Coastguard Worker    mov                    r5d, 4
4038*c0909341SAndroid Build Coastguard Worker    mov                   tx2d, 2
4039*c0909341SAndroid Build Coastguard Worker    cmp                   eobd, 107
4040*c0909341SAndroid Build Coastguard Worker    cmovns                tx2d, r5d
4041*c0909341SAndroid Build Coastguard Worker    mov                    r3d, tx2d
4042*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32
4043*c0909341SAndroid Build Coastguard Worker    LEA                     r5, $$
4044*c0909341SAndroid Build Coastguard Worker%endif
4045*c0909341SAndroid Build Coastguard Worker
4046*c0909341SAndroid Build Coastguard Worker.loop:
4047*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*0, 16
; Scale all eight rows by pw_4096 (pmulhrsw). m6 doubles as the constant
; register, so row 6 is scaled straight from memory and parked in
; [rsp+16*1] first.
4048*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m6, [o(pw_4096)]
4049*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*1], m6
4050*c0909341SAndroid Build Coastguard Worker    mova                    m6, [o(pw_4096)]
4051*c0909341SAndroid Build Coastguard Worker    REPX      {pmulhrsw x, m6}, m0, m1, m2, m3, m4, m5, m7
; tx2q = address of a bare `ret` (idct_32x8_internal_8bpc.end8).
; NOTE(review): presumably pass1_end3 finishes with `jmp tx2q`, which turns
; that jump into a return to this loop - confirm in the helper.
4052*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(idct_32x8_internal_8bpc).end8)]
4053*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).pass1_end3
4054*c0909341SAndroid Build Coastguard Worker
; Save dst in the fourth stack slot (it is reloaded after .end3) and hand
; m5..m7 to .end3 through the first three slots.
4055*c0909341SAndroid Build Coastguard Worker    mov             [rsp+16*3], dstq
4056*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*2], m5
4057*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*1], m6
4058*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*0], m7
4059*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(idct_8x8_internal_8bpc).end4)]
4060*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).end3
4061*c0909341SAndroid Build Coastguard Worker
; Next column group: 16*8 bytes further into coeff, 8 bytes further right
; in dst (restored from the stack copy made above).
4062*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 16*8
4063*c0909341SAndroid Build Coastguard Worker    mov                   dstq, [rsp+16*3]
4064*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [dstq+8]
; Loop control is `dec` + `jg` only. `dec` does not write CF, and CF is still
; clear from the `add coeffq` above (mov/lea leave flags alone), so an extra
; `jnc .loop` here would always branch and the loop would never terminate.
4065*c0909341SAndroid Build Coastguard Worker    dec                    r3d
4066*c0909341SAndroid Build Coastguard Worker    jg .loop
4068*c0909341SAndroid Build Coastguard Worker    RET
4069*c0909341SAndroid Build Coastguard Worker
4070*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_dct_dct_16x32_8bpc(dst, stride, coeff, eob)
; Entry point for the 16x32 DCT/DCT inverse transform + add, 8 bits per pixel.
;   eob != 0 : the work is done by idct_16x32_internal_8bpc.
;   eob == 0 : DC-only shortcut that tail-jumps into the 16x4 function's
;              .dconly code with this block's parameters.
4071*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_16x32_8bpc, 4, 6, 8, 16*36, dst, stride, coeff, eob, tx2
4072*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32
4073*c0909341SAndroid Build Coastguard Worker    LEA                     r5, $$
4074*c0909341SAndroid Build Coastguard Worker%endif
4075*c0909341SAndroid Build Coastguard Worker    test                  eobd, eobd
4076*c0909341SAndroid Build Coastguard Worker    jz .dconly
4077*c0909341SAndroid Build Coastguard Worker    call  m(idct_16x32_internal_8bpc)
; .end is both the normal exit and the continuation passed in tx2q on the
; DC-only path.
4078*c0909341SAndroid Build Coastguard Worker.end:
4079*c0909341SAndroid Build Coastguard Worker    RET
4080*c0909341SAndroid Build Coastguard Worker
; DC-only setup:
;   m1 = pw_2896x8 constant
;   m0 = coeff * m1, multiplied by m1 a second time below (pmulhrsw)
;   m2 = pw_16384, left for the shared .dconly code
4081*c0909341SAndroid Build Coastguard Worker.dconly:
4082*c0909341SAndroid Build Coastguard Worker    movd                    m1, [o(pw_2896x8)]
4083*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m1, [coeffq]
4084*c0909341SAndroid Build Coastguard Worker    movd                    m2, [o(pw_16384)]
; eobd is zero here (we arrived through jz), so this store clears the first
; four bytes of the coefficient buffer.
4085*c0909341SAndroid Build Coastguard Worker    mov               [coeffq], eobd
4086*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m1
; NOTE(review): r2d = 16 is presumably the loop count expected by the 16x4
; .dconly code. By cglobal argument order r2 would also be the coeff register,
; which is not needed after the store above - confirm both against the
; 16x4 function.
4087*c0909341SAndroid Build Coastguard Worker    mov                    r2d, 16
4088*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end)]
4089*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_16x4_8bpc).dconly
4090*c0909341SAndroid Build Coastguard Worker
4091*c0909341SAndroid Build Coastguard Worker
4092*c0909341SAndroid Build Coastguard Workercglobal idct_16x32_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
4093*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*1, 128, 1
4094*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
4095*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
4096*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*5, 128, 1
4097*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
4098*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end)]
4099*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
4100*c0909341SAndroid Build Coastguard Worker
4101*c0909341SAndroid Build Coastguard Worker.pass1_end:
4102*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+16*33, 64               ;in8~in15
4103*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    rsp+gprsize+16*3, 16
4104*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
4105*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end1)]
4106*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
4107*c0909341SAndroid Build Coastguard Worker
4108*c0909341SAndroid Build Coastguard Worker.pass1_end1:
4109*c0909341SAndroid Build Coastguard Worker    mova        [coeffq+16*1 ], m0                        ;in8
4110*c0909341SAndroid Build Coastguard Worker    mova        [coeffq+16*5 ], m4                        ;in12
4111*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*13], m2                        ;in10
4112*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*14], m6                        ;in14
4113*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*21], m1                        ;in9
4114*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*24], m3                        ;in11
4115*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*25], m5                        ;in13
4116*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*20], m7                        ;in15
4117*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*0, 128, 1
4118*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
4119*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
4120*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*4, 128, 1
4121*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
4122*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end2)]
4123*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
4124*c0909341SAndroid Build Coastguard Worker
4125*c0909341SAndroid Build Coastguard Worker.pass1_end2:
4126*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+16*32, 64               ;in0~in7
4127*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    rsp+gprsize+16*3, 16
4128*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
4129*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end3)]
4130*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
4131*c0909341SAndroid Build Coastguard Worker
4132*c0909341SAndroid Build Coastguard Worker.pass1_end3:
4133*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*11], m2                        ;in2
4134*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*12], m6                        ;in6
4135*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*19], m1                        ;in1
4136*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*26], m3                        ;in3
4137*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*23], m5                        ;in5
4138*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*22], m7                        ;in7
4139*c0909341SAndroid Build Coastguard Worker
4140*c0909341SAndroid Build Coastguard Worker    cmp                   eobd, 150
4141*c0909341SAndroid Build Coastguard Worker    jg .full
4142*c0909341SAndroid Build Coastguard Worker
4143*c0909341SAndroid Build Coastguard Worker    mova                    m1, m4                        ;in4
4144*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*1 ]            ;in8
4145*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*5 ]            ;in12
4146*c0909341SAndroid Build Coastguard Worker    pxor                    m4, m4
4147*c0909341SAndroid Build Coastguard Worker    REPX          {mova x, m4}, m5, m6, m7
4148*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
4149*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
4150*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize+16*11]       ;in2
4151*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize+16*12]       ;in6
4152*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize+16*13]       ;in10
4153*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize+16*14]       ;in14
4154*c0909341SAndroid Build Coastguard Worker    pxor                    m4, m4
4155*c0909341SAndroid Build Coastguard Worker    REPX          {mova x, m4}, m5, m6, m7
4156*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
4157*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*0]
4158*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS   rsp+gprsize+16*11, 16
4159*c0909341SAndroid Build Coastguard Worker
4160*c0909341SAndroid Build Coastguard Worker    call m(idct_8x32_internal_8bpc).main_fast
4161*c0909341SAndroid Build Coastguard Worker    jmp  .pass2
4162*c0909341SAndroid Build Coastguard Worker
4163*c0909341SAndroid Build Coastguard Worker.full:
4164*c0909341SAndroid Build Coastguard Worker    mova        [coeffq+16*0 ], m0                        ;in0
4165*c0909341SAndroid Build Coastguard Worker    mova        [coeffq+16*4 ], m4                        ;in4
4166*c0909341SAndroid Build Coastguard Worker
4167*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*2, 128, 1
4168*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
4169*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
4170*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*6, 128, 1
4171*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
4172*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end4)]
4173*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
4174*c0909341SAndroid Build Coastguard Worker
4175*c0909341SAndroid Build Coastguard Worker.pass1_end4:
4176*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+16*34, 64               ;in16~in23
4177*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    rsp+gprsize+16*3, 16
4178*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
4179*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end5)]
4180*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
4181*c0909341SAndroid Build Coastguard Worker
4182*c0909341SAndroid Build Coastguard Worker.pass1_end5:
4183*c0909341SAndroid Build Coastguard Worker    mova        [coeffq+16*2 ], m0                        ;in16
4184*c0909341SAndroid Build Coastguard Worker    mova        [coeffq+16*6 ], m4                        ;in20
4185*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*15], m2                        ;in18
4186*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*16], m6                        ;in22
4187*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*33], m1                        ;in17
4188*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*28], m3                        ;in19
4189*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*29], m5                        ;in21
4190*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*32], m7                        ;in23
4191*c0909341SAndroid Build Coastguard Worker
4192*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*3, 128, 1
4193*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
4194*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
4195*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*7, 128, 1
4196*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
4197*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end6)]
4198*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
4199*c0909341SAndroid Build Coastguard Worker
4200*c0909341SAndroid Build Coastguard Worker.pass1_end6:
4201*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+16*35, 64                        ;in24~in31
4202*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    rsp+gprsize+16*3, 16
4203*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
4204*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end7)]
4205*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
4206*c0909341SAndroid Build Coastguard Worker
4207*c0909341SAndroid Build Coastguard Worker.pass1_end7:
4208*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*17], m2                        ;in26
4209*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*18], m6                        ;in30
4210*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*31], m1                        ;in25
4211*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*30], m3                        ;in27
4212*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*27], m5                        ;in29
4213*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*34], m7                        ;in31
4214*c0909341SAndroid Build Coastguard Worker
4215*c0909341SAndroid Build Coastguard Worker    mova                    m6, m0                        ;in24
4216*c0909341SAndroid Build Coastguard Worker    mova                    m7, m4                        ;in28
4217*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*0 ]            ;in0
4218*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*4 ]            ;in4
4219*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*1 ]            ;in8
4220*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*5 ]            ;in12
4221*c0909341SAndroid Build Coastguard Worker    mova                    m4, [coeffq+16*2 ]            ;in16
4222*c0909341SAndroid Build Coastguard Worker    mova                    m5, [coeffq+16*6 ]            ;in20
4223*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
4224*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS   rsp+gprsize+16*3 , 16
4225*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*11, 16
4226*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
4227*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*0]
4228*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS   rsp+gprsize+16*11, 16
4229*c0909341SAndroid Build Coastguard Worker
4230*c0909341SAndroid Build Coastguard Worker    call m(idct_8x32_internal_8bpc).main
4231*c0909341SAndroid Build Coastguard Worker
4232*c0909341SAndroid Build Coastguard Worker.pass2:
4233*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*1+16*35], eobd
4234*c0909341SAndroid Build Coastguard Worker    lea                     r3, [dstq+8]
4235*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*2+16*35], r3
4236*c0909341SAndroid Build Coastguard Worker    lea                     r3, [o(.end)]
4237*c0909341SAndroid Build Coastguard Worker    jmp  m(idct_8x32_internal_8bpc).end
4238*c0909341SAndroid Build Coastguard Worker
4239*c0909341SAndroid Build Coastguard Worker.end:
4240*c0909341SAndroid Build Coastguard Worker    mov                   dstq, [rsp+gprsize*2+16*35]
4241*c0909341SAndroid Build Coastguard Worker    mov                   eobd, [rsp+gprsize*1+16*35]
4242*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 16*32
4243*c0909341SAndroid Build Coastguard Worker
4244*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*4 ]            ;in1
4245*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*12]            ;in3
4246*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*20]            ;in5
4247*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*28]            ;in7
4248*c0909341SAndroid Build Coastguard Worker    mova                    m4, [coeffq+16*5 ]            ;in9
4249*c0909341SAndroid Build Coastguard Worker    mova                    m5, [coeffq+16*13]            ;in11
4250*c0909341SAndroid Build Coastguard Worker    mova                    m6, [coeffq+16*21]            ;in13
4251*c0909341SAndroid Build Coastguard Worker    mova                    m7, [coeffq+16*29]            ;in15
4252*c0909341SAndroid Build Coastguard Worker
4253*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*19], m0                        ;in1
4254*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*26], m1                        ;in3
4255*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*23], m2                        ;in5
4256*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*22], m3                        ;in7
4257*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*21], m4                        ;in9
4258*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*24], m5                        ;in11
4259*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*25], m6                        ;in13
4260*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*20], m7                        ;in15
4261*c0909341SAndroid Build Coastguard Worker
4262*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*0 ]            ;in0
4263*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*16]            ;in4
4264*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*1 ]            ;in8
4265*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*17]            ;in12
4266*c0909341SAndroid Build Coastguard Worker
4267*c0909341SAndroid Build Coastguard Worker    cmp                   eobd, 150
4268*c0909341SAndroid Build Coastguard Worker    jg .full1
4269*c0909341SAndroid Build Coastguard Worker
4270*c0909341SAndroid Build Coastguard Worker    pxor                    m4, m4
4271*c0909341SAndroid Build Coastguard Worker    REPX          {mova x, m4}, m5, m6, m7
4272*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
4273*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
4274*c0909341SAndroid Build Coastguard Worker
4275*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*8 ]            ;in2
4276*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*24]            ;in6
4277*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*9 ]            ;in10
4278*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*25]            ;in14
4279*c0909341SAndroid Build Coastguard Worker    pxor                    m4, m4
4280*c0909341SAndroid Build Coastguard Worker    REPX          {mova x, m4}, m5, m6, m7
4281*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
4282*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*0]
4283*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS   rsp+gprsize+16*11, 16
4284*c0909341SAndroid Build Coastguard Worker
4285*c0909341SAndroid Build Coastguard Worker    call m(idct_8x32_internal_8bpc).main_fast
4286*c0909341SAndroid Build Coastguard Worker    jmp m(idct_8x32_internal_8bpc).pass2
4287*c0909341SAndroid Build Coastguard Worker
4288*c0909341SAndroid Build Coastguard Worker.full1:
4289*c0909341SAndroid Build Coastguard Worker    mova                    m4, [coeffq+16*2 ]            ;in16
4290*c0909341SAndroid Build Coastguard Worker    mova                    m5, [coeffq+16*18]            ;in20
4291*c0909341SAndroid Build Coastguard Worker    mova                    m6, [coeffq+16*3 ]            ;in24
4292*c0909341SAndroid Build Coastguard Worker    mova                    m7, [coeffq+16*19]            ;in28
4293*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
4294*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
4295*c0909341SAndroid Build Coastguard Worker
4296*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*8 ]            ;in2
4297*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*24]            ;in6
4298*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*9 ]            ;in10
4299*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*25]            ;in14
4300*c0909341SAndroid Build Coastguard Worker    mova                    m4, [coeffq+16*10]            ;in18
4301*c0909341SAndroid Build Coastguard Worker    mova                    m5, [coeffq+16*26]            ;in22
4302*c0909341SAndroid Build Coastguard Worker    mova                    m6, [coeffq+16*11]            ;in26
4303*c0909341SAndroid Build Coastguard Worker    mova                    m7, [coeffq+16*27]            ;in30
4304*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
4305*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*0]
4306*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS   rsp+gprsize+16*11, 16
4307*c0909341SAndroid Build Coastguard Worker
4308*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*6 ]            ;in17
4309*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*14]            ;in19
4310*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*22]            ;in21
4311*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*30]            ;in23
4312*c0909341SAndroid Build Coastguard Worker    mova                    m4, [coeffq+16*7 ]            ;in25
4313*c0909341SAndroid Build Coastguard Worker    mova                    m5, [coeffq+16*15]            ;in27
4314*c0909341SAndroid Build Coastguard Worker    mova                    m6, [coeffq+16*23]            ;in29
4315*c0909341SAndroid Build Coastguard Worker    mova                    m7, [coeffq+16*31]            ;in31
4316*c0909341SAndroid Build Coastguard Worker
4317*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*33], m0                        ;in17
4318*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*28], m1                        ;in19
4319*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*29], m2                        ;in21
4320*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*32], m3                        ;in23
4321*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*31], m4                        ;in25
4322*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*30], m5                        ;in27
4323*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*27], m6                        ;in29
4324*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*34], m7                        ;in31
4325*c0909341SAndroid Build Coastguard Worker
4326*c0909341SAndroid Build Coastguard Worker    call m(idct_8x32_internal_8bpc).main
4327*c0909341SAndroid Build Coastguard Worker    jmp m(idct_8x32_internal_8bpc).pass2
4328*c0909341SAndroid Build Coastguard Worker
4329*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_dct_dct_32x16_8bpc
; Entry point for the 32x16 DCT_DCT inverse transform + add (per its name).
; cglobal parameters: 4 arguments, 6 GPRs, 8 XMM registers, 16*36 bytes of
; stack; named registers dst, stride, coeff, eob, tx2.
;
; eob == 0 takes the .dconly shortcut. Otherwise idct_32x16_internal_8bpc
; (defined right after this function) is called once, followed by four
; calls to idct_8x16_internal_8bpc.pass2. Before each of the last three
; calls, coeffq advances by 16*16 bytes, dstq is set to r3+8 and eight
; registers are reloaded from the stack.
; NOTE(review): r3 is presumably left by idct_8x16 .pass2 pointing at the
; start of the strip just written, so r3+8 is the next 8-pixel column strip -
; confirm against that routine.
4330*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_32x16_8bpc, 4, 6, 8, 16*36, dst, stride, coeff, eob, tx2
4331*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32
    ; x86-32 only: r5 = start of the current section ($$); presumably the base
    ; used by the o() macro for constant addressing - confirm.
4332*c0909341SAndroid Build Coastguard Worker    LEA                     r5, $$
4333*c0909341SAndroid Build Coastguard Worker%endif
4334*c0909341SAndroid Build Coastguard Worker    test                  eobd, eobd
4335*c0909341SAndroid Build Coastguard Worker    jz .dconly
4336*c0909341SAndroid Build Coastguard Worker
4337*c0909341SAndroid Build Coastguard Worker    call m(idct_32x16_internal_8bpc)
4338*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_8bpc).pass2
4339*c0909341SAndroid Build Coastguard Worker
    ; Second strip: reload m0-m7 from stack slot 11, spill m7 to slot 0, then
    ; run idct_8x8 .pass1_end. tx2q points at idct_32x16_internal's .end,
    ; which is a bare `ret`, so .pass1_end comes back here (assuming it
    ; finishes with a jump through tx2q - confirm).
    ; The slots are the ones idct_32x16_internal addresses as
    ; rsp+gprsize+16*N; no gprsize term is used at this call depth.
4340*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 16*16
4341*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [r3+8]
4342*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS       rsp+16*11, 16
4343*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*0], m7
4344*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(idct_32x16_internal_8bpc).end)]
4345*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).pass1_end
4346*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_8bpc).pass2
4347*c0909341SAndroid Build Coastguard Worker
    ; Third strip: same sequence, rows reloaded from stack slot 19.
4348*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 16*16
4349*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [r3+8]
4350*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS       rsp+16*19, 16
4351*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*0], m7
4352*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(idct_32x16_internal_8bpc).end)]
4353*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).pass1_end
4354*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_8bpc).pass2
4355*c0909341SAndroid Build Coastguard Worker
    ; Fourth strip: same sequence, rows reloaded from stack slot 27.
4356*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 16*16
4357*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [r3+8]
4358*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS       rsp+16*27, 16
4359*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*0], m7
4360*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(idct_32x16_internal_8bpc).end)]
4361*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).pass1_end
4362*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_8bpc).pass2
4363*c0909341SAndroid Build Coastguard Worker    RET
4364*c0909341SAndroid Build Coastguard Worker
    ; DC-only path (eob == 0): m0 = pmulhrsw(pw_2896x8, [coeffq]), multiplied by
    ; pw_2896x8 a second time below. m2 = pw_16384 and r3d = 16 are set up for
    ; the shared 32x8 .body code (presumably a rounding/scale constant and the
    ; row count - confirm there). eobd is zero on this path, so the store of
    ; eobd writes zero over the first 4 bytes of the coefficient buffer.
4365*c0909341SAndroid Build Coastguard Worker.dconly:
4366*c0909341SAndroid Build Coastguard Worker    movd                    m1, [o(pw_2896x8)]
4367*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m1, [coeffq]
4368*c0909341SAndroid Build Coastguard Worker    movd                    m2, [o(pw_16384)]
4369*c0909341SAndroid Build Coastguard Worker    mov               [coeffq], eobd
4370*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m1
4371*c0909341SAndroid Build Coastguard Worker    mov                    r3d, 16
4372*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(inv_txfm_add_dct_dct_32x8_8bpc).end)]
4373*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_32x8_8bpc).body
4374*c0909341SAndroid Build Coastguard Worker
4375*c0909341SAndroid Build Coastguard Worker
; idct_32x16_internal_8bpc
; First-pass worker called from inv_txfm_add_dct_dct_32x16_8bpc. Declared with
; cglobal 0, 0, 0, i.e. no arguments are loaded and no registers or stack are
; reserved here; it reuses the caller's named registers and addresses the
; caller's stack frame as rsp+gprsize+16*N.
;
; The .pass1 body is executed twice:
;   1st run: coeffq is advanced by 16 bytes and r3 = .pass1_end1. The chain
;            .pass1_end1 -> .pass1_end4 writes four 8-register groups back
;            to the coefficient buffer at coeffq+16*{0,16,32,48} (row stride
;            32 bytes), then coeffq is restored and .pass1 is re-entered
;            with r3 = .end.
;   2nd run: after .pass1, idct_8x8 .pass1_end is entered with tx2q = .end,
;            so only the first 8-register group goes through .pass1_end;
;            the other three groups stay in stack slots 11, 19 and 27 for
;            the caller to pick up.
; NOTE(review): this relies on idct_8x8 .pass1_end finishing with a jump
; through tx2q - confirm against that routine.
4376*c0909341SAndroid Build Coastguard Workercglobal idct_32x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
4377*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 16
4378*c0909341SAndroid Build Coastguard Worker    lea                     r3, [o(.pass1_end1)]
4379*c0909341SAndroid Build Coastguard Worker.pass1:
    ; Rows at coeffq+16*0 with a 128-byte stride feed the 8-point IDCT; seven
    ; result registers are spilled to stack slots starting at slot 3.
    ; NOTE(review): the trailing `1` macro argument presumably selects the
    ; rectangular-transform pre-scale - confirm in LOAD_8ROWS.
4380*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*0, 128, 1
4381*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
4382*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
4383*c0909341SAndroid Build Coastguard Worker
    ; Rows at coeffq+16*4 with a 128-byte stride feed idct_16x8 .main; m7 is
    ; reloaded from stack slot 0 and all eight registers are stored to
    ; slots 11-18.
4384*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*4, 128, 1
4385*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
4386*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*0]
4387*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS   rsp+gprsize+16*11, 16
4388*c0909341SAndroid Build Coastguard Worker
    ; Odd inputs in1..in15 (coeffq+16*2, stride 64) are scattered to the stack
    ; slots read by idct_8x32 .main.
4389*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+16*2, 64, 1
4390*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*19], m0                        ;in1
4391*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*26], m1                        ;in3
4392*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*23], m2                        ;in5
4393*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*22], m3                        ;in7
4394*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*21], m4                        ;in9
4395*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*24], m5                        ;in11
4396*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*25], m6                        ;in13
4397*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*20], m7                        ;in15
4398*c0909341SAndroid Build Coastguard Worker
    ; Odd inputs in17..in31 (coeffq+16*34, stride 64), same treatment.
4399*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    coeffq+16*34, 64, 1
4400*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*33], m0                        ;in17
4401*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*28], m1                        ;in19
4402*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*29], m2                        ;in21
4403*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*32], m3                        ;in23
4404*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*31], m4                        ;in25
4405*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*30], m5                        ;in27
4406*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*27], m6                        ;in29
4407*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*34], m7                        ;in31
4408*c0909341SAndroid Build Coastguard Worker    call m(idct_8x32_internal_8bpc).main
4409*c0909341SAndroid Build Coastguard Worker
    ; Spill m7 to slot 0 and continue in idct_8x8 .pass1_end; r3 selects where
    ; execution resumes afterwards (.pass1_end1 on the first run, .end on the
    ; second).
4410*c0909341SAndroid Build Coastguard Worker.pass1_end:
4411*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*0 ], m7
4412*c0909341SAndroid Build Coastguard Worker    mov                   tx2q, r3
4413*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
4414*c0909341SAndroid Build Coastguard Worker
    ; First run only: store group 0 to coeffq+16*0, fetch group 1 from slot 11.
4415*c0909341SAndroid Build Coastguard Worker.pass1_end1:
4416*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS     coeffq+16*0, 32
4417*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*11, 16
4418*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*0 ], m7
4419*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end2)]
4420*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
4421*c0909341SAndroid Build Coastguard Worker
    ; Store group 1 to coeffq+16*16, fetch group 2 from slot 19.
4422*c0909341SAndroid Build Coastguard Worker.pass1_end2:
4423*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+16*16, 32
4424*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*19, 16
4425*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*0 ], m7
4426*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end3)]
4427*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
4428*c0909341SAndroid Build Coastguard Worker
    ; Store group 2 to coeffq+16*32, fetch group 3 from slot 27.
4429*c0909341SAndroid Build Coastguard Worker.pass1_end3:
4430*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+16*32, 32
4431*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*27, 16
4432*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*0 ], m7
4433*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end4)]
4434*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
4435*c0909341SAndroid Build Coastguard Worker
    ; Store group 3 to coeffq+16*48, undo the initial +16 on coeffq and start
    ; the second run with r3 = .end.
4436*c0909341SAndroid Build Coastguard Worker.pass1_end4:
4437*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+16*48, 32
4438*c0909341SAndroid Build Coastguard Worker
4439*c0909341SAndroid Build Coastguard Worker    sub                 coeffq, 16
4440*c0909341SAndroid Build Coastguard Worker    lea                     r3, [o(.end)]
4441*c0909341SAndroid Build Coastguard Worker    jmp .pass1
4442*c0909341SAndroid Build Coastguard Worker
    ; Bare return. Other functions in this file also load this label into tx2q
    ; as a "return to caller" continuation.
4443*c0909341SAndroid Build Coastguard Worker.end:
4444*c0909341SAndroid Build Coastguard Worker    ret
4445*c0909341SAndroid Build Coastguard Worker
4446*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_identity_identity_16x32_8bpc
; Entry point for the 16x32 identity/identity inverse transform + add (per its
; name). cglobal parameters: 4 arguments, 6 GPRs, 8 XMM registers, 16*4 bytes
; of stack; named registers dst, stride, coeff, eob, tx2.
;
; Loop structure:
;   r3d = 4 - (eob < 43) - (eob < 150) - (eob < 278), i.e. 1..4 iterations of
;   .loop per column strip. The whole inner loop runs twice: once for dst and
;   once for dst+8, with coeffq restarted at (original coeffq + 64*8).
;
; Stack slots used for bookkeeping (within the 16-byte slot at rsp+16*3 and
; just after it):
;   [rsp+16*3]            dst+8 (start of the second column strip)
;   [rsp+gprsize+16*3]    saved iteration count; zeroed after the first strip
;                         so that the outer loop terminates after the second
;   [rsp+gprsize*2+16*3]  original coeffq
; NOTE(review): with gprsize == 8 the last slot starts at rsp+64, i.e. right
; at the end of the 16*4 bytes requested from cglobal; presumably covered by
; the allocation's alignment padding - confirm against x86inc.
4447*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_16x32_8bpc, 4, 6, 8, 16*4, dst, stride, coeff, eob, tx2
4448*c0909341SAndroid Build Coastguard Worker    mov                    r4d, eobd
4449*c0909341SAndroid Build Coastguard Worker    cmp                   eobd, 43                ;CF = (eob < 43)
4450*c0909341SAndroid Build Coastguard Worker    sbb                    r3d, r3d               ;  r3d = -CF
4451*c0909341SAndroid Build Coastguard Worker    cmp                    r4d, 150               ;CF = (eob < 150)
4452*c0909341SAndroid Build Coastguard Worker    sbb                    r3d, 0                 ;  r3d -= CF
4453*c0909341SAndroid Build Coastguard Worker    cmp                    r4d, 278               ;CF = (eob < 278)
4454*c0909341SAndroid Build Coastguard Worker    sbb                    r3d, -4                ;  r3d = r3d + 4 - CF
4455*c0909341SAndroid Build Coastguard Worker
4456*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32
    ; x86-32 only: r5 = start of the current section ($$); presumably the base
    ; used by the o() macro for constant addressing - confirm.
4457*c0909341SAndroid Build Coastguard Worker    LEA                     r5, $$
4458*c0909341SAndroid Build Coastguard Worker%endif
4459*c0909341SAndroid Build Coastguard Worker    lea                     r4, [dstq+8]
4460*c0909341SAndroid Build Coastguard Worker    mov             [rsp+16*3], r4
4461*c0909341SAndroid Build Coastguard Worker    mov     [rsp+gprsize+16*3], r3d
4462*c0909341SAndroid Build Coastguard Worker    mov   [rsp+gprsize*2+16*3], coeffq
4463*c0909341SAndroid Build Coastguard Worker
4464*c0909341SAndroid Build Coastguard Worker.loop:
    ; Load eight registers (row stride 64 bytes), park m6 in stack slot 1 and
    ; use m6 as a zero register to clear the coefficients just read.
4465*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS          coeffq, 64, 1
4466*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*1], m6
4467*c0909341SAndroid Build Coastguard Worker    pxor                    m6, m6
4468*c0909341SAndroid Build Coastguard Worker    REPX   {mova [coeffq+64*x], m6}, 0,  1,  2,  3,  4,  5,  6,  7
    ; tx2q = address of a bare `ret`, so the shared idct_8x8 .pass1_end3 code
    ; returns here (assuming it ends with a jump through tx2q - confirm).
4469*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(idct_32x16_internal_8bpc).end)]
4470*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).pass1_end3
    ; Spill m2/m3/m4 so they can serve as scratch and constant registers
    ; (m3 = pw_1697x16, m4 = pw_16384) for IDTX16 on m5, m6, m7, m0, m1; those
    ; five are then multiplied by pw_8192 with pmulhrsw.
4471*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*0], m2
4472*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*1], m3
4473*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*2], m4
4474*c0909341SAndroid Build Coastguard Worker    mova                    m3, [o(pw_1697x16)]
4475*c0909341SAndroid Build Coastguard Worker    mova                    m4, [o(pw_16384)]
4476*c0909341SAndroid Build Coastguard Worker    REPX   {IDTX16 x, 2, 3, 4}, 5, 6, 7, 0, 1
4477*c0909341SAndroid Build Coastguard Worker    mova                    m2, [o(pw_8192)]
4478*c0909341SAndroid Build Coastguard Worker    REPX      {pmulhrsw x, m2}, m5, m6, m7, m0, m1
    ; Handle the three spilled rows: each is reloaded from its stack slot
    ; while a finished register (m7, m5, m6) takes over that slot.
4479*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+16*0]
4480*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*0], m7
4481*c0909341SAndroid Build Coastguard Worker    IDTX16                   2, 7, 3, 4
4482*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+16*2]
4483*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*2], m5
4484*c0909341SAndroid Build Coastguard Worker    IDTX16                   7, 5, 3, 4
4485*c0909341SAndroid Build Coastguard Worker    mova                    m5, [rsp+16*1]
4486*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*1], m6
    ; Open-coded scaling of the last row (m5), result in m3:
    ; m3 = pmulhrsw(pmulhrsw(pw_1697x16, m5), pw_16384) + m5 (saturating add).
    ; m4 is shifted right by one to turn pw_16384 into pw_8192 for the final
    ; scale of m2, m3 and m7 (the last result lands in m4).
4487*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m5
4488*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m4
4489*c0909341SAndroid Build Coastguard Worker    psrlw                   m4, 1 ; pw_8192
4490*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m5
4491*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m4
4492*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m4
4493*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m4, m7
4494*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).end3
    ; Inner loop step: dstq += 2*stride, coeffq += 16, count down r3d.
4495*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [dstq+strideq*2]
4496*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 16
4497*c0909341SAndroid Build Coastguard Worker    dec                    r3d
4498*c0909341SAndroid Build Coastguard Worker    jg .loop
    ; Strip finished: restart at original coeffq + 64*8 and dst+8. The saved
    ; count is fetched and then overwritten with zero (dstq is used as a
    ; temporary zero before being reloaded), so the second time through this
    ; point r3d reads back as 0 and the function returns.
4499*c0909341SAndroid Build Coastguard Worker    mov                 coeffq, [rsp+gprsize*2+16*3]
4500*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 64*8
4501*c0909341SAndroid Build Coastguard Worker    mov                    r3d, [rsp+gprsize+16*3]
4502*c0909341SAndroid Build Coastguard Worker    xor                   dstq, dstq
4503*c0909341SAndroid Build Coastguard Worker    mov     [rsp+gprsize+16*3], dstq
4504*c0909341SAndroid Build Coastguard Worker    mov                   dstq, [rsp+16*3]
4505*c0909341SAndroid Build Coastguard Worker    test                   r3d, r3d
4506*c0909341SAndroid Build Coastguard Worker    jnz .loop
4507*c0909341SAndroid Build Coastguard Worker    RET
4508*c0909341SAndroid Build Coastguard Worker
4509*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_identity_identity_32x16_8bpc
; Entry point for the 32x16 identity/identity inverse transform + add (per its
; name). cglobal parameters: 4 arguments, 6 GPRs, 8 XMM registers, 16*4 bytes
; of stack; named registers dst, stride, coeff, eob, tx2.
;
; r3d is a loop-control bit pattern rather than a plain counter. After every
; iteration .loop_end shifts it right by 2:
;   - result == 0        -> done
;   - bit 1 set          -> another iteration in the same column strip
;   - bit 1 clear        -> move to the next 8-pixel column strip first
; Resulting iteration counts:
;   eob <  44            r3d = 12    (1100b)                 2 iterations
;   44 <= eob < 151      r3d = 136   (1000 1000b)            4 iterations
;   eob >= 151           r3d = 34952 (1000 1000 1000 1000b)  8 iterations
; [rsp+16*3] holds the dst pointer for the next column strip.
4510*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_32x16_8bpc, 4, 6, 8, 16*4, dst, stride, coeff, eob, tx2
4511*c0909341SAndroid Build Coastguard Worker    mov                    r4d, 12                ;1100b
4512*c0909341SAndroid Build Coastguard Worker    mov                    r5d, 136               ;1000 1000b
4513*c0909341SAndroid Build Coastguard Worker    cmp                   eobd, 44                ;if (eob > 43)
4514*c0909341SAndroid Build Coastguard Worker    cmovns                 r4d, r5d               ;  r4d = 136 (4 iterations)
4515*c0909341SAndroid Build Coastguard Worker    cmp                   eobd, 151               ;if (eob > 150) keep r3d below
4516*c0909341SAndroid Build Coastguard Worker    mov                    r3d, 34952             ;1000 1000 1000 1000b (mov leaves flags intact)
4517*c0909341SAndroid Build Coastguard Worker    cmovs                  r3d, r4d               ;else r3d = r4d (12 or 136)
4518*c0909341SAndroid Build Coastguard Worker
4519*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32
    ; x86-32 only: r5 (no longer needed as a temporary) = start of the current
    ; section ($$); presumably the base used by the o() macro - confirm.
4520*c0909341SAndroid Build Coastguard Worker    LEA                     r5, $$
4521*c0909341SAndroid Build Coastguard Worker%endif
4522*c0909341SAndroid Build Coastguard Worker    lea                     r4, [dstq+8]
4523*c0909341SAndroid Build Coastguard Worker    mov             [rsp+16*3], r4
4524*c0909341SAndroid Build Coastguard Worker
4525*c0909341SAndroid Build Coastguard Worker.loop:
    ; Load eight registers (row stride 32 bytes) and double each with a
    ; saturating add; m6 is parked in stack slot 1 before the shared
    ; idct_8x8 .pass1_end3 code is called. tx2q points at a bare `ret` so that
    ; code returns here (assuming it ends with a jump through tx2q - confirm).
4526*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS          coeffq, 32, 1
4527*c0909341SAndroid Build Coastguard Worker    REPX         {paddsw x, x}, m0, m1, m2, m3, m4, m5, m6, m7
4528*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*1], m6
4529*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(idct_32x16_internal_8bpc).end)]
4530*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).pass1_end3
    ; Spill m5/m6 to free a scratch register (m5) and a constant register
    ; (m6 = pw_1697x16) for IDTX16 on m7, m0, m1, m2, m3, m4.
4531*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*1], m5
4532*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*2], m6
4533*c0909341SAndroid Build Coastguard Worker    mova                    m6, [o(pw_1697x16)]
4534*c0909341SAndroid Build Coastguard Worker    REPX      {IDTX16 x, 5, 6}, 7, 0, 1, 2, 3, 4
    ; m7 is scaled by pw_2048 early and parked in slot 0 so that m7 can be
    ; reused as scratch and then as the register for the row spilled to slot 2.
4535*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m7, [o(pw_2048)]
4536*c0909341SAndroid Build Coastguard Worker    mova                    m5, [rsp+16*1]
4537*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*0], m7
4538*c0909341SAndroid Build Coastguard Worker    IDTX16                   5, 7, 6
4539*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+16*2]
    ; The last IDTX16 uses m6 as both temporary and constant register, so m6
    ; is reloaded with pw_2048 right afterwards.
4540*c0909341SAndroid Build Coastguard Worker    IDTX16                   7, 6, 6
4541*c0909341SAndroid Build Coastguard Worker    mova                    m6, [o(pw_2048)]
4542*c0909341SAndroid Build Coastguard Worker    REPX      {pmulhrsw x, m6}, m0, m1, m2, m3, m4, m5, m7
4543*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*2], m5
4544*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*1], m7
4545*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).end3
4546*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [dstq+strideq*2]
    ; Clear the coefficients consumed by this iteration.
4547*c0909341SAndroid Build Coastguard Worker    pxor                    m7, m7
4548*c0909341SAndroid Build Coastguard Worker    REPX {mova [coeffq+32*x], m7}, 0, 1, 2, 3, 4, 5, 6, 7
4549*c0909341SAndroid Build Coastguard Worker
4550*c0909341SAndroid Build Coastguard Worker.loop_end:
    ; Advance coeffq by 16 bytes and consume two control bits (see the header
    ; comment of this function).
4551*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 16
4552*c0909341SAndroid Build Coastguard Worker    shr                    r3d, 2
4553*c0909341SAndroid Build Coastguard Worker    jz .ret
4554*c0909341SAndroid Build Coastguard Worker    test                   r3d, 2
4555*c0909341SAndroid Build Coastguard Worker    jnz .loop
    ; Next column strip: coeffq += 32*7 (+8 if bit 0 of r3d is set; none of the
    ; three control words set above ever has bit 0 set at this point), dstq is
    ; taken from [rsp+16*3], and that slot is advanced by another 8 bytes.
4556*c0909341SAndroid Build Coastguard Worker    mov                    r4d, r3d
4557*c0909341SAndroid Build Coastguard Worker    and                    r4d, 1
4558*c0909341SAndroid Build Coastguard Worker    lea                 coeffq, [coeffq+r4*8+32*7]
4559*c0909341SAndroid Build Coastguard Worker    mov                   dstq, [rsp+16*3]
4560*c0909341SAndroid Build Coastguard Worker    lea                     r4, [dstq+8]
4561*c0909341SAndroid Build Coastguard Worker    mov             [rsp+16*3], r4
4562*c0909341SAndroid Build Coastguard Worker    jmp .loop
4563*c0909341SAndroid Build Coastguard Worker
4564*c0909341SAndroid Build Coastguard Worker.ret:
4565*c0909341SAndroid Build Coastguard Worker    RET
4566*c0909341SAndroid Build Coastguard Worker
4567*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_dct_dct_32x32_8bpc
; Public entry point for the 32x32 dct_dct case (8bpc).
;   eob == 0 : take the .dconly shortcut, which tail-jumps into the 32x8
;              function's shared .body code.
;   eob != 0 : run idct_32x32_internal_8bpc and return.
; Declared with 16*36 bytes of stack; named arguments are dst, stride, coeff,
; eob, tx2 (x86inc maps these to r0..r4 in order).
4568*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_32x32_8bpc, 4, 6, 8, 16*36, dst, stride, coeff, eob, tx2
4569*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32
4570*c0909341SAndroid Build Coastguard Worker    LEA                     r5, $$  ; NOTE(review): presumably the base the o() macro addresses against on x86-32 - confirm
4571*c0909341SAndroid Build Coastguard Worker%endif
4572*c0909341SAndroid Build Coastguard Worker    test                  eobd, eobd
4573*c0909341SAndroid Build Coastguard Worker    jz .dconly  ; eob == 0
4574*c0909341SAndroid Build Coastguard Worker
4575*c0909341SAndroid Build Coastguard Worker    call m(idct_32x32_internal_8bpc)
4576*c0909341SAndroid Build Coastguard Worker    RET
4577*c0909341SAndroid Build Coastguard Worker
; eob == 0 path: scale the first coefficient and hand off to the 32x8 code.
4578*c0909341SAndroid Build Coastguard Worker.dconly:
4579*c0909341SAndroid Build Coastguard Worker    movd                    m1, [o(pw_2896x8)]
4580*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m1, [coeffq]  ; m0 = rounded high product of pw_2896x8 and the data at [coeffq]
4581*c0909341SAndroid Build Coastguard Worker    movd                    m2, [o(pw_8192)]
4582*c0909341SAndroid Build Coastguard Worker    mov               [coeffq], eobd  ; eobd is 0 on this path, so this stores a 32-bit zero at [coeffq]
4583*c0909341SAndroid Build Coastguard Worker    mov                    r3d, 32  ; r3 is the eob register, reused here; presumably a row count for .body - confirm
4584*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(inv_txfm_add_dct_dct_32x8_8bpc).end)]  ; address handed to .body in tx2q
4585*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_32x8_8bpc).body  ; tail jump, does not come back here
4586*c0909341SAndroid Build Coastguard Worker
4587*c0909341SAndroid Build Coastguard Worker
; idct_32x32_internal_8bpc
; Two-pass worker reached via `call` from inv_txfm_add_dct_dct_32x32_8bpc.
; It is declared with 0 args/regs/stack, so it runs on the caller's frame and
; ends with a plain `ret`.
; NOTE(review): the extra `gprsize` in every rsp offset presumably skips the
; return address pushed by that `call` - confirm.
;
; Stack slots used for general-purpose state:
;   [rsp+gprsize*1+16*35]  eob - 136 (negative selects the .fast/.fast1 paths)
;   [rsp+gprsize*2+16*35]  pass 1: original coeffq; pass 2: dstq+8 for the
;                          next iteration
;   [rsp+gprsize*3+16*35]  pass-2 loop counter
;
; Pass 1 runs 4 iterations (2 when eob - 136 is negative), advancing coeffq by
; 16 bytes each time. Pass 2 always runs 4 iterations, advancing coeffq by
; 16*32 bytes and dst by 8 bytes each time.
4588*c0909341SAndroid Build Coastguard Workercglobal idct_32x32_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
4589*c0909341SAndroid Build Coastguard Worker    mov                    r4d, 2
4590*c0909341SAndroid Build Coastguard Worker    sub                   eobd, 136
4591*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*1+16*35], eobd  ; keep eob - 136 for the later sign tests (mov leaves flags intact)
4592*c0909341SAndroid Build Coastguard Worker    mov                    r3d, 4
4593*c0909341SAndroid Build Coastguard Worker    cmovs                  r3d, r4d  ; r3d = 2 if eob - 136 was negative, else 4 (pass-1 iteration count)
4594*c0909341SAndroid Build Coastguard Worker
4595*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32
4596*c0909341SAndroid Build Coastguard Worker    LEA                     r5, $$  ; NOTE(review): presumably the base the o() macro addresses against on x86-32 - confirm
4597*c0909341SAndroid Build Coastguard Worker%endif
4598*c0909341SAndroid Build Coastguard Worker
4599*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*2+16*35], coeffq  ; remember the coefficient base for pass 2
4600*c0909341SAndroid Build Coastguard Worker
; ---- Pass 1: one iteration per 16-byte step of coeffq ----
4601*c0909341SAndroid Build Coastguard Worker.pass1_loop:
4602*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+64*1, 64*2
4603*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*19], m0                        ;in1
4604*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*26], m1                        ;in3
4605*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*23], m2                        ;in5
4606*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*22], m3                        ;in7
4607*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*21], m4                        ;in9
4608*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*24], m5                        ;in11
4609*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*25], m6                        ;in13
4610*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*20], m7                        ;in15
4611*c0909341SAndroid Build Coastguard Worker
4612*c0909341SAndroid Build Coastguard Worker    mov                   tx2d, [rsp+gprsize*1+16*35]  ; eob - 136; loaded into tx2d because r3d holds the loop counter
4613*c0909341SAndroid Build Coastguard Worker    test                  tx2d, tx2d
4614*c0909341SAndroid Build Coastguard Worker    jl .fast  ; negative -> reduced-input path
4615*c0909341SAndroid Build Coastguard Worker
; Full pass-1 path: all eight inputs are loaded for each helper call and the
; in17..in31 rows are staged on the stack before idct_8x32 .main.
4616*c0909341SAndroid Build Coastguard Worker.full:
4617*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+64*0, 64*4
4618*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
4619*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
4620*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+64*2, 64*4
4621*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
4622*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*0]
4623*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS   rsp+gprsize+16*11, 16
4624*c0909341SAndroid Build Coastguard Worker
4625*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    coeffq+64*17, 64*2
4626*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*33], m0                        ;in17
4627*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*28], m1                        ;in19
4628*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*29], m2                        ;in21
4629*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*32], m3                        ;in23
4630*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*31], m4                        ;in25
4631*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*30], m5                        ;in27
4632*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*27], m6                        ;in29
4633*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*34], m7                        ;in31
4634*c0909341SAndroid Build Coastguard Worker
4635*c0909341SAndroid Build Coastguard Worker    call m(idct_8x32_internal_8bpc).main
4636*c0909341SAndroid Build Coastguard Worker    jmp .pass1_end
4637*c0909341SAndroid Build Coastguard Worker
; Reduced pass-1 path: only four inputs are loaded per helper call, m4-m7 are
; zeroed, in17..in31 are not staged, and idct_8x32 .main_fast is called
; instead of .main.
4638*c0909341SAndroid Build Coastguard Worker.fast:
4639*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+256*0]
4640*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+256*1]
4641*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+256*2]
4642*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+256*3]
4643*c0909341SAndroid Build Coastguard Worker    pxor                    m4, m4
4644*c0909341SAndroid Build Coastguard Worker    REPX          {mova x, m4}, m5, m6, m7  ; m5..m7 = 0
4645*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
4646*c0909341SAndroid Build Coastguard Worker
4647*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
4648*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+128*1]
4649*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+128*3]
4650*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+128*5]
4651*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+128*7]
4652*c0909341SAndroid Build Coastguard Worker    pxor                    m4, m4
4653*c0909341SAndroid Build Coastguard Worker    REPX          {mova x, m4}, m5, m6, m7  ; m5..m7 = 0
4654*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
4655*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*0]
4656*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS   rsp+gprsize+16*11, 16
4657*c0909341SAndroid Build Coastguard Worker
4658*c0909341SAndroid Build Coastguard Worker    call m(idct_8x32_internal_8bpc).main_fast
4659*c0909341SAndroid Build Coastguard Worker
; Pass-1 write-back: four stages, each of which parks m7 at [rsp+gprsize+16*0],
; loads pw_8192 into m7, puts the next stage's label in tx2q and jumps to
; idct_8x8 .pass1_end1.
; NOTE(review): that helper presumably finishes by jumping through tx2q, which
; is how control reaches .pass1_end1 .. .pass1_end4 below - confirm.
4660*c0909341SAndroid Build Coastguard Worker.pass1_end:
4661*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
4662*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
4663*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end1)]
4664*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
4665*c0909341SAndroid Build Coastguard Worker
4666*c0909341SAndroid Build Coastguard Worker.pass1_end1:
4667*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS     coeffq+64*0, 64  ; store 8 rows starting at coeffq+64*0
4668*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*11, 16  ; next group from the stack buffer
4669*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
4670*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
4671*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end2)]
4672*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
4673*c0909341SAndroid Build Coastguard Worker
4674*c0909341SAndroid Build Coastguard Worker.pass1_end2:
4675*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS     coeffq+64*8, 64  ; store 8 rows starting at coeffq+64*8
4676*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*19, 16
4677*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
4678*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
4679*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end3)]
4680*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
4681*c0909341SAndroid Build Coastguard Worker
4682*c0909341SAndroid Build Coastguard Worker.pass1_end3:
4683*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+64*16, 64  ; store 8 rows starting at coeffq+64*16
4684*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*27, 16
4685*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
4686*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
4687*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end4)]
4688*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
4689*c0909341SAndroid Build Coastguard Worker
4690*c0909341SAndroid Build Coastguard Worker.pass1_end4:
4691*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+64*24, 64  ; store 8 rows starting at coeffq+64*24
4692*c0909341SAndroid Build Coastguard Worker
4693*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 16  ; step to the next 16-byte slice
4694*c0909341SAndroid Build Coastguard Worker    dec                    r3d
4695*c0909341SAndroid Build Coastguard Worker    jg .pass1_loop  ; loop while the pass-1 counter is still positive
4696*c0909341SAndroid Build Coastguard Worker
4697*c0909341SAndroid Build Coastguard Worker
; ---- Pass 2: four iterations over the buffer written by pass 1 ----
4698*c0909341SAndroid Build Coastguard Worker.pass2:
4699*c0909341SAndroid Build Coastguard Worker    mov                 coeffq, [rsp+gprsize*2+16*35]  ; rewind to the saved coefficient base
4700*c0909341SAndroid Build Coastguard Worker    mov                    r3d, 4
4701*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass2_end)]  ; .full1/.fast1 finish with `jmp tx2q`
4702*c0909341SAndroid Build Coastguard Worker
4703*c0909341SAndroid Build Coastguard Worker.pass2_loop:
4704*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*3+16*35], r3d  ; spill the loop counter; r3 is reused just below and as eobd
4705*c0909341SAndroid Build Coastguard Worker    lea                     r3, [dstq+8]
4706*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*2+16*35], r3  ; destination for the next iteration (8 bytes further right)
4707*c0909341SAndroid Build Coastguard Worker
4708*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*4 ]
4709*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*12]
4710*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*20]
4711*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*28]
4712*c0909341SAndroid Build Coastguard Worker    mova                    m4, [coeffq+16*5 ]
4713*c0909341SAndroid Build Coastguard Worker    mova                    m5, [coeffq+16*13]
4714*c0909341SAndroid Build Coastguard Worker    mova                    m6, [coeffq+16*21]
4715*c0909341SAndroid Build Coastguard Worker    mova                    m7, [coeffq+16*29]
4716*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*19], m0                        ;in1
4717*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*26], m1                        ;in3
4718*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*23], m2                        ;in5
4719*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*22], m3                        ;in7
4720*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*21], m4                        ;in9
4721*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*24], m5                        ;in11
4722*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*25], m6                        ;in13
4723*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*20], m7                        ;in15
4724*c0909341SAndroid Build Coastguard Worker
4725*c0909341SAndroid Build Coastguard Worker    mov                   eobd, [rsp+gprsize*1+16*35]  ; reload eob - 136
4726*c0909341SAndroid Build Coastguard Worker    test                  eobd, eobd
4727*c0909341SAndroid Build Coastguard Worker    jl .fast1  ; negative -> reduced-input path
4728*c0909341SAndroid Build Coastguard Worker
; Full pass-2 path: eight inputs per helper call, plus the in17..in31 rows.
4729*c0909341SAndroid Build Coastguard Worker.full1:
4730*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*0 ]
4731*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*16]
4732*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*1 ]
4733*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*17]
4734*c0909341SAndroid Build Coastguard Worker    mova                    m4, [coeffq+16*2 ]
4735*c0909341SAndroid Build Coastguard Worker    mova                    m5, [coeffq+16*18]
4736*c0909341SAndroid Build Coastguard Worker    mova                    m6, [coeffq+16*3 ]
4737*c0909341SAndroid Build Coastguard Worker    mova                    m7, [coeffq+16*19]
4738*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
4739*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
4740*c0909341SAndroid Build Coastguard Worker
4741*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*8 ]
4742*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*24]
4743*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*9 ]
4744*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*25]
4745*c0909341SAndroid Build Coastguard Worker    mova                    m4, [coeffq+16*10]
4746*c0909341SAndroid Build Coastguard Worker    mova                    m5, [coeffq+16*26]
4747*c0909341SAndroid Build Coastguard Worker    mova                    m6, [coeffq+16*11]
4748*c0909341SAndroid Build Coastguard Worker    mova                    m7, [coeffq+16*27]
4749*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
4750*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*0]
4751*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS   rsp+gprsize+16*11, 16
4752*c0909341SAndroid Build Coastguard Worker
4753*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*6 ]
4754*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*14]
4755*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*22]
4756*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*30]
4757*c0909341SAndroid Build Coastguard Worker    mova                    m4, [coeffq+16*7 ]
4758*c0909341SAndroid Build Coastguard Worker    mova                    m5, [coeffq+16*15]
4759*c0909341SAndroid Build Coastguard Worker    mova                    m6, [coeffq+16*23]
4760*c0909341SAndroid Build Coastguard Worker    mova                    m7, [coeffq+16*31]
4761*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*33], m0                        ;in17
4762*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*28], m1                        ;in19
4763*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*29], m2                        ;in21
4764*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*32], m3                        ;in23
4765*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*31], m4                        ;in25
4766*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*30], m5                        ;in27
4767*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*27], m6                        ;in29
4768*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*34], m7                        ;in31
4769*c0909341SAndroid Build Coastguard Worker
4770*c0909341SAndroid Build Coastguard Worker    call m(idct_8x32_internal_8bpc).main
4771*c0909341SAndroid Build Coastguard Worker    jmp                   tx2q  ; continue at .pass2_end
4772*c0909341SAndroid Build Coastguard Worker
; Reduced pass-2 path: four inputs per helper call with m4-m7 zeroed, then
; idct_8x32 .main_fast instead of .main.
4773*c0909341SAndroid Build Coastguard Worker.fast1:
4774*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*0 ]
4775*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*16]
4776*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*1 ]
4777*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*17]
4778*c0909341SAndroid Build Coastguard Worker    pxor                    m4, m4
4779*c0909341SAndroid Build Coastguard Worker    REPX          {mova x, m4}, m5, m6, m7  ; m5..m7 = 0
4780*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
4781*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
4782*c0909341SAndroid Build Coastguard Worker
4783*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*8 ]
4784*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*24]
4785*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*9 ]
4786*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*25]
4787*c0909341SAndroid Build Coastguard Worker    pxor                    m4, m4
4788*c0909341SAndroid Build Coastguard Worker    REPX          {mova x, m4}, m5, m6, m7  ; m5..m7 = 0
4789*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
4790*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*0]
4791*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS   rsp+gprsize+16*11, 16
4792*c0909341SAndroid Build Coastguard Worker
4793*c0909341SAndroid Build Coastguard Worker    call m(idct_8x32_internal_8bpc).main_fast
4794*c0909341SAndroid Build Coastguard Worker    jmp                   tx2q  ; continue at .pass2_end
4795*c0909341SAndroid Build Coastguard Worker
; Hand the results to idct_8x32 .end with r3 holding the address of
; .pass2_end1.
; NOTE(review): .end presumably jumps back through r3 when done - confirm.
4796*c0909341SAndroid Build Coastguard Worker.pass2_end:
4797*c0909341SAndroid Build Coastguard Worker    lea                     r3, [o(.pass2_end1)]
4798*c0909341SAndroid Build Coastguard Worker    jmp  m(idct_8x32_internal_8bpc).end
4799*c0909341SAndroid Build Coastguard Worker
4800*c0909341SAndroid Build Coastguard Worker.pass2_end1:
4801*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass2_end)]  ; re-arm the continuation for the next iteration
4802*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 16*32  ; advance to the next 16*32-byte group
4803*c0909341SAndroid Build Coastguard Worker    mov                   dstq, [rsp+gprsize*2+16*35]  ; dst saved as dstq+8 at the top of this iteration
4804*c0909341SAndroid Build Coastguard Worker    mov                    r3d, [rsp+gprsize*3+16*35]  ; restore the pass-2 counter
4805*c0909341SAndroid Build Coastguard Worker    dec                    r3d
4806*c0909341SAndroid Build Coastguard Worker    jg .pass2_loop
4807*c0909341SAndroid Build Coastguard Worker
4808*c0909341SAndroid Build Coastguard Worker    ret  ; plain return to the instruction after the caller's `call`
4809*c0909341SAndroid Build Coastguard Worker
4810*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_identity_identity_32x32_8bpc
; Public entry point for the 32x32 identity_identity case (8bpc), written as
; two nested loops whose state lives in the 16*5-byte stack frame.
;   N = 2 when eob - 136 is negative, otherwise 4.
;   The inner .loop runs N times per outer pass, advancing coeffq by 16 bytes
;   and dst by 2*stride each time, and zeroing the coefficients it consumed.
;   The outer loop runs N times, advancing the coefficient base by 64*8 bytes
;   and dst by 8 bytes each time.
;
; Stack slots:
;   [rsp+16*0 .. 16*2]      vector spills around the idct_8x8 helper calls
;   [rsp+gprsize*0+16*3]    dst for the next outer pass
;   [rsp+gprsize*1+16*3]    N, reloaded as the inner count on each outer pass
;   [rsp+gprsize*2+16*3]    remaining outer passes
;   [rsp+gprsize*3+16*3]    coefficient base of the current outer pass
4811*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_32x32_8bpc, 4, 6, 8, 16*5, dst, stride, coeff, eob, tx2
4812*c0909341SAndroid Build Coastguard Worker    mov                    r4d, 2
4813*c0909341SAndroid Build Coastguard Worker    cmp                   eobd, 136
4814*c0909341SAndroid Build Coastguard Worker    mov                    r3d, 4
4815*c0909341SAndroid Build Coastguard Worker    cmovs                  r3d, r4d  ; r3d = 2 if eob - 136 is negative, else 4
4816*c0909341SAndroid Build Coastguard Worker
4817*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32
4818*c0909341SAndroid Build Coastguard Worker    LEA                     r5, $$  ; NOTE(review): presumably the base the o() macro addresses against on x86-32 - confirm
4819*c0909341SAndroid Build Coastguard Worker%endif
4820*c0909341SAndroid Build Coastguard Worker
4821*c0909341SAndroid Build Coastguard Worker    lea                     r4, [dstq+8]
4822*c0909341SAndroid Build Coastguard Worker    mov   [rsp+gprsize*0+16*3], r4  ; dst for the next outer pass
4823*c0909341SAndroid Build Coastguard Worker    mov   [rsp+gprsize*1+16*3], r3d  ; inner-loop reload value
4824*c0909341SAndroid Build Coastguard Worker    mov   [rsp+gprsize*2+16*3], r3d  ; outer-loop counter
4825*c0909341SAndroid Build Coastguard Worker    mov   [rsp+gprsize*3+16*3], coeffq  ; coefficient base of this outer pass
4826*c0909341SAndroid Build Coastguard Worker
; Inner loop body: load eight rows at stride 64, run them through the idct_8x8
; helper tails with pw_8192 pmulhrsw scaling in between, then clear the rows.
4827*c0909341SAndroid Build Coastguard Worker.loop:
4828*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS          coeffq, 64
4829*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*1], m6
4830*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(idct_32x16_internal_8bpc).end)]  ; NOTE(review): presumably the continuation used inside .pass1_end3 - confirm
4831*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).pass1_end3
4832*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m7, [o(pw_8192)]
4833*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*0], m7  ; park scaled m7 so the register can hold the constant
4834*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
4835*c0909341SAndroid Build Coastguard Worker    REPX      {pmulhrsw x, m7}, m0, m1, m2, m3, m4, m5, m6  ; same pw_8192 scaling for m0..m6
4836*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*1], m6
4837*c0909341SAndroid Build Coastguard Worker    mova            [rsp+16*2], m5
4838*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).end3
4839*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [dstq+strideq*2]  ; dst += 2*stride
4840*c0909341SAndroid Build Coastguard Worker
4841*c0909341SAndroid Build Coastguard Worker    pxor                    m7, m7
4842*c0909341SAndroid Build Coastguard Worker    REPX   {mova [coeffq+64*x], m7}, 0,  1,  2,  3,  4,  5,  6,  7  ; zero the eight rows that were just loaded
4843*c0909341SAndroid Build Coastguard Worker
4844*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 16
4845*c0909341SAndroid Build Coastguard Worker    dec                    r3d
4846*c0909341SAndroid Build Coastguard Worker    jg .loop  ; inner loop: repeat while the count is positive
4847*c0909341SAndroid Build Coastguard Worker
; Outer loop bookkeeping.
4848*c0909341SAndroid Build Coastguard Worker    mov                    r4d, [rsp+gprsize*2+16*3]
4849*c0909341SAndroid Build Coastguard Worker    dec                    r4d
4850*c0909341SAndroid Build Coastguard Worker    jle .ret  ; no outer passes left
4851*c0909341SAndroid Build Coastguard Worker
4852*c0909341SAndroid Build Coastguard Worker    mov                   dstq, [rsp+gprsize*0+16*3]  ; dst saved for this outer pass
4853*c0909341SAndroid Build Coastguard Worker    mov                 coeffq, [rsp+gprsize*3+16*3]
4854*c0909341SAndroid Build Coastguard Worker    mov   [rsp+gprsize*2+16*3], r4  ; store the decremented outer counter
4855*c0909341SAndroid Build Coastguard Worker    lea                     r3, [dstq+8]
4856*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 64*8  ; previous base + 64*8 bytes
4857*c0909341SAndroid Build Coastguard Worker    mov   [rsp+gprsize*0+16*3], r3  ; dst for the outer pass after this one
4858*c0909341SAndroid Build Coastguard Worker    mov                    r3d, [rsp+gprsize*1+16*3]  ; reload the inner count
4859*c0909341SAndroid Build Coastguard Worker    mov   [rsp+gprsize*3+16*3], coeffq
4860*c0909341SAndroid Build Coastguard Worker    jmp .loop
4861*c0909341SAndroid Build Coastguard Worker
4862*c0909341SAndroid Build Coastguard Worker.ret:
4863*c0909341SAndroid Build Coastguard Worker    RET
4864*c0909341SAndroid Build Coastguard Worker
4865*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_dct_dct_16x64_8bpc
; Public entry point for the 16x64 dct_dct case (8bpc).
;   eob == 0 : take the .dconly shortcut, which tail-jumps into the 16x4
;              function's .dconly code with tx2q pointing back at .end.
;   eob != 0 : run idct_16x64_internal_8bpc and return.
; Declared with 16*68 bytes of stack; named arguments are dst, stride, coeff,
; eob, tx2 (x86inc maps these to r0..r4 in order).
4866*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_16x64_8bpc, 4, 6, 8, 16*68, dst, stride, coeff, eob, tx2
4867*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32
4868*c0909341SAndroid Build Coastguard Worker    LEA                     r5, $$  ; NOTE(review): presumably the base the o() macro addresses against on x86-32 - confirm
4869*c0909341SAndroid Build Coastguard Worker%endif
4870*c0909341SAndroid Build Coastguard Worker    test                  eobd, eobd
4871*c0909341SAndroid Build Coastguard Worker    jz .dconly  ; eob == 0
4872*c0909341SAndroid Build Coastguard Worker    call m(idct_16x64_internal_8bpc)
; .end is also the address passed in tx2q by the .dconly path below.
4873*c0909341SAndroid Build Coastguard Worker.end:
4874*c0909341SAndroid Build Coastguard Worker    RET
4875*c0909341SAndroid Build Coastguard Worker
; eob == 0 path: scale the first coefficient and hand off to the 16x4 code.
4876*c0909341SAndroid Build Coastguard Worker.dconly:
4877*c0909341SAndroid Build Coastguard Worker    movd                    m1, [o(pw_2896x8)]
4878*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m1, [coeffq]  ; m0 = rounded high product of pw_2896x8 and the data at [coeffq]
4879*c0909341SAndroid Build Coastguard Worker    movd                    m2, [o(pw_8192)]
4880*c0909341SAndroid Build Coastguard Worker    mov               [coeffq], eobd  ; eobd is 0 on this path, so this stores a 32-bit zero at [coeffq]
4881*c0909341SAndroid Build Coastguard Worker    mov                    r2d, 32  ; r2 is the coeff register, reused now that [coeffq] is done; presumably a count for the shared code - confirm
4882*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end)]  ; address handed to the shared code in tx2q
4883*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_16x4_8bpc).dconly  ; tail jump into the 16x4 dconly code
4884*c0909341SAndroid Build Coastguard Worker
4885*c0909341SAndroid Build Coastguard Worker
4886*c0909341SAndroid Build Coastguard Workercglobal idct_16x64_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
4887*c0909341SAndroid Build Coastguard Worker    mov                    r4d, 2
4888*c0909341SAndroid Build Coastguard Worker    sub                   eobd, 151
4889*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*1+16*67], eobd
4890*c0909341SAndroid Build Coastguard Worker    mov                    r3d, 4
4891*c0909341SAndroid Build Coastguard Worker    cmovs                  r3d, r4d
4892*c0909341SAndroid Build Coastguard Worker
4893*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32
4894*c0909341SAndroid Build Coastguard Worker    LEA                     r5, $$
4895*c0909341SAndroid Build Coastguard Worker%endif
4896*c0909341SAndroid Build Coastguard Worker
4897*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*2+16*67], coeffq
4898*c0909341SAndroid Build Coastguard Worker
4899*c0909341SAndroid Build Coastguard Worker.pass1_loop:
4900*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+64*0, 64*2
4901*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
4902*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
4903*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+64*1, 64*2
4904*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
4905*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
4906*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end)]
4907*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
4908*c0909341SAndroid Build Coastguard Worker
4909*c0909341SAndroid Build Coastguard Worker.pass1_end:
4910*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS     coeffq+64*8, 64
4911*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    rsp+gprsize+16*3, 16
4912*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
4913*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
4914*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end1)]
4915*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
4916*c0909341SAndroid Build Coastguard Worker
4917*c0909341SAndroid Build Coastguard Worker.pass1_end1:
4918*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS     coeffq+64*0, 64
4919*c0909341SAndroid Build Coastguard Worker
4920*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 16
4921*c0909341SAndroid Build Coastguard Worker    dec                    r3d
4922*c0909341SAndroid Build Coastguard Worker    jg .pass1_loop
4923*c0909341SAndroid Build Coastguard Worker
4924*c0909341SAndroid Build Coastguard Worker    mov                 coeffq, [rsp+gprsize*2+16*67]
4925*c0909341SAndroid Build Coastguard Worker    mov                    r3d, 2
4926*c0909341SAndroid Build Coastguard Worker    lea                     r4, [dstq+8]
4927*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*2+16*67], r4
4928*c0909341SAndroid Build Coastguard Worker    lea                     r4, [o(.end1)]
4929*c0909341SAndroid Build Coastguard Worker
4930*c0909341SAndroid Build Coastguard Worker.pass2_loop:
4931*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*3+16*67], r3d
4932*c0909341SAndroid Build Coastguard Worker    mov                   eobd, [rsp+gprsize*1+16*67]
4933*c0909341SAndroid Build Coastguard Worker
4934*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*4 ]            ;in1
4935*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*12]            ;in3
4936*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*20]            ;in5
4937*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*28]            ;in7
4938*c0909341SAndroid Build Coastguard Worker    mova                    m4, [coeffq+16*5 ]            ;in9
4939*c0909341SAndroid Build Coastguard Worker    mova                    m5, [coeffq+16*13]            ;in11
4940*c0909341SAndroid Build Coastguard Worker    mova                    m6, [coeffq+16*21]            ;in13
4941*c0909341SAndroid Build Coastguard Worker    mova                    m7, [coeffq+16*29]            ;in15
4942*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*35], m0                        ;in1
4943*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*49], m1                        ;in3
4944*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*43], m2                        ;in5
4945*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*41], m3                        ;in7
4946*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*39], m4                        ;in9
4947*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*45], m5                        ;in11
4948*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*47], m6                        ;in13
4949*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*37], m7                        ;in15
4950*c0909341SAndroid Build Coastguard Worker
4951*c0909341SAndroid Build Coastguard Worker    pxor                    m4, m4
4952*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*0]
4953*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*1]
4954*c0909341SAndroid Build Coastguard Worker
4955*c0909341SAndroid Build Coastguard Worker    test                  eobd, eobd
4956*c0909341SAndroid Build Coastguard Worker    jl .fast
4957*c0909341SAndroid Build Coastguard Worker
; .full -- reached by fall-through when the `test eobd, eobd` / `jl .fast`
; just above does not branch. On entry m0/m1 already hold [coeffq+16*0] and
; [coeffq+16*1], and m4 is zero (all three are set right before this label).
; The block runs four stages, each ending in a call, then jumps to .end.
4958*c0909341SAndroid Build Coastguard Worker.full:
4959*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*2]
4960*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*3]
4961*c0909341SAndroid Build Coastguard Worker
; Stage 1: m0-m3 come from coeffq rows 0-3; m5-m7 are filled from the zeroed
; m4 before calling the 8x8 idct core. The result is saved starting at stack
; slot 16*3 (SAVE_7ROWS is defined outside this view).
4962*c0909341SAndroid Build Coastguard Worker    REPX          {mova x, m4}, m5, m6, m7
4963*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
4964*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
4965*c0909341SAndroid Build Coastguard Worker
; Stage 2: same register pattern, with m0-m3 from coeffq rows 16-19 and
; m4-m7 zero, fed to the 16x8 idct core.
4966*c0909341SAndroid Build Coastguard Worker    pxor                    m4, m4
4967*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*16]
4968*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*17]
4969*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*18]
4970*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*19]
4971*c0909341SAndroid Build Coastguard Worker
4972*c0909341SAndroid Build Coastguard Worker    REPX          {mova x, m4}, m5, m6, m7
4973*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
; m7 is reloaded from stack slot 0 before the 8-row save -- presumably the
; callee leaves its eighth output row there; confirm against
; idct_16x8_internal_8bpc.main.
4974*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*0]
4975*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS   rsp+gprsize+16*11, 16
4976*c0909341SAndroid Build Coastguard Worker
; Stage 3: copy eight coefficient rows (coeffq rows 8,24,9,25,10,26,11,27)
; into stack slots 19,26,23,22,21,24,25,20. The first four stores use the
; same source rows and slots that the .fast path labels in1/in3/in5/in7.
4977*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*8 ]
4978*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*24]
4979*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*9 ]
4980*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*25]
4981*c0909341SAndroid Build Coastguard Worker    mova                    m4, [coeffq+16*10]
4982*c0909341SAndroid Build Coastguard Worker    mova                    m5, [coeffq+16*26]
4983*c0909341SAndroid Build Coastguard Worker    mova                    m6, [coeffq+16*11]
4984*c0909341SAndroid Build Coastguard Worker    mova                    m7, [coeffq+16*27]
4985*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*19], m0
4986*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*26], m1
4987*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*23], m2
4988*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*22], m3
4989*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*21], m4
4990*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*24], m5
4991*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*25], m6
4992*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*20], m7
4993*c0909341SAndroid Build Coastguard Worker
4994*c0909341SAndroid Build Coastguard Worker    call m(idct_8x32_internal_8bpc).main_fast
; The eight rows left in m0-m7 are saved over the stage-1 area (slot 16*3).
4995*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    rsp+gprsize+16*3, 16
4996*c0909341SAndroid Build Coastguard Worker
; Stage 4: stage the upper odd inputs in17..in31 into the slots that .main
; reads (it addresses the same slot numbers with one extra gprsize).
; in1..in15 were stored to their slots before the eob test above.
4997*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*6 ]            ;in17
4998*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*14]            ;in19
4999*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*22]            ;in21
5000*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*30]            ;in23
5001*c0909341SAndroid Build Coastguard Worker    mova                    m4, [coeffq+16*7 ]            ;in25
5002*c0909341SAndroid Build Coastguard Worker    mova                    m5, [coeffq+16*15]            ;in27
5003*c0909341SAndroid Build Coastguard Worker    mova                    m6, [coeffq+16*23]            ;in29
5004*c0909341SAndroid Build Coastguard Worker    mova                    m7, [coeffq+16*31]            ;in31
5005*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*63], m0                        ;in17
5006*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*53], m1                        ;in19
5007*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*55], m2                        ;in21
5008*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*61], m3                        ;in23
5009*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*59], m4                        ;in25
5010*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*57], m5                        ;in27
5011*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*51], m6                        ;in29
5012*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*65], m7                        ;in31
5013*c0909341SAndroid Build Coastguard Worker
; Full variant of the odd-input stage; then skip the .fast path.
5014*c0909341SAndroid Build Coastguard Worker    call .main
5015*c0909341SAndroid Build Coastguard Worker    jmp  .end
5016*c0909341SAndroid Build Coastguard Worker
; .fast -- taken when `test eobd, eobd` sets the sign flag (`jl .fast` above).
; It mirrors .full but reads fewer coefficient rows: only m0/m1 are loaded for
; each of the first two cores, only four rows are staged for the 8x32 core,
; and in17..in31 are not staged at all. On entry m0/m1 hold [coeffq+16*0] and
; [coeffq+16*1], and m4 is zero (set before the branch).
5017*c0909341SAndroid Build Coastguard Worker.fast:
; Every input register other than m0/m1 is filled from the zeroed m4.
5018*c0909341SAndroid Build Coastguard Worker    REPX          {mova x, m4}, m2, m3, m5, m6, m7
5019*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
5020*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
5021*c0909341SAndroid Build Coastguard Worker
; Second core: m0/m1 from coeffq rows 16/17, everything else zero.
5022*c0909341SAndroid Build Coastguard Worker    pxor                    m4, m4
5023*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*16]
5024*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*17]
5025*c0909341SAndroid Build Coastguard Worker
5026*c0909341SAndroid Build Coastguard Worker    REPX          {mova x, m4}, m2, m3, m5, m6, m7
5027*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
; m7 is reloaded from stack slot 0 before saving -- presumably the callee
; leaves its eighth output row there; confirm against the callee.
5028*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*0]
5029*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS   rsp+gprsize+16*11, 16
5030*c0909341SAndroid Build Coastguard Worker
; Only four rows are staged for the 8x32 core (slots 19, 26, 23, 22); the
; .main_veryfast entry is called instead of .main_fast.
5031*c0909341SAndroid Build Coastguard Worker    mova                    m0, [coeffq+16*8 ]
5032*c0909341SAndroid Build Coastguard Worker    mova                    m1, [coeffq+16*24]
5033*c0909341SAndroid Build Coastguard Worker    mova                    m2, [coeffq+16*9 ]
5034*c0909341SAndroid Build Coastguard Worker    mova                    m3, [coeffq+16*25]
5035*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*19], m0                        ;in1
5036*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*26], m1                        ;in3
5037*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*23], m2                        ;in5
5038*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*22], m3                        ;in7
5039*c0909341SAndroid Build Coastguard Worker
5040*c0909341SAndroid Build Coastguard Worker    call m(idct_8x32_internal_8bpc).main_veryfast
5041*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    rsp+gprsize+16*3, 16
5042*c0909341SAndroid Build Coastguard Worker
; Reduced odd-input stage, which reads only in1..in15. Execution then falls
; through into .end.
5043*c0909341SAndroid Build Coastguard Worker    call .main_fast
5044*c0909341SAndroid Build Coastguard Worker
; .end -- common tail of the .full and .fast paths. Reloads the eight rows
; saved at stack slot 16*3 into m0-m7, parks m7 in slot 0, copies r4 into r3
; and tail-jumps into the 8x32 routine's .end2.
; r4 is loaded with the address of .end1 in .end1 below; what .end2 does with
; r3 is outside this view -- presumably it is the continuation address that
; brings control back to .end1; confirm against idct_8x32_internal_8bpc.end2.
5045*c0909341SAndroid Build Coastguard Worker.end:
5046*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*3, 16
5047*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
5048*c0909341SAndroid Build Coastguard Worker    mov                     r3, r4
5049*c0909341SAndroid Build Coastguard Worker    jmp  m(idct_8x32_internal_8bpc).end2
5050*c0909341SAndroid Build Coastguard Worker
; .end1 -- writes out the rows stored from stack slot 16*35 onward, then
; handles the loop bookkeeping for the next pass-2 iteration.
5051*c0909341SAndroid Build Coastguard Worker.end1:
5052*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*35, 16
5053*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [dstq+strideq*2]
; r3 = base of the area .write uses: it spills to [r3+16*0..2] and loads
; further rows from r3+16*11, r3+16*19 and r3+16*27.
5054*c0909341SAndroid Build Coastguard Worker    lea                     r3, [rsp+16*32+gprsize]
5055*c0909341SAndroid Build Coastguard Worker    call .write
; Reload the destination pointer saved for the next iteration, and a 32-bit
; value from the neighbouring slot that is decremented and tested below
; (presumably the loop counter stored by .pass2_loop, which is not visible
; here -- confirm).
5056*c0909341SAndroid Build Coastguard Worker    mov                   dstq, [rsp+gprsize*2+16*67]
5057*c0909341SAndroid Build Coastguard Worker    mov                    r3d, [rsp+gprsize*3+16*67]
; Store dstq+8 back, so the saved pointer advances by 8 bytes per iteration.
5058*c0909341SAndroid Build Coastguard Worker    lea                     r4, [dstq+8]
5059*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*2+16*67], r4
; r4 = address of .end1; .end copies it into r3 before jumping to .end2.
5060*c0909341SAndroid Build Coastguard Worker    lea                     r4, [o(.end1)]
5061*c0909341SAndroid Build Coastguard Worker
; Loop back while the decremented counter is still greater than zero
; (signed); otherwise return.
5062*c0909341SAndroid Build Coastguard Worker    dec                    r3d
5063*c0909341SAndroid Build Coastguard Worker    jg .pass2_loop
5064*c0909341SAndroid Build Coastguard Worker    ret
; .write -- clears 16*32 bytes of the coefficient buffer and writes out four
; groups of eight rows.
; In:    m0-m7 = first group of rows, r3 = scratch/row base, coeffq, dstq,
;        strideq.
; Out:   coeffq is 16*32 bytes past its entry value; dstq has been advanced
;        by at least strideq*4 per group (WRITE_8X4 is defined outside this
;        view and may advance it further).
; Clobb: r4, m0-m7, [r3+16*0..2], flags.
5065*c0909341SAndroid Build Coastguard Worker.write:
; Spill m7 so the register can serve as the zero source for the clear loop.
5066*c0909341SAndroid Build Coastguard Worker    mova             [r3+16*0], m7
5067*c0909341SAndroid Build Coastguard Worker    mov                     r4, -16*32
5068*c0909341SAndroid Build Coastguard Worker    pxor                    m7, m7
; r4 is negative, so this moves coeffq forward by 16*32; [coeffq+r4] then
; starts at the original coeffq and the loop ends when r4 reaches zero.
5069*c0909341SAndroid Build Coastguard Worker    sub                 coeffq, r4
5070*c0909341SAndroid Build Coastguard Worker.zero_loop:
; Two 16-byte stores per iteration (32 bytes), 16 iterations in total.
5071*c0909341SAndroid Build Coastguard Worker    mova      [coeffq+r4+16*0], m7
5072*c0909341SAndroid Build Coastguard Worker    mova      [coeffq+r4+16*1], m7
5073*c0909341SAndroid Build Coastguard Worker    add                     r4, 16*2
5074*c0909341SAndroid Build Coastguard Worker    jl .zero_loop
; First group: m7 was already spilled above, so enter at .write_main2.
5075*c0909341SAndroid Build Coastguard Worker    call .write_main2
; Remaining groups are loaded from r3+16*11, +16*19 and +16*27. The last one
; falls through into .write_main, whose `ret` returns to .write's caller.
5076*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS        r3+16*11, 16
5077*c0909341SAndroid Build Coastguard Worker    call .write_main
5078*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS        r3+16*19, 16
5079*c0909341SAndroid Build Coastguard Worker    call .write_main
5080*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS        r3+16*27, 16
; .write_main -- entry for callers that still hold the eighth row in m7.
5081*c0909341SAndroid Build Coastguard Worker.write_main:
5082*c0909341SAndroid Build Coastguard Worker    mova             [r3+16*0], m7
; .write_main2 -- entry for callers that already stored the eighth row at
; [r3+16*0].
5083*c0909341SAndroid Build Coastguard Worker.write_main2:
; Multiply all eight rows by pw_2048 with pmulhrsw. Assuming pw_2048 holds
; words of 2048, this is a rounded arithmetic right shift by 4.
5084*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_2048)]
5085*c0909341SAndroid Build Coastguard Worker    REPX      {pmulhrsw x, m7}, m0, m1, m2, m3, m4, m5, m6
5086*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m7, [r3+16*0]
; Rows 5-7 are spilled because m5-m7 are passed as the trailing arguments of
; WRITE_8X4 (presumably temporaries -- the macro is defined outside this
; view).
5087*c0909341SAndroid Build Coastguard Worker    mova             [r3+16*2], m5
5088*c0909341SAndroid Build Coastguard Worker    mova             [r3+16*1], m6
5089*c0909341SAndroid Build Coastguard Worker    mova             [r3+16*0], m7
5090*c0909341SAndroid Build Coastguard Worker    WRITE_8X4                0, 1, 2, 3, 5, 6, 7
5091*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [dstq+strideq*2]
; Second half: row 4 from m4, rows 5-7 from their spill slots.
5092*c0909341SAndroid Build Coastguard Worker    WRITE_8X4                4, [r3+16*2], [r3+16*1], [r3+16*0], 5, 6, 7
5093*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [dstq+strideq*2]
5094*c0909341SAndroid Build Coastguard Worker    ret
5095*c0909341SAndroid Build Coastguard Worker
5096*c0909341SAndroid Build Coastguard Worker
; .main_fast -- reduced-input entry to the odd-coefficient stage. It reads
; only the eight inputs labelled in1, in15, in9, in7, in5, in11, in13, in3
; (stack slots 35, 37, 39, 41, 43, 45, 47, 49) and ends with `jmp .main2`.
;
; Compared with .main, the partner input of each pair (in31, in17, in23, in25,
; in27, in21, in19, in29) is never read. Each pmulhrsw result is used for both
; members of its pair (the "tA,tB" comments), which matches what .main's
; paddsw/psubsw would give if the partner products were zero.
;
; Stack offsets use gprsize*2 where the pass-2 body uses gprsize for the same
; slot numbers (e.g. 16*35 for in1) -- presumably the extra gprsize accounts
; for the return address pushed by the `call` into this routine; confirm.
;
; Clobb: m0-m7, and slots 35-46, 48 and 53-66 of the stack area. The last
;        group is not stored; it is handed to .main2 in registers.
; On the jump to .main2: m0 = m5 = in3 * pw_m301x8 (t46/t47),
;        m3 = m4 = in3 * pw_4085x8 (t48/t49), m6 = in13 * pw_1285x8 (t44/t45),
;        m7 = pd_2048.
5097*c0909341SAndroid Build Coastguard WorkerALIGN function_align
5098*c0909341SAndroid Build Coastguard Workercglobal_label .main_fast
5099*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*35]     ;in1
5100*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m0, [o(pw_4095x8)]        ;t62,t63
5101*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, [o(pw_101x8)]             ;t32,t33
; m7 = pd_2048 stays live for the rest of the routine; it is the fifth
; argument of every ITX_MULSUB_2W below and in .main2.
5102*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pd_2048)]
5103*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*35], m0                        ;t32
5104*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*66], m3                        ;t63
5105*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            3, 0, 1, 2, 7,  401, 4076    ;t33a, t62a
5106*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*36], m3                        ;t33a
5107*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*65], m0                        ;t62a
5108*c0909341SAndroid Build Coastguard Worker
; Each remaining group repeats the pattern: load one input, form two scaled
; products, store the unrotated pair, rotate with ITX_MULSUB_2W, store the
; rotated pair. Groups alternate between the m0/m3 and m1/m2 register pairs.
5109*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*37]     ;in15
5110*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m1, [o(pw_3822x8)]        ;t60,t61
5111*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, [o(pw_m1474x8)]           ;t34,t35
5112*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*38], m1                        ;t35
5113*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*63], m2                        ;t60
5114*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            2, 1, 0, 3, 7, m4076, 401    ;t34a, t61a
5115*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*37], m2                        ;t34a
5116*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*64], m1                        ;t61a
5117*c0909341SAndroid Build Coastguard Worker
5118*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*39]     ;in9
5119*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m0, [o(pw_3996x8)]        ;t58,t59
5120*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, [o(pw_897x8)]             ;t36,t37
5121*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*39], m0                        ;t36
5122*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*62], m3                        ;t59
5123*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            3, 0, 1, 2, 7, 3166, 2598    ;t37a, t58a
5124*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*40], m3                        ;t37a
5125*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*61], m0                        ;t58a
5126*c0909341SAndroid Build Coastguard Worker
5127*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*41]     ;in7
5128*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m1, [o(pw_4036x8)]        ;t56,t57
5129*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, [o(pw_m700x8)]            ;t38,t39
5130*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*42], m1                        ;t39
5131*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*59], m2                        ;t56
5132*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            2, 1, 0, 3, 7, m2598, 3166   ;t38a, t57a
5133*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*41], m2                        ;t38a
5134*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*60], m1                        ;t57a
5135*c0909341SAndroid Build Coastguard Worker
5136*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*43]     ;in5
5137*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m0, [o(pw_4065x8)]        ;t54,t55
5138*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, [o(pw_501x8)]             ;t40,t41
5139*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*43], m0                        ;t40
5140*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*58], m3                        ;t55
5141*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            3, 0, 1, 2, 7, 1931, 3612    ;t41a, t54a
5142*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*44], m3                        ;t41a
5143*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*57], m0                        ;t54a
5144*c0909341SAndroid Build Coastguard Worker
5145*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*45]     ;in11
5146*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m1, [o(pw_3948x8)]        ;t52,t53
5147*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, [o(pw_m1092x8)]           ;t42,t43
5148*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*46], m1                        ;t43
5149*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*55], m2                        ;t52
5150*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            2, 1, 0, 3, 7, m3612, 1931   ;t42a, t53a
5151*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*45], m2                        ;t42a
5152*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*56], m1                        ;t53a
5153*c0909341SAndroid Build Coastguard Worker
; in13 group: the low product is kept in m6 rather than stored, because
; .main2 reads it from m6.
5154*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*47]     ;in13
5155*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m0, [o(pw_3889x8)]        ;t50,t51
5156*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, [o(pw_1285x8)]            ;t44,t45
5157*c0909341SAndroid Build Coastguard Worker    mova                    m6, m0
5158*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*54], m3                        ;t51
5159*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            3, 0, 1, 2, 7, 3920, 1189    ;t45a, t50a
5160*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*48], m3                        ;t45a
5161*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*53], m0                        ;t50a
5162*c0909341SAndroid Build Coastguard Worker
; in3 group: both products are duplicated so that m0/m5 and m3/m4 hold the
; values .main2 expects; nothing is stored here.
5163*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*49]     ;in3
5164*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m0, [o(pw_4085x8)]        ;t48,t49
5165*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, [o(pw_m301x8)]            ;t46,t47
5166*c0909341SAndroid Build Coastguard Worker    mova                    m4, m3
5167*c0909341SAndroid Build Coastguard Worker    mova                    m5, m0
5168*c0909341SAndroid Build Coastguard Worker
5169*c0909341SAndroid Build Coastguard Worker    jmp .main2
5170*c0909341SAndroid Build Coastguard Worker
; .main -- full-input entry to the odd-coefficient stage. It reads all sixteen
; inputs labelled in1..in31 (odd indices) from the stack and processes them as
; eight pairs, then falls through into .main2.
;
; Per pair: four pmulhrsw products (two per input), a saturating add and
; subtract of matching products (paddsw/psubsw), one ITX_MULSUB_2W on the two
; differences, then four stores. Results overwrite the input slots.
;
; Input slots (16-byte units, relative to rsp+gprsize*2):
;   in1=35  in31=65  in17=63  in15=37  in9=39  in23=61  in25=59  in7=41
;   in5=43  in27=57  in21=55  in11=45  in13=47 in19=53  in29=51  in3=49
; The same slot numbers are written by the pass-2 body with a single gprsize;
; presumably the extra gprsize here accounts for the return address pushed by
; the `call` into this routine -- confirm.
;
; Clobb: m0-m7, and slots 35-46, 48 and 53-66 of the stack area.
; On fall-through to .main2: m0 = t47, m3 = t48, m4 = t49, m5 = t46,
;        m6 = t44, m7 = pd_2048.
5171*c0909341SAndroid Build Coastguard WorkerALIGN function_align
5172*c0909341SAndroid Build Coastguard Workercglobal_label .main
5173*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*35]     ;in1
5174*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*65]     ;in31
5175*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m0, [o(pw_4095x8)]        ;t63a
5176*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, [o(pw_101x8)]             ;t32a
5177*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m1, [o(pw_2967x8)]        ;t62a
5178*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, [o(pw_m2824x8)]           ;t33a
; m7 = pd_2048 stays live for the rest of the routine; it is the fifth
; argument of every ITX_MULSUB_2W below and in .main2.
5179*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pd_2048)]
5180*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m1                    ;t33
5181*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m1                        ;t32
5182*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m3, m2                    ;t62
5183*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m2                        ;t63
5184*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 1, 2, 7,  401, 4076    ;t33a, t62a
5185*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*35], m0                        ;t32
5186*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*36], m5                        ;t33a
5187*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*65], m4                        ;t62a
5188*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*66], m3                        ;t63
5189*c0909341SAndroid Build Coastguard Worker
; In the pairs whose first input has the higher index (in17, in25, in21,
; in29), the low-side psubsw has its operands in the other order (m1 - m0).
5190*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*63]     ;in17
5191*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*37]     ;in15
5192*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m0, [o(pw_3745x8)]        ;t61a
5193*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, [o(pw_1660x8)]            ;t34a
5194*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m1, [o(pw_3822x8)]        ;t60a
5195*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, [o(pw_m1474x8)]           ;t35a
5196*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m1, m0                    ;t34
5197*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m1                        ;t35
5198*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m2, m3                    ;t61
5199*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m2                        ;t60
5200*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 1, 2, 7, m4076, 401    ;t34a, t61a
5201*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*37], m5                        ;t34a
5202*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*38], m0                        ;t35
5203*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*63], m3                        ;t60
5204*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*64], m4                        ;t61a
5205*c0909341SAndroid Build Coastguard Worker
5206*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*39]     ;in9
5207*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*61]     ;in23
5208*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m0, [o(pw_3996x8)]        ;t59a
5209*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, [o(pw_897x8)]             ;t36a
5210*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m1, [o(pw_3461x8)]        ;t58a
5211*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, [o(pw_m2191x8)]           ;t37a
5212*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m1                    ;t37
5213*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m1                        ;t36
5214*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m3, m2                    ;t58
5215*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m2                        ;t59
5216*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 1, 2, 7, 3166, 2598    ;t37a, t58a
5217*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*39], m0                        ;t36
5218*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*40], m5                        ;t37a
5219*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*61], m4                        ;t58a
5220*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*62], m3                        ;t59
5221*c0909341SAndroid Build Coastguard Worker
5222*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*59]     ;in25
5223*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*41]     ;in7
5224*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m0, [o(pw_3349x8)]        ;t57a
5225*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, [o(pw_2359x8)]            ;t38a
5226*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m1, [o(pw_4036x8)]        ;t56a
5227*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, [o(pw_m700x8)]            ;t39a
5228*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m1, m0                    ;t38
5229*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m1                        ;t39
5230*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m2, m3                    ;t57
5231*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m2                        ;t56
5232*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 1, 2, 7, m2598, 3166   ;t38a, t57a
5233*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*41], m5                        ;t38a
5234*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*42], m0                        ;t39
5235*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*59], m3                        ;t56
5236*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*60], m4                        ;t57a
5237*c0909341SAndroid Build Coastguard Worker
5238*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*43]     ;in5
5239*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*57]     ;in27
5240*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m0, [o(pw_4065x8)]        ;t55a
5241*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, [o(pw_501x8)]             ;t40a
5242*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m1, [o(pw_3229x8)]        ;t54a
5243*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, [o(pw_m2520x8)]           ;t41a
5244*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m1                    ;t41
5245*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m1                        ;t40
5246*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m3, m2                    ;t54
5247*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m2                        ;t55
5248*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 1, 2, 7, 1931, 3612    ;t41a, t54a
5249*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*43], m0                        ;t40
5250*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*44], m5                        ;t41a
5251*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*57], m4                        ;t54a
5252*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*58], m3                        ;t55
5253*c0909341SAndroid Build Coastguard Worker
5254*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*55]     ;in21
5255*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*45]     ;in11
5256*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m0, [o(pw_3564x8)]        ;t53a
5257*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, [o(pw_2019x8)]            ;t42a
5258*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m1, [o(pw_3948x8)]        ;t52a
5259*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, [o(pw_m1092x8)]           ;t43a
5260*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m1, m0                    ;t42
5261*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m1                        ;t43
5262*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m2, m3                    ;t53
5263*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m2                        ;t52
5264*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 1, 2, 7, m3612, 1931   ;t42a, t53a
5265*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*45], m5                        ;t42a
5266*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*46], m0                        ;t43
5267*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*55], m3                        ;t52
5268*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*56], m4                        ;t53a
5269*c0909341SAndroid Build Coastguard Worker
; in13/in19 pair: t44 is kept in m6 instead of being stored, because .main2
; reads it from m6.
5270*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*47]     ;in13
5271*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*53]     ;in19
5272*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m0, [o(pw_3889x8)]        ;t51a
5273*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, [o(pw_1285x8)]            ;t44a
5274*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m1, [o(pw_3659x8)]        ;t50a
5275*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, [o(pw_m1842x8)]           ;t45a
5276*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m1                    ;t45
5277*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m1                        ;t44
5278*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m3, m2                    ;t50
5279*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m2                        ;t51
5280*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 1, 2, 7, 3920, 1189    ;t45a, t50a
5281*c0909341SAndroid Build Coastguard Worker    mova                    m6, m0
5282*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*48], m5                        ;t45a
5283*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*53], m4                        ;t50a
5284*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*54], m3                        ;t51
5285*c0909341SAndroid Build Coastguard Worker
; in29/in3 pair: nothing is stored. The differences land in m5 (t46) and m4
; (t49), the register numbers .main2 passes to its first ITX_MULSUB_2W, and
; the sums stay in m0 (t47) and m3 (t48).
5286*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*51]     ;in29
5287*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*49]     ;in3
5288*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m3, m0, [o(pw_3102x8)]        ;t49a
5289*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, [o(pw_2675x8)]            ;t46a
5290*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m2, m1, [o(pw_4085x8)]        ;t48a
5291*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m1, [o(pw_m301x8)]            ;t47a
5292*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1, m0                    ;t46
5293*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m1                        ;t47
5294*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m2, m3                    ;t49
5295*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m2                        ;t48
5296*c0909341SAndroid Build Coastguard Worker
5297*c0909341SAndroid Build Coastguard WorkerALIGN function_align
5298*c0909341SAndroid Build Coastguard Worker.main2:
5299*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            4, 5, 1, 2, 7, m1189, 3920   ;t46a, t49a
5300*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*54]     ;t51
5301*c0909341SAndroid Build Coastguard Worker    psubsw                  m2, m0, m6                    ;t44a
5302*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m6                        ;t47a
5303*c0909341SAndroid Build Coastguard Worker    psubsw                  m6, m3, m1                    ;t51a
5304*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m1                        ;t48a
5305*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*50], m0                        ;t47a
5306*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*51], m3                        ;t48a
5307*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            6, 2, 0, 3, 7, m2276, 3406   ;t44, t51
5308*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*47], m6                        ;t44
5309*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*54], m2                        ;t51
5310*c0909341SAndroid Build Coastguard Worker
5311*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*48]     ;t45a
5312*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*53]     ;t50a
5313*c0909341SAndroid Build Coastguard Worker    psubsw                  m2, m4, m0                    ;t45
5314*c0909341SAndroid Build Coastguard Worker    paddsw                  m4, m0                        ;t46
5315*c0909341SAndroid Build Coastguard Worker    psubsw                  m6, m5, m3                    ;t50
5316*c0909341SAndroid Build Coastguard Worker    paddsw                  m5, m3                        ;t49
5317*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            6, 2, 0, 3, 7, m2276, 3406   ;t45a, t50a
5318*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*48], m6                        ;t45a
5319*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*49], m4                        ;t46
5320*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*52], m5                        ;t49
5321*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*53], m2                        ;t50a
5322*c0909341SAndroid Build Coastguard Worker
5323*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*43]     ;t40
5324*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*46]     ;t43
5325*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*55]     ;t52
5326*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*58]     ;t55
5327*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m2                    ;t43a
5328*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m2                        ;t40a
5329*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1, m3                    ;t52a
5330*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m3                        ;t55a
5331*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 2, 3, 7, 3406, 2276    ;t43, t52
5332*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*43], m0                        ;t40a
5333*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*46], m5                        ;t43
5334*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*55], m4                        ;t52
5335*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*58], m1                        ;t55a
5336*c0909341SAndroid Build Coastguard Worker
5337*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*44]     ;t41a
5338*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*45]     ;t42a
5339*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*56]     ;t53a
5340*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*57]     ;t54a
5341*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m2                    ;t42
5342*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m2                        ;t41
5343*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1, m3                    ;t53
5344*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m3                        ;t54
5345*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 2, 3, 7, 3406, 2276    ;t42a, t53a
5346*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*44], m0                        ;t41
5347*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*45], m5                        ;t42a
5348*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*56], m4                        ;t53a
5349*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*57], m1                        ;t54
5350*c0909341SAndroid Build Coastguard Worker
5351*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*41]     ;t38a
5352*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*40]     ;t37a
5353*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*61]     ;t58a
5354*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*60]     ;t57a
5355*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m2                    ;t37
5356*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m2                        ;t38
5357*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1, m3                    ;t58
5358*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m3                        ;t57
5359*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 2, 3, 7, m4017, 799    ;t37a, t58a
5360*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*41], m0                        ;t38
5361*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*40], m5                        ;t37a
5362*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*61], m4                        ;t58a
5363*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*60], m1                        ;t57
5364*c0909341SAndroid Build Coastguard Worker
5365*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*42]     ;t39
5366*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*39]     ;t36
5367*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*62]     ;t59
5368*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*59]     ;t56
5369*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m2                    ;t36a
5370*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m2                        ;t39a
5371*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1, m3                    ;t59a
5372*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m3                        ;t56a
5373*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 2, 3, 7, m4017, 799    ;t36, t59
5374*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*42], m0                        ;t39a
5375*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*39], m5                        ;t36
5376*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*62], m4                        ;t59
5377*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*59], m1                        ;t56a
5378*c0909341SAndroid Build Coastguard Worker
5379*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*35]     ;t32
5380*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*38]     ;t35
5381*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*63]     ;t60
5382*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*66]     ;t63
5383*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m2                    ;t35a
5384*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m2                        ;t32a
5385*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1, m3                    ;t60a
5386*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m3                        ;t63a
5387*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 2, 3, 7,  799, 4017    ;t35, t60
5388*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*35], m0                        ;t32a
5389*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*38], m5                        ;t35
5390*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*63], m4                        ;t60
5391*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*66], m1                        ;t63a
5392*c0909341SAndroid Build Coastguard Worker
5393*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*36]     ;t33a
5394*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*37]     ;t34a
5395*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*64]     ;t61a
5396*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*65]     ;t62a
5397*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m2                    ;t34
5398*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m2                        ;t33
5399*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1, m3                    ;t61
5400*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m3                        ;t62
5401*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 2, 3, 7,  799, 4017    ;t34a, t61a
5402*c0909341SAndroid Build Coastguard Worker
5403*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*41]     ;t38
5404*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*60]     ;t57
5405*c0909341SAndroid Build Coastguard Worker    psubsw                  m6, m0, m2                    ;t38a
5406*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m2                        ;t33a
5407*c0909341SAndroid Build Coastguard Worker    psubsw                  m2, m1, m3                    ;t57a
5408*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m3                        ;t62a
5409*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*36], m0                        ;t33a
5410*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*65], m1                        ;t62a
5411*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            2, 6, 0, 3, 7, 1567, 3784    ;t38, t57
5412*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*41], m2                        ;t38
5413*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*60], m6                        ;t57
5414*c0909341SAndroid Build Coastguard Worker
5415*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*40]     ;t37a
5416*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*61]     ;t58a
5417*c0909341SAndroid Build Coastguard Worker    psubsw                  m0, m5, m2                    ;t37
5418*c0909341SAndroid Build Coastguard Worker    paddsw                  m5, m2                        ;t34
5419*c0909341SAndroid Build Coastguard Worker    psubsw                  m1, m4, m3                    ;t58
5420*c0909341SAndroid Build Coastguard Worker    paddsw                  m4, m3                        ;t61
5421*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            1, 0, 2, 3, 7, 1567, 3784    ;t37a, t58a
5422*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*37], m5                        ;t34
5423*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*64], m4                        ;t61
5424*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*40], m1                        ;t37a
5425*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*61], m0                        ;t58a
5426*c0909341SAndroid Build Coastguard Worker
5427*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*38]     ;t35
5428*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*39]     ;t36
5429*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*62]     ;t59
5430*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*63]     ;t60
5431*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m2                    ;t36a
5432*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m2                        ;t35a
5433*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1, m3                    ;t59a
5434*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m3                        ;t60a
5435*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 2, 3, 7, 1567, 3784    ;t36, t59
5436*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*38], m0                        ;t35a
5437*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*39], m5                        ;t36
5438*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*62], m4                        ;t59
5439*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*63], m1                        ;t60a
5440*c0909341SAndroid Build Coastguard Worker
5441*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*35]     ;t32a
5442*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*42]     ;t39a
5443*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*59]     ;t56a
5444*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*66]     ;t63a
5445*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m2                    ;t39
5446*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m2                        ;t32
5447*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1, m3                    ;t56
5448*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m3                        ;t63
5449*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 2, 3, 7, 1567, 3784    ;t39a, t56a
5450*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*35], m0                        ;t32
5451*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*42], m5                        ;t39a
5452*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*59], m4                        ;t56a
5453*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*66], m1                        ;t63
5454*c0909341SAndroid Build Coastguard Worker
5455*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*50]     ;t47a
5456*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*43]     ;t40a
5457*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*58]     ;t55a
5458*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*51]     ;t48a
5459*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m2                    ;t40
5460*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m2                        ;t47
5461*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1, m3                    ;t55
5462*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m3                        ;t48
5463*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 2, 3, 7, m3784, 1567   ;t40a, t55a
5464*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*50], m0                        ;t47
5465*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*43], m5                        ;t40a
5466*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*58], m4                        ;t55a
5467*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*51], m1                        ;t48
5468*c0909341SAndroid Build Coastguard Worker
5469*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*49]     ;t46
5470*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*44]     ;t41
5471*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*57]     ;t54
5472*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*52]     ;t49
5473*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m2                    ;t41a
5474*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m2                        ;t46a
5475*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1, m3                    ;t54a
5476*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m3                        ;t49a
5477*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 2, 3, 7, m3784, 1567   ;t41, t54
5478*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*49], m0                        ;t46a
5479*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*44], m5                        ;t41
5480*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*57], m4                        ;t54
5481*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*52], m1                        ;t49a
5482*c0909341SAndroid Build Coastguard Worker
5483*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*48]     ;t45a
5484*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*45]     ;t42a
5485*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*56]     ;t53a
5486*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*53]     ;t50a
5487*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m2                    ;t42
5488*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m2                        ;t45
5489*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1, m3                    ;t53
5490*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m3                        ;t50
5491*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 2, 3, 7, m3784, 1567   ;t42a, t53a
5492*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*48], m0                        ;t45
5493*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*45], m5                        ;t42a
5494*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*56], m4                        ;t53a
5495*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*53], m1                        ;t50
5496*c0909341SAndroid Build Coastguard Worker
5497*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*47]     ;t44
5498*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*46]     ;t43
5499*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*55]     ;t52
5500*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*54]     ;t51
5501*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m2                    ;t43a
5502*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m2                        ;t44a
5503*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1, m3                    ;t52a
5504*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m3                        ;t51a
5505*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 2, 3, 7, m3784, 1567   ;t43, t52
5506*c0909341SAndroid Build Coastguard Worker
5507*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*38]     ;t35a
5508*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*31]     ;tmp[28]
5509*c0909341SAndroid Build Coastguard Worker    psubsw                  m6, m2, m0                    ;t44
5510*c0909341SAndroid Build Coastguard Worker    paddsw                  m2, m0                        ;t35
5511*c0909341SAndroid Build Coastguard Worker    psubsw                  m0, m3, m2                    ;out35
5512*c0909341SAndroid Build Coastguard Worker    paddsw                  m2, m3                        ;out28
5513*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*63]     ;t60a
5514*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*38], m0                        ;out35
5515*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*31], m2                        ;out28
5516*c0909341SAndroid Build Coastguard Worker    psubsw                  m0, m3, m1                    ;t51
5517*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m1                        ;t60
5518*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            0, 6, 1, 2, 7, 2896, 2896    ;t44a, t51a
5519*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*6 ]     ;tmp[3]
5520*c0909341SAndroid Build Coastguard Worker    psubsw                  m1, m2, m3                    ;out60
5521*c0909341SAndroid Build Coastguard Worker    paddsw                  m2, m3                        ;out3
5522*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*22]     ;tmp[19]
5523*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*63], m1                        ;out60
5524*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*6 ], m2                        ;out3
5525*c0909341SAndroid Build Coastguard Worker    psubsw                  m1, m3, m0                    ;out44
5526*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m0                        ;out19
5527*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*15]     ;tmp[12]
5528*c0909341SAndroid Build Coastguard Worker
5529*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*39]     ;t36
5530*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*47], m1                        ;out44
5531*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*22], m3                        ;out19
5532*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*62]     ;t59
5533*c0909341SAndroid Build Coastguard Worker    psubsw                  m3, m2, m6                    ;out51
5534*c0909341SAndroid Build Coastguard Worker    paddsw                  m2, m6                        ;out12
5535*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*54], m3                        ;out51
5536*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*15], m2                        ;out12
5537*c0909341SAndroid Build Coastguard Worker    psubsw                  m2, m0, m5                    ;t43a
5538*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m5                        ;t36a
5539*c0909341SAndroid Build Coastguard Worker    mova                    m5, [rsp+gprsize*2+16*30]     ;tmp[27]
5540*c0909341SAndroid Build Coastguard Worker    psubsw                  m3, m1, m4                    ;t52a
5541*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m4                        ;t59a
5542*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            3, 2, 4, 6, 7, 2896, 2896    ;t43, t52
5543*c0909341SAndroid Build Coastguard Worker    mova                    m4, [rsp+gprsize*2+16*7 ]     ;tmp[4 ]
5544*c0909341SAndroid Build Coastguard Worker    psubsw                  m6, m5, m0                    ;out36
5545*c0909341SAndroid Build Coastguard Worker    paddsw                  m5, m0                        ;out27
5546*c0909341SAndroid Build Coastguard Worker    psubsw                  m0, m4, m1                    ;out59
5547*c0909341SAndroid Build Coastguard Worker    paddsw                  m4, m1                        ;out4
5548*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*39], m6                        ;out36
5549*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*30], m5                        ;out27
5550*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*62], m0                        ;out59
5551*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*7 ], m4                        ;out4
5552*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*23]     ;tmp[20]
5553*c0909341SAndroid Build Coastguard Worker    mova                    m5, [rsp+gprsize*2+16*14]     ;tmp[11]
5554*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m3                    ;out43
5555*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m3                        ;out20
5556*c0909341SAndroid Build Coastguard Worker    psubsw                  m6, m5, m2                    ;out52
5557*c0909341SAndroid Build Coastguard Worker    paddsw                  m5, m2                        ;out11
5558*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*46], m4                        ;out43
5559*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*23], m0                        ;out20
5560*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*55], m6                        ;out52
5561*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*14], m5                        ;out11
5562*c0909341SAndroid Build Coastguard Worker
5563*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*40]     ;t37a
5564*c0909341SAndroid Build Coastguard Worker    mova                    m5, [rsp+gprsize*2+16*45]     ;t42a
5565*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*56]     ;t53a
5566*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*61]     ;t58a
5567*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*29]     ;tmp[26]
5568*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m5                    ;t42
5569*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m5                        ;t37
5570*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1, m3                    ;t53
5571*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m3                        ;t58
5572*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 3, 6, 7, 2896, 2896    ;t42a, t53a
5573*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*8 ]     ;tmp[5 ]
5574*c0909341SAndroid Build Coastguard Worker    psubsw                  m6, m2, m0                    ;out37
5575*c0909341SAndroid Build Coastguard Worker    paddsw                  m2, m0                        ;out26
5576*c0909341SAndroid Build Coastguard Worker    psubsw                  m0, m3, m1                    ;out58
5577*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m1                        ;out5
5578*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*40], m6                        ;out37
5579*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*29], m2                        ;out26
5580*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*61], m0                        ;out58
5581*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*8 ], m3                        ;out5
5582*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*24]     ;tmp[21]
5583*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*13]     ;tmp[10]
5584*c0909341SAndroid Build Coastguard Worker    psubsw                  m2, m0, m5                    ;out42
5585*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m5                        ;out21
5586*c0909341SAndroid Build Coastguard Worker    psubsw                  m3, m1, m4                    ;out53
5587*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m4                        ;out10
5588*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*45], m2                        ;out42
5589*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*24], m0                        ;out21
5590*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*56], m3                        ;out53
5591*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*13], m1                        ;out10
5592*c0909341SAndroid Build Coastguard Worker
5593*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*41]     ;t38
5594*c0909341SAndroid Build Coastguard Worker    mova                    m5, [rsp+gprsize*2+16*44]     ;t41
5595*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*57]     ;t54
5596*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*60]     ;t57
5597*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*28]     ;tmp[25]
5598*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m5                    ;t41a
5599*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m5                        ;t38a
5600*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1, m3                    ;t54a
5601*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m3                        ;t57a
5602*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 3, 6, 7, 2896, 2896    ;t41a, t54a
5603*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*9 ]     ;tmp[6 ]
5604*c0909341SAndroid Build Coastguard Worker    psubsw                  m6, m2, m0                    ;out38
5605*c0909341SAndroid Build Coastguard Worker    paddsw                  m2, m0                        ;out25
5606*c0909341SAndroid Build Coastguard Worker    psubsw                  m0, m3, m1                    ;out57
5607*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m1                        ;out6
5608*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*41], m6                        ;out38
5609*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*28], m2                        ;out25
5610*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*60], m0                        ;out57
5611*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*9 ], m3                        ;out6
5612*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*25]     ;tmp[22]
5613*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*12]     ;tmp[9 ]
5614*c0909341SAndroid Build Coastguard Worker    psubsw                  m2, m0, m5                    ;out41
5615*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m5                        ;out22
5616*c0909341SAndroid Build Coastguard Worker    psubsw                  m3, m1, m4                    ;out54
5617*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m4                        ;out9
5618*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*44], m2                        ;out41
5619*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*25], m0                        ;out22
5620*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*57], m3                        ;out54
5621*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*12], m1                        ;out9
5622*c0909341SAndroid Build Coastguard Worker
5623*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*42]     ;t39a
5624*c0909341SAndroid Build Coastguard Worker    mova                    m5, [rsp+gprsize*2+16*43]     ;t40a
5625*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*58]     ;t55a
5626*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*59]     ;t56a
5627*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*27]     ;tmp[24]
5628*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m5                    ;t40
5629*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m5                        ;t39
5630*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1, m3                    ;t55
5631*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m3                        ;t56
5632*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 3, 6, 7, 2896, 2896    ;t40a, t55a
5633*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*10]     ;tmp[7 ]
5634*c0909341SAndroid Build Coastguard Worker    psubsw                  m6, m2, m0                    ;out39
5635*c0909341SAndroid Build Coastguard Worker    paddsw                  m2, m0                        ;out24
5636*c0909341SAndroid Build Coastguard Worker    psubsw                  m0, m3, m1                    ;out56
5637*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m1                        ;out7
5638*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*42], m6                        ;out39
5639*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*27], m2                        ;out24
5640*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*59], m0                        ;out56
5641*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*10], m3                        ;out7
5642*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*26]     ;tmp[23]
5643*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*11]     ;tmp[8 ]
5644*c0909341SAndroid Build Coastguard Worker    psubsw                  m2, m0, m5                    ;out40
5645*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m5                        ;out23
5646*c0909341SAndroid Build Coastguard Worker    psubsw                  m3, m1, m4                    ;out55
5647*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m4                        ;out8
5648*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*43], m2                        ;out40
5649*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*26], m0                        ;out23
5650*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*58], m3                        ;out55
5651*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*11], m1                        ;out8
5652*c0909341SAndroid Build Coastguard Worker
5653*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*37]     ;t34
5654*c0909341SAndroid Build Coastguard Worker    mova                    m5, [rsp+gprsize*2+16*48]     ;t45
5655*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*53]     ;t50
5656*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*64]     ;t61
5657*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*32]     ;tmp[29]
5658*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m5                    ;t45a
5659*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m5                        ;t34a
5660*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1, m3                    ;t50a
5661*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m3                        ;t61a
5662*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 3, 6, 7, 2896, 2896    ;t45, t50
5663*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*5 ]     ;tmp[2 ]
5664*c0909341SAndroid Build Coastguard Worker    psubsw                  m6, m2, m0                    ;out34
5665*c0909341SAndroid Build Coastguard Worker    paddsw                  m2, m0                        ;out29
5666*c0909341SAndroid Build Coastguard Worker    psubsw                  m0, m3, m1                    ;out61
5667*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m1                        ;out2
5668*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*37], m6                        ;out34
5669*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*32], m2                        ;out29
5670*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*64], m0                        ;out61
5671*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*5 ], m3                        ;out2
5672*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*21]     ;tmp[18]
5673*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*16]     ;tmp[13]
5674*c0909341SAndroid Build Coastguard Worker    psubsw                  m2, m0, m5                    ;out45
5675*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m5                        ;out18
5676*c0909341SAndroid Build Coastguard Worker    psubsw                  m3, m1, m4                    ;out50
5677*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m4                        ;out13
5678*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*48], m2                        ;out45
5679*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*21], m0                        ;out18
5680*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*53], m3                        ;out50
5681*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*16], m1                        ;out13
5682*c0909341SAndroid Build Coastguard Worker
5683*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*36]     ;t33a
5684*c0909341SAndroid Build Coastguard Worker    mova                    m5, [rsp+gprsize*2+16*49]     ;t46a
5685*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*52]     ;t49a
5686*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*65]     ;t62a
5687*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*33]     ;tmp[30]
5688*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m5                    ;t46
5689*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m5                        ;t33
5690*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1, m3                    ;t49
5691*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m3                        ;t62
5692*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 3, 6, 7, 2896, 2896    ;t45, t50
5693*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*4 ]     ;tmp[1 ]
5694*c0909341SAndroid Build Coastguard Worker    psubsw                  m6, m2, m0                    ;out33
5695*c0909341SAndroid Build Coastguard Worker    paddsw                  m2, m0                        ;out30
5696*c0909341SAndroid Build Coastguard Worker    psubsw                  m0, m3, m1                    ;out62
5697*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m1                        ;out1
5698*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*36], m6                        ;out33
5699*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*33], m2                        ;out30
5700*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*65], m0                        ;out62
5701*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*4 ], m3                        ;out1
5702*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*20]     ;tmp[17]
5703*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*17]     ;tmp[14]
5704*c0909341SAndroid Build Coastguard Worker    psubsw                  m2, m0, m5                    ;out46
5705*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m5                        ;out17
5706*c0909341SAndroid Build Coastguard Worker    psubsw                  m3, m1, m4                    ;out49
5707*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m4                        ;out14
5708*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*49], m2                        ;out46
5709*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*20], m0                        ;out17
5710*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*52], m3                        ;out49
5711*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*17], m1                        ;out14
5712*c0909341SAndroid Build Coastguard Worker
5713*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*35]     ;t32
5714*c0909341SAndroid Build Coastguard Worker    mova                    m5, [rsp+gprsize*2+16*50]     ;t47
5715*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*51]     ;t48
5716*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*66]     ;t63
5717*c0909341SAndroid Build Coastguard Worker    mova                    m2, [rsp+gprsize*2+16*34]     ;tmp[31]
5718*c0909341SAndroid Build Coastguard Worker    psubsw                  m4, m0, m5                    ;t47a
5719*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m5                        ;t32a
5720*c0909341SAndroid Build Coastguard Worker    psubsw                  m5, m1, m3                    ;t48a
5721*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m3                        ;t63a
5722*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2W            5, 4, 3, 6, 7, 2896, 2896    ;t47, t48
5723*c0909341SAndroid Build Coastguard Worker    mova                    m3, [rsp+gprsize*2+16*3 ]     ;tmp[0 ]
5724*c0909341SAndroid Build Coastguard Worker    psubsw                  m6, m2, m0                    ;out32
5725*c0909341SAndroid Build Coastguard Worker    paddsw                  m2, m0                        ;out31
5726*c0909341SAndroid Build Coastguard Worker    psubsw                  m0, m3, m1                    ;out63
5727*c0909341SAndroid Build Coastguard Worker    paddsw                  m3, m1                        ;out0
5728*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*35], m6                        ;out32
5729*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*34], m2                        ;out31
5730*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*66], m0                        ;out63
5731*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*3 ], m3                        ;out0
5732*c0909341SAndroid Build Coastguard Worker    mova                    m0, [rsp+gprsize*2+16*19]     ;tmp[16]
5733*c0909341SAndroid Build Coastguard Worker    mova                    m1, [rsp+gprsize*2+16*18]     ;tmp[15]
5734*c0909341SAndroid Build Coastguard Worker    psubsw                  m2, m0, m5                    ;out47
5735*c0909341SAndroid Build Coastguard Worker    paddsw                  m0, m5                        ;out16
5736*c0909341SAndroid Build Coastguard Worker    psubsw                  m3, m1, m4                    ;out48
5737*c0909341SAndroid Build Coastguard Worker    paddsw                  m1, m4                        ;out15
5738*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*50], m2                        ;out47
5739*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*19], m0                        ;out16
5740*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*51], m3                        ;out48
5741*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize*2+16*18], m1                        ;out15
5742*c0909341SAndroid Build Coastguard Worker    ret
5743*c0909341SAndroid Build Coastguard Worker
5744*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_dct_dct_64x16_8bpc(dst, stride, coeff, eob)
; Entry point for the 64x16 DCT/DCT inverse transform + add (8 bpc).
;   eob != 0: the whole job is handed to idct_64x16_internal_8bpc.
;   eob == 0: DC-only shortcut; one scaled value derived from the first
;             coefficient is added to 16 rows of 64 destination bytes.
; Declared with 4 arguments, 6 GPRs, 8 vector registers and 16*132 bytes of
; stack (x86inc cglobal operand order). tx2 names a fifth register that is used
; here as the address to jump to once the row loop has finished.
; NOTE(review): .body and .loop take their inputs in registers (m0-m2, r3d,
; tx2q), which looks like they are shared entry points for other block sizes -
; confirm against the rest of the file before reordering anything here.
5745*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_64x16_8bpc, 4, 6, 8, 16*132, dst, stride, coeff, eob, tx2
5746*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32
5747*c0909341SAndroid Build Coastguard Worker    LEA                     r5, $$ ; presumably the base that o() addresses constants from on x86-32 - TODO confirm
5748*c0909341SAndroid Build Coastguard Worker%endif
5749*c0909341SAndroid Build Coastguard Worker    test                  eobd, eobd
5750*c0909341SAndroid Build Coastguard Worker    jz .dconly ; eob == 0 -> take the DC-only path
5751*c0909341SAndroid Build Coastguard Worker
5752*c0909341SAndroid Build Coastguard Worker    call m(idct_64x16_internal_8bpc) ; general path
5753*c0909341SAndroid Build Coastguard Worker    RET
5754*c0909341SAndroid Build Coastguard Worker
; DC-only setup: m0 = scaled DC, m1/m2 = scale factors, r3d = row count,
; tx2q = continuation address.
5755*c0909341SAndroid Build Coastguard Worker.dconly:
5756*c0909341SAndroid Build Coastguard Worker    movd                    m1, [o(pw_2896x8)] ; m1 = low dword of the pw_2896x8 constant
5757*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m1, [coeffq] ; m0 = coefficients * m1 (rounded high multiply); only word 0 is kept by the broadcast in .body
5758*c0909341SAndroid Build Coastguard Worker    movd                    m2, [o(pw_8192)] ; m2 = first scale factor applied in .body
5759*c0909341SAndroid Build Coastguard Worker    mov               [coeffq], eobd ; eobd is 0 on this path, so this 32-bit store clears the DC coefficient
5760*c0909341SAndroid Build Coastguard Worker    mov                    r3d, 16 ; r3d = number of rows to process
5761*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end)] ; address .loop jumps to when all rows are done
5762*c0909341SAndroid Build Coastguard Worker
; Finish scaling the DC value and replicate it to every word of m0.
5763*c0909341SAndroid Build Coastguard Worker.body:
5764*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m2 ; scale by the factor loaded into m2 before .body (pw_8192 on this path)
5765*c0909341SAndroid Build Coastguard Worker    movd                    m2, [o(pw_2048)]  ;intentionally rip-relative
5766*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m1 ; scale by m1 a second time
5767*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m2 ; final scale by pw_2048
5768*c0909341SAndroid Build Coastguard Worker    pshuflw                 m0, m0, q0000 ; replicate word 0 across the low four words
5769*c0909341SAndroid Build Coastguard Worker    punpcklwd               m0, m0 ; ...and from there across all eight words
5770*c0909341SAndroid Build Coastguard Worker    pxor                    m7, m7 ; m7 = 0, used below to widen bytes to words
5771*c0909341SAndroid Build Coastguard Worker
; One iteration per row: load 4 x 16 destination bytes, widen to words, add
; m0, narrow back with unsigned saturation, store.
5772*c0909341SAndroid Build Coastguard Worker.loop:
5773*c0909341SAndroid Build Coastguard Worker    mova                    m1, [dstq+16*0]
5774*c0909341SAndroid Build Coastguard Worker    mova                    m3, [dstq+16*1]
5775*c0909341SAndroid Build Coastguard Worker    mova                    m5, [dstq+16*2]
5776*c0909341SAndroid Build Coastguard Worker    mova                    m6, [dstq+16*3]
5777*c0909341SAndroid Build Coastguard Worker    punpckhbw               m2, m1, m7 ; high 8 bytes of chunk 0 -> words
5778*c0909341SAndroid Build Coastguard Worker    punpcklbw               m1, m7 ; low 8 bytes of chunk 0 -> words
5779*c0909341SAndroid Build Coastguard Worker    punpckhbw               m4, m3, m7 ; same for chunk 1
5780*c0909341SAndroid Build Coastguard Worker    punpcklbw               m3, m7
5781*c0909341SAndroid Build Coastguard Worker    paddw                   m2, m0 ; add the broadcast DC value
5782*c0909341SAndroid Build Coastguard Worker    paddw                   m1, m0
5783*c0909341SAndroid Build Coastguard Worker    paddw                   m4, m0
5784*c0909341SAndroid Build Coastguard Worker    paddw                   m3, m0
5785*c0909341SAndroid Build Coastguard Worker    packuswb                m1, m2 ; back to bytes, saturating to 0..255
5786*c0909341SAndroid Build Coastguard Worker    packuswb                m3, m4
5787*c0909341SAndroid Build Coastguard Worker    punpckhbw               m2, m5, m7 ; chunks 2 and 3: m2/m4 are reused as temporaries
5788*c0909341SAndroid Build Coastguard Worker    punpcklbw               m5, m7
5789*c0909341SAndroid Build Coastguard Worker    punpckhbw               m4, m6, m7
5790*c0909341SAndroid Build Coastguard Worker    punpcklbw               m6, m7
5791*c0909341SAndroid Build Coastguard Worker    paddw                   m2, m0
5792*c0909341SAndroid Build Coastguard Worker    paddw                   m5, m0
5793*c0909341SAndroid Build Coastguard Worker    paddw                   m4, m0
5794*c0909341SAndroid Build Coastguard Worker    paddw                   m6, m0
5795*c0909341SAndroid Build Coastguard Worker    packuswb                m5, m2
5796*c0909341SAndroid Build Coastguard Worker    packuswb                m6, m4
5797*c0909341SAndroid Build Coastguard Worker    mova           [dstq+16*0], m1
5798*c0909341SAndroid Build Coastguard Worker    mova           [dstq+16*1], m3
5799*c0909341SAndroid Build Coastguard Worker    mova           [dstq+16*2], m5
5800*c0909341SAndroid Build Coastguard Worker    mova           [dstq+16*3], m6
5801*c0909341SAndroid Build Coastguard Worker    add                   dstq, strideq ; advance to the next row
5802*c0909341SAndroid Build Coastguard Worker    dec                    r3d
5803*c0909341SAndroid Build Coastguard Worker    jg .loop ; repeat while the row counter is still positive
5804*c0909341SAndroid Build Coastguard Worker    jmp                   tx2q ; continue at the address selected before the loop (.end for this entry point)
5805*c0909341SAndroid Build Coastguard Worker
5806*c0909341SAndroid Build Coastguard Worker.end:
5807*c0909341SAndroid Build Coastguard Worker    RET
5808*c0909341SAndroid Build Coastguard Worker
5809*c0909341SAndroid Build Coastguard Worker
; LOAD_4ROWS src, stride[, is_rect2 = 0]
; Loads the four rows at src+stride*{0,1,2,3} into m0..m3.
; With a non-zero is_rect2, each row is multiplied by pw_2896x8 (pmulhrsw) as
; it is loaded. m3 holds the constant for rows 0-2 and is overwritten by the
; scaled row 3 last, so the order of the four multiplies must be kept.
; Writes m0-m3; no other register is written by the instructions below.
5810*c0909341SAndroid Build Coastguard Worker%macro LOAD_4ROWS 2-3 0 ;src, stride, is_rect2
5811*c0909341SAndroid Build Coastguard Worker
5812*c0909341SAndroid Build Coastguard Worker%if %3
5813*c0909341SAndroid Build Coastguard Worker    mova                 m3, [o(pw_2896x8)] ; scale constant, kept in m3 until the last row
5814*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m3, [%1+%2*0]
5815*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m3, [%1+%2*1]
5816*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m3, [%1+%2*2]
5817*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, [%1+%2*3] ; two-operand form: the constant is replaced by scaled row 3
5818*c0909341SAndroid Build Coastguard Worker%else
5819*c0909341SAndroid Build Coastguard Worker    mova                 m0, [%1+%2*0] ; plain loads, no scaling
5820*c0909341SAndroid Build Coastguard Worker    mova                 m1, [%1+%2*1]
5821*c0909341SAndroid Build Coastguard Worker    mova                 m2, [%1+%2*2]
5822*c0909341SAndroid Build Coastguard Worker    mova                 m3, [%1+%2*3]
5823*c0909341SAndroid Build Coastguard Worker%endif
5824*c0909341SAndroid Build Coastguard Worker%endmacro
5825*c0909341SAndroid Build Coastguard Worker
; LOAD_4ROWS_H src, stride
; Loads the four rows at src+stride*{0,1,2,3} into the upper register group
; m4..m7, leaving m0..m3 untouched (the counterpart of LOAD_4ROWS, which
; fills m0..m3). No scaling is applied.
5826*c0909341SAndroid Build Coastguard Worker%macro LOAD_4ROWS_H 2 ;src, stride
5827*c0909341SAndroid Build Coastguard Worker    mova                 m4, [%1+%2*0]
5828*c0909341SAndroid Build Coastguard Worker    mova                 m5, [%1+%2*1]
5829*c0909341SAndroid Build Coastguard Worker    mova                 m6, [%1+%2*2]
5830*c0909341SAndroid Build Coastguard Worker    mova                 m7, [%1+%2*3]
5831*c0909341SAndroid Build Coastguard Worker%endmacro
5832*c0909341SAndroid Build Coastguard Worker
5833*c0909341SAndroid Build Coastguard Workercglobal idct_64x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
5834*c0909341SAndroid Build Coastguard Worker    mov                    r3d, 2
5835*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*2+16*67], dstq
5836*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [rsp+gprsize+16*68]
5837*c0909341SAndroid Build Coastguard Worker
5838*c0909341SAndroid Build Coastguard Worker.pass1_loop:
5839*c0909341SAndroid Build Coastguard Worker    LOAD_4ROWS     coeffq+32*0, 32*8
5840*c0909341SAndroid Build Coastguard Worker    pxor                    m4, m4
5841*c0909341SAndroid Build Coastguard Worker    REPX          {mova x, m4}, m5, m6, m7
5842*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
5843*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
5844*c0909341SAndroid Build Coastguard Worker
5845*c0909341SAndroid Build Coastguard Worker    pxor                    m4, m4
5846*c0909341SAndroid Build Coastguard Worker    LOAD_4ROWS     coeffq+32*4, 32*8
5847*c0909341SAndroid Build Coastguard Worker
5848*c0909341SAndroid Build Coastguard Worker    REPX          {mova x, m4}, m5, m6, m7
5849*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
5850*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*0]
5851*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS   rsp+gprsize+16*11, 16
5852*c0909341SAndroid Build Coastguard Worker
5853*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+32*2, 32*4
5854*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*19], m0
5855*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*26], m1
5856*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*23], m2
5857*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*22], m3
5858*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*21], m4
5859*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*24], m5
5860*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*25], m6
5861*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*20], m7
5862*c0909341SAndroid Build Coastguard Worker
5863*c0909341SAndroid Build Coastguard Worker    call m(idct_8x32_internal_8bpc).main_fast
5864*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    rsp+gprsize+16*3, 16
5865*c0909341SAndroid Build Coastguard Worker
5866*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+32*1, 32*2
5867*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*35], m0                        ;in1
5868*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*49], m1                        ;in3
5869*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*43], m2                        ;in5
5870*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*41], m3                        ;in7
5871*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*39], m4                        ;in9
5872*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*45], m5                        ;in11
5873*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*47], m6                        ;in13
5874*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*37], m7                        ;in15
5875*c0909341SAndroid Build Coastguard Worker
5876*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    coeffq+32*17, 32*2
5877*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*63], m0                        ;in17
5878*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*53], m1                        ;in19
5879*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*55], m2                        ;in21
5880*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*61], m3                        ;in23
5881*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*59], m4                        ;in25
5882*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*57], m5                        ;in27
5883*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*51], m6                        ;in29
5884*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*65], m7                        ;in31
5885*c0909341SAndroid Build Coastguard Worker
5886*c0909341SAndroid Build Coastguard Worker    call m(idct_16x64_internal_8bpc).main
5887*c0909341SAndroid Build Coastguard Worker
5888*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    rsp+gprsize+16*3, 16
5889*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
5890*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
5891*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end)]
5892*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
5893*c0909341SAndroid Build Coastguard Worker
5894*c0909341SAndroid Build Coastguard Worker.pass1_end:
5895*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS     coeffq+32*0, 32
5896*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*11, 16
5897*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
5898*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
5899*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end1)]
5900*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
5901*c0909341SAndroid Build Coastguard Worker
5902*c0909341SAndroid Build Coastguard Worker.pass1_end1:
5903*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS     coeffq+32*8, 32
5904*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*19, 16
5905*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
5906*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
5907*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end2)]
5908*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
5909*c0909341SAndroid Build Coastguard Worker
5910*c0909341SAndroid Build Coastguard Worker.pass1_end2:
5911*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+32*16, 32
5912*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*27, 16
5913*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
5914*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
5915*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end3)]
5916*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
5917*c0909341SAndroid Build Coastguard Worker
5918*c0909341SAndroid Build Coastguard Worker.pass1_end3:
5919*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+32*24, 32
5920*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*35, 16
5921*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
5922*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
5923*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end4)]
5924*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
5925*c0909341SAndroid Build Coastguard Worker
5926*c0909341SAndroid Build Coastguard Worker.pass1_end4:
5927*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS       dstq+32*0, 32
5928*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*43, 16
5929*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
5930*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
5931*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end5)]
5932*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
5933*c0909341SAndroid Build Coastguard Worker
5934*c0909341SAndroid Build Coastguard Worker.pass1_end5:
5935*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS       dstq+32*8, 32
5936*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*51, 16
5937*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
5938*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
5939*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end6)]
5940*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
5941*c0909341SAndroid Build Coastguard Worker
5942*c0909341SAndroid Build Coastguard Worker.pass1_end6:
5943*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS      dstq+32*16, 32
5944*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*59, 16
5945*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
5946*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
5947*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end7)]
5948*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
5949*c0909341SAndroid Build Coastguard Worker
5950*c0909341SAndroid Build Coastguard Worker.pass1_end7:
5951*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS      dstq+32*24, 32
5952*c0909341SAndroid Build Coastguard Worker
5953*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 16
5954*c0909341SAndroid Build Coastguard Worker    add                   dstq, 16
5955*c0909341SAndroid Build Coastguard Worker    dec                    r3d
5956*c0909341SAndroid Build Coastguard Worker    jg .pass1_loop
5957*c0909341SAndroid Build Coastguard Worker
5958*c0909341SAndroid Build Coastguard Worker.pass2:
5959*c0909341SAndroid Build Coastguard Worker    mov                   dstq, [rsp+gprsize*2+16*67]
5960*c0909341SAndroid Build Coastguard Worker    sub                 coeffq, 32
5961*c0909341SAndroid Build Coastguard Worker    mov                    r3d, 4
5962*c0909341SAndroid Build Coastguard Worker
5963*c0909341SAndroid Build Coastguard Worker.pass2_loop:
5964*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*1+16*67], r3d
5965*c0909341SAndroid Build Coastguard Worker
5966*c0909341SAndroid Build Coastguard Worker    LOAD_4ROWS     coeffq+16*0, 32*2
5967*c0909341SAndroid Build Coastguard Worker    LOAD_4ROWS_H   coeffq+16*1, 32*2
5968*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
5969*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
5970*c0909341SAndroid Build Coastguard Worker    LOAD_4ROWS     coeffq+16*2, 32*2
5971*c0909341SAndroid Build Coastguard Worker    LOAD_4ROWS_H   coeffq+16*3, 32*2
5972*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
5973*c0909341SAndroid Build Coastguard Worker
5974*c0909341SAndroid Build Coastguard Worker    mov                    r3, dstq
5975*c0909341SAndroid Build Coastguard Worker    lea                  tx2q, [o(.end)]
5976*c0909341SAndroid Build Coastguard Worker    lea                  dstq, [dstq+strideq*8]
5977*c0909341SAndroid Build Coastguard Worker    jmp  m(idct_8x8_internal_8bpc).end
5978*c0909341SAndroid Build Coastguard Worker
5979*c0909341SAndroid Build Coastguard Worker.end:
5980*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*3, 16
5981*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*0], m7
5982*c0909341SAndroid Build Coastguard Worker    lea                  tx2q, [o(.end1)]
5983*c0909341SAndroid Build Coastguard Worker    mov                  dstq, r3
5984*c0909341SAndroid Build Coastguard Worker    jmp  m(idct_8x8_internal_8bpc).end
5985*c0909341SAndroid Build Coastguard Worker
5986*c0909341SAndroid Build Coastguard Worker.end1:
5987*c0909341SAndroid Build Coastguard Worker    pxor                   m7, m7
5988*c0909341SAndroid Build Coastguard Worker    REPX  {mova [coeffq+16*x], m7}, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15
5989*c0909341SAndroid Build Coastguard Worker
5990*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 16*16
5991*c0909341SAndroid Build Coastguard Worker    mov                    r3d, [rsp+gprsize*1+16*67]
5992*c0909341SAndroid Build Coastguard Worker    mov                   dstq, [rsp+gprsize*2+16*67]
5993*c0909341SAndroid Build Coastguard Worker    add                   dstq, 8
5994*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*2+16*67], dstq
5995*c0909341SAndroid Build Coastguard Worker    dec                    r3d
5996*c0909341SAndroid Build Coastguard Worker    jg .pass2_loop
5997*c0909341SAndroid Build Coastguard Worker
5998*c0909341SAndroid Build Coastguard Worker    mov                    r3d, 4
5999*c0909341SAndroid Build Coastguard Worker    lea                 coeffq, [rsp+gprsize+16*68]
6000*c0909341SAndroid Build Coastguard Worker.pass2_loop2:
6001*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*1+16*67], r3d
6002*c0909341SAndroid Build Coastguard Worker
6003*c0909341SAndroid Build Coastguard Worker    LOAD_4ROWS     coeffq+16*0, 32*2
6004*c0909341SAndroid Build Coastguard Worker    LOAD_4ROWS_H   coeffq+16*1, 32*2
6005*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
6006*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
6007*c0909341SAndroid Build Coastguard Worker    LOAD_4ROWS     coeffq+16*2, 32*2
6008*c0909341SAndroid Build Coastguard Worker    LOAD_4ROWS_H   coeffq+16*3, 32*2
6009*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
6010*c0909341SAndroid Build Coastguard Worker
6011*c0909341SAndroid Build Coastguard Worker    mov                    r3, dstq
6012*c0909341SAndroid Build Coastguard Worker    lea                  tx2q, [o(.end2)]
6013*c0909341SAndroid Build Coastguard Worker    lea                  dstq, [dstq+strideq*8]
6014*c0909341SAndroid Build Coastguard Worker    jmp  m(idct_8x8_internal_8bpc).end
6015*c0909341SAndroid Build Coastguard Worker
6016*c0909341SAndroid Build Coastguard Worker.end2:
6017*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*3, 16
6018*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*0], m7
6019*c0909341SAndroid Build Coastguard Worker    lea                  tx2q, [o(.end3)]
6020*c0909341SAndroid Build Coastguard Worker    mov                  dstq, r3
6021*c0909341SAndroid Build Coastguard Worker    jmp  m(idct_8x8_internal_8bpc).end
6022*c0909341SAndroid Build Coastguard Worker
6023*c0909341SAndroid Build Coastguard Worker.end3:
6024*c0909341SAndroid Build Coastguard Worker
6025*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 16*16
6026*c0909341SAndroid Build Coastguard Worker    mov                    r3d, [rsp+gprsize*1+16*67]
6027*c0909341SAndroid Build Coastguard Worker    mov                   dstq, [rsp+gprsize*2+16*67]
6028*c0909341SAndroid Build Coastguard Worker    add                   dstq, 8
6029*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*2+16*67], dstq
6030*c0909341SAndroid Build Coastguard Worker    dec                    r3d
6031*c0909341SAndroid Build Coastguard Worker    jg .pass2_loop2
6032*c0909341SAndroid Build Coastguard Worker    ret
6033*c0909341SAndroid Build Coastguard Worker
6034*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_dct_dct_32x64_8bpc
; Entry point for the 32x64 DCT_DCT inverse transform (8 bpc), going by its name.
;   eob != 0 : the work is done by idct_32x64_internal_8bpc, then RET.
;   eob == 0 : the .dconly shortcut scales the first coefficient, clears it in
;              memory and tail-jumps into the 32x8 routine's shared .body.
; NOTE(review): the cglobal numbers (4, 6, 8, 16*68) are presumably x86inc's
; "args, GPRs, XMM regs, stack bytes" -- confirm against x86inc.asm.
6035*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_32x64_8bpc, 4, 6, 8, 16*68, dst, stride, coeff, eob, tx2
6036*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32
    ; 32-bit builds only: r5 receives the address of $$ (start of the section).
    ; Presumably this is the base the o() macro addresses constants from -- confirm.
6037*c0909341SAndroid Build Coastguard Worker    LEA                     r5, $$
6038*c0909341SAndroid Build Coastguard Worker%endif
6039*c0909341SAndroid Build Coastguard Worker    test                  eobd, eobd
6040*c0909341SAndroid Build Coastguard Worker    jz .dconly
6041*c0909341SAndroid Build Coastguard Worker    call m(idct_32x64_internal_8bpc)
    ; .end is also the address placed in tx2q by .dconly below.
6042*c0909341SAndroid Build Coastguard Worker.end:
6043*c0909341SAndroid Build Coastguard Worker    RET
6044*c0909341SAndroid Build Coastguard Worker
6045*c0909341SAndroid Build Coastguard Worker.dconly:
    ; m0 = pmulhrsw([coeffq], pw_2896x8), applied twice (second time below).
6046*c0909341SAndroid Build Coastguard Worker    movd                    m1, [o(pw_2896x8)]
6047*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m1, [coeffq]
    ; m2 = pw_16384; it is not used in this block, so it is presumably an input
    ; of the shared .body -- confirm there.
6048*c0909341SAndroid Build Coastguard Worker    movd                    m2, [o(pw_16384)]
    ; eobd is zero on this path (jz above), so this writes 32 bits of zero over
    ; the coefficient that was just read.
6049*c0909341SAndroid Build Coastguard Worker    mov               [coeffq], eobd
6050*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m1
    ; r3d = 64 and tx2q = address of .end are handed to the 32x8 .body.
    ; NOTE(review): presumably the row count and the label to resume at -- confirm
    ; against inv_txfm_add_dct_dct_32x8_8bpc.body.
6051*c0909341SAndroid Build Coastguard Worker    mov                    r3d, 64
6052*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end)]
6053*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_32x8_8bpc).body
6054*c0909341SAndroid Build Coastguard Worker
6055*c0909341SAndroid Build Coastguard Worker
; idct_32x64_internal_8bpc
; Worker called by inv_txfm_add_dct_dct_32x64_8bpc when eob != 0.
;
; Pass 1 (.pass1_loop) runs r3d times, advancing coeffq by 16 bytes per
; iteration, and writes its results back into the coefficient buffer at
; coeffq+64*{0,8,16,24}. Pass 2 is not implemented here: .pass2 sets up
; registers and jumps into idct_16x64_internal_8bpc.pass2_loop.
;
; Stack slots written by this routine:
;   [rsp+gprsize*1+16*67]  eob - 136 (its sign selects .fast vs .full)
;   [rsp+gprsize*2+16*67]  original coeffq during pass 1, then dstq+8
;   [rsp+gprsize+16*0]     spill slot for m7
;   [rsp+gprsize+16*3..]   row scratch for the sub-transforms
; NOTE(review): r3/r4 appear to alias eob/tx2 (x86inc positional naming), which
; would be why eob is saved to the stack before r3d is reused -- confirm.
; NOTE(review): the trailing ", 1" argument of LOAD_8ROWS/LOAD_4ROWS is defined
; by those macros elsewhere in the file; its effect is not visible here.
6056*c0909341SAndroid Build Coastguard Workercglobal idct_32x64_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
6057*c0909341SAndroid Build Coastguard Worker    mov                    r4d, 2
    ; eobd -= 136. The sign flag from this sub is consumed by cmovs below; the
    ; two mov instructions in between do not modify flags.
6058*c0909341SAndroid Build Coastguard Worker    sub                   eobd, 136
6059*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*1+16*67], eobd
    ; r3d = pass-1 iteration count: 4, or 2 when eob - 136 is negative.
6060*c0909341SAndroid Build Coastguard Worker    mov                    r3d, 4
6061*c0909341SAndroid Build Coastguard Worker    cmovs                  r3d, r4d
6062*c0909341SAndroid Build Coastguard Worker
6063*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32
6064*c0909341SAndroid Build Coastguard Worker    LEA                     r5, $$
6065*c0909341SAndroid Build Coastguard Worker%endif
6066*c0909341SAndroid Build Coastguard Worker
    ; Remember the start of the coefficient buffer; .pass2 reloads it.
6067*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*2+16*67], coeffq
6068*c0909341SAndroid Build Coastguard Worker
6069*c0909341SAndroid Build Coastguard Worker.pass1_loop:
    ; Rows at coeffq+64*1, step 64*2 (tagged in1..in15 below) are parked in
    ; scratch slots 16*19..16*26 in the order shown.
6070*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+64*1, 64*2, 1
6071*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*19], m0                        ;in1
6072*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*26], m1                        ;in3
6073*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*23], m2                        ;in5
6074*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*22], m3                        ;in7
6075*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*21], m4                        ;in9
6076*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*24], m5                        ;in11
6077*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*25], m6                        ;in13
6078*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*20], m7                        ;in15
6079*c0909341SAndroid Build Coastguard Worker
    ; tx2d is used as a temporary: reload the saved eob - 136 and take the
    ; reduced path when it is negative.
6080*c0909341SAndroid Build Coastguard Worker    mov                   tx2d, [rsp+gprsize*1+16*67]
6081*c0909341SAndroid Build Coastguard Worker    test                  tx2d, tx2d
6082*c0909341SAndroid Build Coastguard Worker    jl .fast
6083*c0909341SAndroid Build Coastguard Worker
6084*c0909341SAndroid Build Coastguard Worker.full:
    ; Full path: 8 rows per stage. Rows from coeffq+64*0 (step 64*4) go through
    ; idct_8x8 .main, rows from coeffq+64*2 (step 64*4) through idct_16x8 .main.
6085*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+64*0, 64*4, 1
6086*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
6087*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
6088*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+64*2, 64*4, 1
6089*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
    ; m7 is refilled from spill slot 0 before the eight rows are saved at 16*11.
6090*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*0]
6091*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS   rsp+gprsize+16*11, 16
6092*c0909341SAndroid Build Coastguard Worker
    ; Rows tagged in17..in31 are parked in scratch slots 16*27..16*34; this
    ; load is skipped entirely on the .fast path.
6093*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    coeffq+64*17, 64*2, 1
6094*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*33], m0                        ;in17
6095*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*28], m1                        ;in19
6096*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*29], m2                        ;in21
6097*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*32], m3                        ;in23
6098*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*31], m4                        ;in25
6099*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*30], m5                        ;in27
6100*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*27], m6                        ;in29
6101*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*34], m7                        ;in31
6102*c0909341SAndroid Build Coastguard Worker
6103*c0909341SAndroid Build Coastguard Worker    call m(idct_8x32_internal_8bpc).main
6104*c0909341SAndroid Build Coastguard Worker    jmp .pass1_end
6105*c0909341SAndroid Build Coastguard Worker
6106*c0909341SAndroid Build Coastguard Worker.fast:
    ; Reduced path: only four rows are loaded per stage (step 256) and m4..m7
    ; are zeroed in place of the rows that are not read.
6107*c0909341SAndroid Build Coastguard Worker    LOAD_4ROWS          coeffq, 256, 1
6108*c0909341SAndroid Build Coastguard Worker    pxor                    m4, m4
6109*c0909341SAndroid Build Coastguard Worker    REPX          {mova x, m4}, m5, m6, m7
6110*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
6111*c0909341SAndroid Build Coastguard Worker
6112*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
6113*c0909341SAndroid Build Coastguard Worker    LOAD_4ROWS    coeffq+128*1, 256, 1
6114*c0909341SAndroid Build Coastguard Worker    pxor                    m4, m4
6115*c0909341SAndroid Build Coastguard Worker    REPX          {mova x, m4}, m5, m6, m7
6116*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
6117*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*0]
6118*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS   rsp+gprsize+16*11, 16
6119*c0909341SAndroid Build Coastguard Worker
    ; .main_fast is called instead of .main; no in17..in31 rows were stored.
6120*c0909341SAndroid Build Coastguard Worker    call m(idct_8x32_internal_8bpc).main_fast
6121*c0909341SAndroid Build Coastguard Worker
    ; .pass1_end .. .pass1_end4: four chained stages. Each one spills m7 to
    ; slot 0, puts the next local label into tx2q and jumps to
    ; idct_8x8_internal_8bpc.pass1_end. Execution is expected to resume at the
    ; label held in tx2q (that shared tail is outside this chunk). The resumed
    ; code saves eight rows to coeffq+64*{0,8,16,24} and, except in the last
    ; stage, loads the next eight from scratch (16*11, 16*19, 16*27).
6122*c0909341SAndroid Build Coastguard Worker.pass1_end:
6123*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
6124*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end1)]
6125*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
6126*c0909341SAndroid Build Coastguard Worker
6127*c0909341SAndroid Build Coastguard Worker.pass1_end1:
6128*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS     coeffq+64*0, 64
6129*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*11, 16
6130*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
6131*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end2)]
6132*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
6133*c0909341SAndroid Build Coastguard Worker
6134*c0909341SAndroid Build Coastguard Worker.pass1_end2:
6135*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS     coeffq+64*8, 64
6136*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*19, 16
6137*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
6138*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end3)]
6139*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
6140*c0909341SAndroid Build Coastguard Worker
6141*c0909341SAndroid Build Coastguard Worker.pass1_end3:
6142*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+64*16, 64
6143*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*27, 16
6144*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
6145*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end4)]
6146*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
6147*c0909341SAndroid Build Coastguard Worker
6148*c0909341SAndroid Build Coastguard Worker.pass1_end4:
6149*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+64*24, 64
6150*c0909341SAndroid Build Coastguard Worker
    ; Step 16 bytes further into the coefficient buffer and repeat while the
    ; decremented counter is still positive.
6151*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 16
6152*c0909341SAndroid Build Coastguard Worker    dec                    r3d
6153*c0909341SAndroid Build Coastguard Worker    jg .pass1_loop
6154*c0909341SAndroid Build Coastguard Worker
6155*c0909341SAndroid Build Coastguard Worker.pass2:
    ; Rewind coeffq to the buffer start, set r3d = 4, store dstq+8 in the slot
    ; that held coeffq, and put the address of idct_16x64's .end1 in r4 before
    ; entering that routine's .pass2_loop.
    ; NOTE(review): how .pass2_loop consumes r3d, r4 and the dstq+8 slot is
    ; defined in idct_16x64_internal_8bpc, outside this chunk -- confirm there.
6156*c0909341SAndroid Build Coastguard Worker    mov                 coeffq, [rsp+gprsize*2+16*67]
6157*c0909341SAndroid Build Coastguard Worker    mov                    r3d, 4
6158*c0909341SAndroid Build Coastguard Worker    lea                     r4, [dstq+8]
6159*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*2+16*67], r4
6160*c0909341SAndroid Build Coastguard Worker    lea                     r4, [o(m(idct_16x64_internal_8bpc).end1)]
6161*c0909341SAndroid Build Coastguard Worker    jmp m(idct_16x64_internal_8bpc).pass2_loop
6162*c0909341SAndroid Build Coastguard Worker
6163*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_dct_dct_64x32_8bpc
; Entry point for the 64x32 DCT_DCT inverse transform (8 bpc), going by its name.
;   eob != 0 : the work is done by idct_64x32_internal_8bpc, then RET.
;   eob == 0 : the .dconly shortcut scales the first coefficient, clears it in
;              memory and tail-jumps into the 64x16 routine's shared .body.
; The .end label is also referenced from inv_txfm_add_dct_dct_64x64_8bpc.dconly.
; NOTE(review): the cglobal numbers (4, 6, 8, 16*197) are presumably x86inc's
; "args, GPRs, XMM regs, stack bytes" -- confirm against x86inc.asm.
6164*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_64x32_8bpc, 4, 6, 8, 16*197, dst, stride, coeff, eob, tx2
6165*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32
    ; 32-bit builds only: r5 receives the address of $$ (start of the section).
    ; Presumably this is the base the o() macro addresses constants from -- confirm.
6166*c0909341SAndroid Build Coastguard Worker    LEA                     r5, $$
6167*c0909341SAndroid Build Coastguard Worker%endif
6168*c0909341SAndroid Build Coastguard Worker    test                  eobd, eobd
6169*c0909341SAndroid Build Coastguard Worker    jz .dconly
6170*c0909341SAndroid Build Coastguard Worker    call m(idct_64x32_internal_8bpc)
6171*c0909341SAndroid Build Coastguard Worker.end:
6172*c0909341SAndroid Build Coastguard Worker    RET
6173*c0909341SAndroid Build Coastguard Worker
6174*c0909341SAndroid Build Coastguard Worker.dconly:
    ; m0 = pmulhrsw(pmulhrsw([coeffq], pw_2896x8), pw_2896x8); m2 = pw_16384.
    ; m2 is not used in this block, so it is presumably an input of the shared
    ; .body -- confirm there.
6175*c0909341SAndroid Build Coastguard Worker    movd                    m1, [o(pw_2896x8)]
6176*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m1, [coeffq]
6177*c0909341SAndroid Build Coastguard Worker    movd                    m2, [o(pw_16384)]
6178*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m1
    ; eobd is zero on this path (jz above), so this writes 32 bits of zero over
    ; the coefficient that was just read.
6179*c0909341SAndroid Build Coastguard Worker    mov               [coeffq], eobd
    ; r3d = 32 and tx2q = address of .end are handed to the 64x16 .body.
    ; NOTE(review): presumably the row count and the label to resume at -- confirm
    ; against inv_txfm_add_dct_dct_64x16_8bpc.body.
6180*c0909341SAndroid Build Coastguard Worker    mov                    r3d, 32
6181*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.end)]
6182*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_64x16_8bpc).body
6183*c0909341SAndroid Build Coastguard Worker
6184*c0909341SAndroid Build Coastguard Worker
; idct_64x32_internal_8bpc
; Worker called by inv_txfm_add_dct_dct_64x32_8bpc when eob != 0.
;
; Pass 1 (.pass1_loop) runs r3d times. Each iteration produces eight groups of
; eight rows: the first four groups are written back to the coefficient buffer
; (coeffq+64*{0,8,16,24}), the last four to a stack buffer addressed through
; dstq (dstq+64*{0,8,16,24}). Pass 2 reuses idct_32x32_internal_8bpc.pass2_loop
; twice: first on the stack buffer with dst+32, then (.pass2_end2) on the
; coefficient buffer with the original dst.
;
; Stack slots written by this routine:
;   [rsp+gprsize*1+16*67]  eob - 136
;   [rsp+gprsize*2+16*67]  original coeffq
;   [rsp+gprsize*3+16*67]  original dstq
;   [rsp+gprsize*4+16*67]  address of the stack buffer (rsp+gprsize+16*69)
;   [rsp+gprsize*1+16*35]  copy of eob - 136 made at .pass2
; NOTE(review): r3 appears to alias eob (x86inc positional naming), which would
; be why eob is saved before r3d is reused as the loop counter -- confirm.
; NOTE(review): the trailing ", 1" argument of LOAD_8ROWS/LOAD_4ROWS is defined
; by those macros elsewhere in the file; its effect is not visible here.
6185*c0909341SAndroid Build Coastguard Workercglobal idct_64x32_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
6186*c0909341SAndroid Build Coastguard Worker    mov                    r4d, 2
    ; eobd -= 136. The sign flag from this sub is consumed by cmovs below; the
    ; two mov instructions in between do not modify flags.
6187*c0909341SAndroid Build Coastguard Worker    sub                   eobd, 136
6188*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*1+16*67], eobd
    ; r3d = pass-1 iteration count: 4, or 2 when eob - 136 is negative.
6189*c0909341SAndroid Build Coastguard Worker    mov                    r3d, 4
6190*c0909341SAndroid Build Coastguard Worker    cmovs                  r3d, r4d
6191*c0909341SAndroid Build Coastguard Worker
6192*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32
6193*c0909341SAndroid Build Coastguard Worker    LEA                     r5, $$
6194*c0909341SAndroid Build Coastguard Worker%endif
6195*c0909341SAndroid Build Coastguard Worker
    ; Save the caller's coeff and dst pointers, then repoint dstq at the stack
    ; buffer starting at rsp+gprsize+16*69 and remember that address too.
6196*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*2+16*67], coeffq
6197*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*3+16*67], dstq
6198*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [rsp+gprsize+16*69]
6199*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*4+16*67], dstq
6200*c0909341SAndroid Build Coastguard Worker
6201*c0909341SAndroid Build Coastguard Worker.pass1_loop:
    ; Four rows from coeffq+64*0 (step 64*8), with m4..m7 zeroed, go through
    ; idct_8x8 .main.
6202*c0909341SAndroid Build Coastguard Worker    LOAD_4ROWS     coeffq+64*0, 64*8, 1
6203*c0909341SAndroid Build Coastguard Worker    pxor                    m4, m4
6204*c0909341SAndroid Build Coastguard Worker    REPX          {mova x, m4}, m5, m6, m7
6205*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
6206*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
6207*c0909341SAndroid Build Coastguard Worker
    ; Four rows from coeffq+64*4 (step 64*8), with m4..m7 zeroed, go through
    ; idct_16x8 .main.
6208*c0909341SAndroid Build Coastguard Worker    pxor                    m4, m4
6209*c0909341SAndroid Build Coastguard Worker    LOAD_4ROWS     coeffq+64*4, 64*8, 1
6210*c0909341SAndroid Build Coastguard Worker
6211*c0909341SAndroid Build Coastguard Worker    REPX          {mova x, m4}, m5, m6, m7
6212*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
    ; m7 is refilled from spill slot 0 before the eight rows are saved at 16*11.
6213*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*0]
6214*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS   rsp+gprsize+16*11, 16
6215*c0909341SAndroid Build Coastguard Worker
    ; Eight rows from coeffq+64*2 (step 64*4) are parked in slots 16*19..16*26
    ; in the order shown, then idct_8x32 .main_fast is called.
6216*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+64*2, 64*4, 1
6217*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*19], m0
6218*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*26], m1
6219*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*23], m2
6220*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*22], m3
6221*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*21], m4
6222*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*24], m5
6223*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*25], m6
6224*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*20], m7
6225*c0909341SAndroid Build Coastguard Worker
6226*c0909341SAndroid Build Coastguard Worker    call m(idct_8x32_internal_8bpc).main_fast
6227*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    rsp+gprsize+16*3, 16
6228*c0909341SAndroid Build Coastguard Worker
    ; Odd-numbered rows (tagged in1..in31 below) are scattered over the
    ; odd-numbered scratch slots 16*35..16*65 before idct_16x64 .main is called.
6229*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+64*1, 64*2, 1
6230*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*35], m0                        ;in1
6231*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*49], m1                        ;in3
6232*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*43], m2                        ;in5
6233*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*41], m3                        ;in7
6234*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*39], m4                        ;in9
6235*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*45], m5                        ;in11
6236*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*47], m6                        ;in13
6237*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*37], m7                        ;in15
6238*c0909341SAndroid Build Coastguard Worker
6239*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    coeffq+64*17, 64*2, 1
6240*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*63], m0                        ;in17
6241*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*53], m1                        ;in19
6242*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*55], m2                        ;in21
6243*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*61], m3                        ;in23
6244*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*59], m4                        ;in25
6245*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*57], m5                        ;in27
6246*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*51], m6                        ;in29
6247*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*65], m7                        ;in31
6248*c0909341SAndroid Build Coastguard Worker
6249*c0909341SAndroid Build Coastguard Worker    call m(idct_16x64_internal_8bpc).main
6250*c0909341SAndroid Build Coastguard Worker
    ; Eight chained stages follow. Each one loads eight rows from scratch
    ; (16*3, 16*11, ... 16*59), spills m7 to slot 0, puts the next local label
    ; into tx2q and jumps to idct_8x8_internal_8bpc.pass1_end. Execution is
    ; expected to resume at the label in tx2q (that shared tail is outside this
    ; chunk), where the rows are saved.
6251*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    rsp+gprsize+16*3, 16
6252*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
6253*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end)]
6254*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
6255*c0909341SAndroid Build Coastguard Worker
    ; Groups 0..3 are written back into the coefficient buffer.
6256*c0909341SAndroid Build Coastguard Worker.pass1_end:
6257*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS     coeffq+64*0, 64
6258*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*11, 16
6259*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
6260*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end1)]
6261*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
6262*c0909341SAndroid Build Coastguard Worker
6263*c0909341SAndroid Build Coastguard Worker.pass1_end1:
6264*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS     coeffq+64*8, 64
6265*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*19, 16
6266*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
6267*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end2)]
6268*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
6269*c0909341SAndroid Build Coastguard Worker
6270*c0909341SAndroid Build Coastguard Worker.pass1_end2:
6271*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+64*16, 64
6272*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*27, 16
6273*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
6274*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end3)]
6275*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
6276*c0909341SAndroid Build Coastguard Worker
6277*c0909341SAndroid Build Coastguard Worker.pass1_end3:
6278*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+64*24, 64
6279*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*35, 16
6280*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
6281*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end4)]
6282*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
6283*c0909341SAndroid Build Coastguard Worker
    ; Groups 4..7 go to the stack buffer that dstq points at during pass 1.
6284*c0909341SAndroid Build Coastguard Worker.pass1_end4:
6285*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS       dstq+64*0, 64
6286*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*43, 16
6287*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
6288*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end5)]
6289*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
6290*c0909341SAndroid Build Coastguard Worker
6291*c0909341SAndroid Build Coastguard Worker.pass1_end5:
6292*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS       dstq+64*8, 64
6293*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*51, 16
6294*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
6295*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end6)]
6296*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
6297*c0909341SAndroid Build Coastguard Worker
6298*c0909341SAndroid Build Coastguard Worker.pass1_end6:
6299*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS      dstq+64*16, 64
6300*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*59, 16
6301*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
6302*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end7)]
6303*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end
6304*c0909341SAndroid Build Coastguard Worker
6305*c0909341SAndroid Build Coastguard Worker.pass1_end7:
6306*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS      dstq+64*24, 64
6307*c0909341SAndroid Build Coastguard Worker
    ; Advance both buffers by 16 bytes and repeat while the decremented counter
    ; is still positive.
6308*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 16
6309*c0909341SAndroid Build Coastguard Worker    add                   dstq, 16
6310*c0909341SAndroid Build Coastguard Worker    dec                    r3d
6311*c0909341SAndroid Build Coastguard Worker    jg .pass1_loop
6312*c0909341SAndroid Build Coastguard Worker
6313*c0909341SAndroid Build Coastguard Worker.pass2:
    ; First pass-2 run: coeffq = start of the stack buffer (slot gprsize*4),
    ; dstq = original dst + 32. The saved eob - 136 is copied to
    ; [rsp+gprsize*1+16*35]; tx2q = .pass2_end and r3d = 4 are set before
    ; entering idct_32x32's .pass2_loop.
    ; NOTE(review): presumably the 16*35 slot is where that loop reads the eob
    ; value from -- confirm in idct_32x32_internal_8bpc.
6314*c0909341SAndroid Build Coastguard Worker    mov                 coeffq, [rsp+gprsize*4+16*67]
6315*c0909341SAndroid Build Coastguard Worker    mov                   dstq, [rsp+gprsize*3+16*67]
6316*c0909341SAndroid Build Coastguard Worker    mov                   eobd, [rsp+gprsize*1+16*67]
6317*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [dstq+32]
6318*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*1+16*35], eobd
6319*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass2_end)]
6320*c0909341SAndroid Build Coastguard Worker    mov                    r3d, 4
6321*c0909341SAndroid Build Coastguard Worker    jmp m(idct_32x32_internal_8bpc).pass2_loop
6322*c0909341SAndroid Build Coastguard Worker
6323*c0909341SAndroid Build Coastguard Worker.pass2_end:
    ; Spill m7, put .pass2_end1 in r3 and jump to idct_8x32's .end2.
    ; NOTE(review): .end2 presumably resumes at the address in r3 -- confirm.
6324*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
6325*c0909341SAndroid Build Coastguard Worker    lea                     r3, [o(.pass2_end1)]
6326*c0909341SAndroid Build Coastguard Worker    jmp  m(idct_8x32_internal_8bpc).end2
6327*c0909341SAndroid Build Coastguard Worker
6328*c0909341SAndroid Build Coastguard Worker.pass2_end1:
    ; Re-arm tx2q, advance coeffq by 16*32 bytes, reload dstq and the counter
    ; from the 16*35 slots (not written in this routine, so presumably by the
    ; 32x32 loop -- confirm) and loop while the decremented counter is positive.
6329*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass2_end)]
6330*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 16*32
6331*c0909341SAndroid Build Coastguard Worker    mov                   dstq, [rsp+gprsize*2+16*35]
6332*c0909341SAndroid Build Coastguard Worker    mov                    r3d, [rsp+gprsize*3+16*35]
6333*c0909341SAndroid Build Coastguard Worker    dec                    r3d
6334*c0909341SAndroid Build Coastguard Worker    jg m(idct_32x32_internal_8bpc).pass2_loop
6335*c0909341SAndroid Build Coastguard Worker
6336*c0909341SAndroid Build Coastguard Worker.pass2_end2:
    ; Second pass-2 run: original dst and original coeff pointers are restored.
    ; This time tx2q is idct_32x32's own .pass2_end, so control does not come
    ; back to this routine's labels.
6337*c0909341SAndroid Build Coastguard Worker    mov                   dstq, [rsp+gprsize*3+16*67]
6338*c0909341SAndroid Build Coastguard Worker    mov                 coeffq, [rsp+gprsize*2+16*67]
6339*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(idct_32x32_internal_8bpc).pass2_end)]
6340*c0909341SAndroid Build Coastguard Worker    mov                    r3d, 4
6341*c0909341SAndroid Build Coastguard Worker    jmp m(idct_32x32_internal_8bpc).pass2_loop
6342*c0909341SAndroid Build Coastguard Worker
6343*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_dct_dct_64x64_8bpc
; Entry point for the 64x64 DCT_DCT inverse transform (8 bpc), going by its name.
;   eob != 0 : the work is done by idct_64x64_internal_8bpc, then RET.
;   eob == 0 : the .dconly shortcut scales the first coefficient, clears it in
;              memory and tail-jumps into the 64x16 routine's shared .body.
; Unlike the 32x64 and 64x32 wrappers, this one has no local .end label; the
; DC-only path passes the 64x32 wrapper's .end as its continuation instead.
; NOTE(review): the cglobal numbers (4, 6, 8, 16*197) are presumably x86inc's
; "args, GPRs, XMM regs, stack bytes" -- confirm against x86inc.asm.
6344*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_64x64_8bpc, 4, 6, 8, 16*197, dst, stride, coeff, eob, tx2
6345*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32
    ; 32-bit builds only: r5 receives the address of $$ (start of the section).
    ; Presumably this is the base the o() macro addresses constants from -- confirm.
6346*c0909341SAndroid Build Coastguard Worker    LEA                     r5, $$
6347*c0909341SAndroid Build Coastguard Worker%endif
6348*c0909341SAndroid Build Coastguard Worker    test                  eobd, eobd
6349*c0909341SAndroid Build Coastguard Worker    jz .dconly
6350*c0909341SAndroid Build Coastguard Worker
6351*c0909341SAndroid Build Coastguard Worker    call m(idct_64x64_internal_8bpc)
6352*c0909341SAndroid Build Coastguard Worker    RET
6353*c0909341SAndroid Build Coastguard Worker
6354*c0909341SAndroid Build Coastguard Worker.dconly:
    ; m0 = pmulhrsw([coeffq], pw_2896x8), applied once here (the 32x64 and
    ; 64x32 wrappers apply it twice); m2 = pw_8192 (pw_16384 in those wrappers).
6355*c0909341SAndroid Build Coastguard Worker    movd                    m1, [o(pw_2896x8)]
6356*c0909341SAndroid Build Coastguard Worker    pmulhrsw                m0, m1, [coeffq]
6357*c0909341SAndroid Build Coastguard Worker    movd                    m2, [o(pw_8192)]
    ; eobd is zero on this path (jz above), so this writes 32 bits of zero over
    ; the coefficient that was just read.
6358*c0909341SAndroid Build Coastguard Worker    mov               [coeffq], eobd
    ; r3d = 64 and tx2q = address of the 64x32 wrapper's .end (a bare RET) are
    ; handed to the 64x16 .body.
    ; NOTE(review): presumably the row count and the label to resume at -- confirm
    ; against inv_txfm_add_dct_dct_64x16_8bpc.body.
6359*c0909341SAndroid Build Coastguard Worker    mov                    r3d, 64
6360*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(m(inv_txfm_add_dct_dct_64x32_8bpc).end)]
6361*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_64x16_8bpc).body
6362*c0909341SAndroid Build Coastguard Worker
; ---------------------------------------------------------------------------
; idct_64x64_internal_8bpc
;
; Two-pass driver: a pass-1 loop over 16-byte-wide slices of the coefficient
; buffer, followed by two runs of idct_16x64_internal_8bpc's .pass2_loop.
;
; cglobal is given 0 args / 0 GPRs / 0 XMM registers, so the names dst,
; stride, coeff, eob and tx2 act only as register aliases here.
; NOTE(review): with x86inc's positional naming eob should alias r3 and tx2
; should alias r4 -- that would explain why eob is spilled before r3d is
; reused as the loop counter, and why tx2q/r4 are used interchangeably for
; continuation addresses. Confirm against x86inc.
;
; GPR-sized scratch slots used by this block (relative to rsp):
;   [rsp+gprsize*1+16*67]  eob - 136 (written here, never read in this block)
;   [rsp+gprsize*2+16*67]  running pointer: pass-1 spill buffer, later next dst
;   [rsp+gprsize*3+16*67]  dst on entry; re-read as a counter in .pass2_end
;   [rsp+gprsize*4+16*67]  coeff pointer on entry
; The stack area is assumed to be reserved by the caller -- TODO confirm.
; LOAD_*/SAVE_* macros, o() and m() are defined outside this chunk; comments
; about them below are assumptions based on how they are used here.
; ---------------------------------------------------------------------------
6363*c0909341SAndroid Build Coastguard Workercglobal idct_64x64_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
; Pass-1 iteration count: r4d = 4 if (eob - 136) is non-negative, else 2.
; cmovns tests the sign flag left by the sub.
6364*c0909341SAndroid Build Coastguard Worker    mov                    r5d, 4
6365*c0909341SAndroid Build Coastguard Worker    mov                    r4d, 2
6366*c0909341SAndroid Build Coastguard Worker    sub                   eobd, 136
6367*c0909341SAndroid Build Coastguard Worker    cmovns                 r4d, r5d
6368*c0909341SAndroid Build Coastguard Worker
; 32-bit builds only: r5 is reloaded with the section start ($$) after its
; use as a temporary above -- presumably the base that o() addresses
; constants from; verify against the o() definition.
6369*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32
6370*c0909341SAndroid Build Coastguard Worker    LEA                     r5, $$
6371*c0909341SAndroid Build Coastguard Worker%endif
6372*c0909341SAndroid Build Coastguard Worker
; Spill state: biased eob, then r3d takes the loop count; the entry coeff and
; dst pointers are saved, and dstq is redirected to a stack buffer at
; rsp+gprsize+16*69 whose address is also saved in slot 2.
6373*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*1+16*67], eobd
6374*c0909341SAndroid Build Coastguard Worker    mov                    r3d, r4d
6375*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*4+16*67], coeffq
6376*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*3+16*67], dstq
6377*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [rsp+gprsize+16*69]
6378*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*2+16*67], dstq
6379*c0909341SAndroid Build Coastguard Worker
; ---- Pass 1: one iteration per 16-byte slice (coeffq/dstq advance by 16) ----
6380*c0909341SAndroid Build Coastguard Worker.pass1_loop:
; Stage A: 4 rows starting at coeffq+64*0 with stride 64*8; m4-m7 are zeroed
; before calling the 8x8 IDCT core. 7 result rows go to stack slot 3 onward.
6381*c0909341SAndroid Build Coastguard Worker    LOAD_4ROWS     coeffq+64*0, 64*8
6382*c0909341SAndroid Build Coastguard Worker    pxor                    m4, m4
6383*c0909341SAndroid Build Coastguard Worker    REPX          {mova x, m4}, m5, m6, m7
6384*c0909341SAndroid Build Coastguard Worker    call  m(idct_8x8_internal_8bpc).main
6385*c0909341SAndroid Build Coastguard Worker    SAVE_7ROWS    rsp+gprsize+16*3, 16
6386*c0909341SAndroid Build Coastguard Worker
; Stage B: 4 rows starting at coeffq+64*4 with stride 64*8, again with m4-m7
; zeroed, fed to the 16x8 core. m7 is reloaded from slot 0 afterwards
; (presumably left there by the callee), then 8 rows are saved from slot 11.
6387*c0909341SAndroid Build Coastguard Worker    pxor                    m4, m4
6388*c0909341SAndroid Build Coastguard Worker    LOAD_4ROWS     coeffq+64*4, 64*8
6389*c0909341SAndroid Build Coastguard Worker
6390*c0909341SAndroid Build Coastguard Worker    REPX          {mova x, m4}, m5, m6, m7
6391*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
6392*c0909341SAndroid Build Coastguard Worker    mova                    m7, [rsp+gprsize+16*0]
6393*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS   rsp+gprsize+16*11, 16
6394*c0909341SAndroid Build Coastguard Worker
; Stage C: 8 rows starting at coeffq+64*2 with stride 64*4, scattered into
; slots 19..26 in the permuted order below -- presumably the input layout
; idct_8x32 .main_fast expects; confirm against that routine.
6395*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+64*2, 64*4
6396*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*19], m0
6397*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*26], m1
6398*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*23], m2
6399*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*22], m3
6400*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*21], m4
6401*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*24], m5
6402*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*25], m6
6403*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*20], m7
6404*c0909341SAndroid Build Coastguard Worker
6405*c0909341SAndroid Build Coastguard Worker    call m(idct_8x32_internal_8bpc).main_fast
6406*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    rsp+gprsize+16*3, 16
6407*c0909341SAndroid Build Coastguard Worker
; Stage D: the odd-indexed inputs (in1..in31, per the trailing comments) are
; loaded in two batches of 8 with stride 64*2 and scattered into slots 35..65
; ahead of the idct_16x64 .main call. No load in this loop has a base offset
; beyond coeffq+64*17.
6408*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS     coeffq+64*1, 64*2
6409*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*35], m0                        ;in1
6410*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*49], m1                        ;in3
6411*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*43], m2                        ;in5
6412*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*41], m3                        ;in7
6413*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*39], m4                        ;in9
6414*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*45], m5                        ;in11
6415*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*47], m6                        ;in13
6416*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*37], m7                        ;in15
6417*c0909341SAndroid Build Coastguard Worker
6418*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    coeffq+64*17, 64*2
6419*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*63], m0                        ;in17
6420*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*53], m1                        ;in19
6421*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*55], m2                        ;in21
6422*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*61], m3                        ;in23
6423*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*59], m4                        ;in25
6424*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*57], m5                        ;in27
6425*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*51], m6                        ;in29
6426*c0909341SAndroid Build Coastguard Worker    mova   [rsp+gprsize+16*65], m7                        ;in31
6427*c0909341SAndroid Build Coastguard Worker
6428*c0909341SAndroid Build Coastguard Worker    call m(idct_16x64_internal_8bpc).main
6429*c0909341SAndroid Build Coastguard Worker
; Pass-1 epilogue: eight identical stanzas, one per group of 8 stack rows
; (slots 3, 11, 19, 27, 35, 43, 51, 59). Each stanza loads 8 rows, parks m7
; in slot 0, puts pw_8192 in m7, stores the continuation label in tx2q and
; tail-jumps to idct_8x8 .pass1_end1. That routine is outside this chunk;
; presumably it scales by m7, transposes, and jumps back through tx2q.
; The continuation then stores the returned rows: the first four groups go
; back into the coefficient buffer, the last four into the stack buffer that
; dstq points at.
6430*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS    rsp+gprsize+16*3, 16
6431*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
6432*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
6433*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end)]
6434*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
6435*c0909341SAndroid Build Coastguard Worker
6436*c0909341SAndroid Build Coastguard Worker.pass1_end:
6437*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS     coeffq+64*0, 64
6438*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*11, 16
6439*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
6440*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
6441*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end1)]
6442*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
6443*c0909341SAndroid Build Coastguard Worker
6444*c0909341SAndroid Build Coastguard Worker.pass1_end1:
6445*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS     coeffq+64*8, 64
6446*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*19, 16
6447*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
6448*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
6449*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end2)]
6450*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
6451*c0909341SAndroid Build Coastguard Worker
6452*c0909341SAndroid Build Coastguard Worker.pass1_end2:
6453*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+64*16, 64
6454*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*27, 16
6455*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
6456*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
6457*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end3)]
6458*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
6459*c0909341SAndroid Build Coastguard Worker
6460*c0909341SAndroid Build Coastguard Worker.pass1_end3:
6461*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS    coeffq+64*24, 64
6462*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*35, 16
6463*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
6464*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
6465*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end4)]
6466*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
6467*c0909341SAndroid Build Coastguard Worker
; From here on the results are written through dstq, which during pass 1 is
; the stack buffer pointer set up before the loop, not the real destination.
6468*c0909341SAndroid Build Coastguard Worker.pass1_end4:
6469*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS       dstq+64*0, 64
6470*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*43, 16
6471*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
6472*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
6473*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end5)]
6474*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
6475*c0909341SAndroid Build Coastguard Worker
6476*c0909341SAndroid Build Coastguard Worker.pass1_end5:
6477*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS       dstq+64*8, 64
6478*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*51, 16
6479*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
6480*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
6481*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end6)]
6482*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
6483*c0909341SAndroid Build Coastguard Worker
6484*c0909341SAndroid Build Coastguard Worker.pass1_end6:
6485*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS      dstq+64*16, 64
6486*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*59, 16
6487*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
6488*c0909341SAndroid Build Coastguard Worker    mova                    m7, [o(pw_8192)]
6489*c0909341SAndroid Build Coastguard Worker    lea                   tx2q, [o(.pass1_end7)]
6490*c0909341SAndroid Build Coastguard Worker    jmp   m(idct_8x8_internal_8bpc).pass1_end1
6491*c0909341SAndroid Build Coastguard Worker
6492*c0909341SAndroid Build Coastguard Worker.pass1_end7:
6493*c0909341SAndroid Build Coastguard Worker    SAVE_8ROWS      dstq+64*24, 64
6494*c0909341SAndroid Build Coastguard Worker
; Advance both the coefficient pointer and the stack-buffer pointer by one
; 16-byte slice and loop while the counter stays positive. This relies on
; r3d surviving the calls above -- TODO confirm the callees preserve it.
6495*c0909341SAndroid Build Coastguard Worker    add                 coeffq, 16
6496*c0909341SAndroid Build Coastguard Worker    add                   dstq, 16
6497*c0909341SAndroid Build Coastguard Worker    dec                    r3d
6498*c0909341SAndroid Build Coastguard Worker    jg .pass1_loop
6499*c0909341SAndroid Build Coastguard Worker
; ---- Pass 2, first run: start at byte offset 32 of the real destination ----
; dstq  <- dst saved at entry (slot 3), then +32.
; coeffq <- slot 2, i.e. the stack-buffer address stored before pass 1
;          (assuming no callee rewrote that slot).
; r3d = 4 iterations; slot 2 is repurposed to hold the next dst (dstq+8);
; r4 carries the address .pass2_loop should continue at (.pass2_end).
6500*c0909341SAndroid Build Coastguard Worker.pass2:
6501*c0909341SAndroid Build Coastguard Worker    mov                   dstq, [rsp+gprsize*3+16*67]
6502*c0909341SAndroid Build Coastguard Worker    mov                 coeffq, [rsp+gprsize*2+16*67]
6503*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [dstq+32]
6504*c0909341SAndroid Build Coastguard Worker    mov                    r3d, 4
6505*c0909341SAndroid Build Coastguard Worker    lea                     r4, [dstq+8]
6506*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*2+16*67], r4
6507*c0909341SAndroid Build Coastguard Worker    lea                     r4, [o(.pass2_end)]
6508*c0909341SAndroid Build Coastguard Worker    jmp m(idct_16x64_internal_8bpc).pass2_loop
6509*c0909341SAndroid Build Coastguard Worker
; Continuation reached from .pass2_loop during the first run. It finishes one
; iteration (load 8 rows from slot 35, step dstq down two strides, call the
; shared .write helper with r3 pointing at rsp+16*32+gprsize), then sets up
; the next one: dstq <- saved next-dst, slot 2 <- dstq+8, r4 <- .pass2_end.
; r3d is reloaded from slot 3, which held dst on entry to this function.
; NOTE(review): this only works as a counter if .pass2_loop stores r3d into
; that slot -- verify in idct_16x64_internal_8bpc.
6510*c0909341SAndroid Build Coastguard Worker.pass2_end:
6511*c0909341SAndroid Build Coastguard Worker    LOAD_8ROWS   rsp+gprsize+16*35, 16
6512*c0909341SAndroid Build Coastguard Worker    lea                   dstq, [dstq+strideq*2]
6513*c0909341SAndroid Build Coastguard Worker    lea                     r3, [rsp+16*32+gprsize]
6514*c0909341SAndroid Build Coastguard Worker    mova    [rsp+gprsize+16*0], m7
6515*c0909341SAndroid Build Coastguard Worker    call m(idct_16x64_internal_8bpc).write
6516*c0909341SAndroid Build Coastguard Worker    mov                   dstq, [rsp+gprsize*2+16*67]
6517*c0909341SAndroid Build Coastguard Worker    mov                    r3d, [rsp+gprsize*3+16*67]
6518*c0909341SAndroid Build Coastguard Worker    lea                     r4, [dstq+8]
6519*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*2+16*67], r4
6520*c0909341SAndroid Build Coastguard Worker    lea                     r4, [o(.pass2_end)]
6521*c0909341SAndroid Build Coastguard Worker
6522*c0909341SAndroid Build Coastguard Worker    dec                    r3d
6523*c0909341SAndroid Build Coastguard Worker    jg  m(idct_16x64_internal_8bpc).pass2_loop
6524*c0909341SAndroid Build Coastguard Worker
; ---- Pass 2, second run: rewind to the start of the destination ----
; coeffq <- coefficient pointer saved at entry (slot 4).
; If the first run executed its four iterations, slot 2 now holds
; dst + 32 + 8 + 4*8 = dst + 72, so subtracting 72 yields the entry dst.
; The continuation address is now idct_16x64's own .end1, so control does not
; come back to this function after the loop.
6525*c0909341SAndroid Build Coastguard Worker.pass2_end2:
6526*c0909341SAndroid Build Coastguard Worker    mov                 coeffq, [rsp+gprsize*4+16*67]
6527*c0909341SAndroid Build Coastguard Worker    mov                   dstq, [rsp+gprsize*2+16*67]
6528*c0909341SAndroid Build Coastguard Worker    mov                    r3d, 4
6529*c0909341SAndroid Build Coastguard Worker    sub                   dstq, 72
6530*c0909341SAndroid Build Coastguard Worker    lea                     r4, [dstq+8]
6531*c0909341SAndroid Build Coastguard Worker    mov  [rsp+gprsize*2+16*67], r4
6532*c0909341SAndroid Build Coastguard Worker    lea                     r4, [o(m(idct_16x64_internal_8bpc).end1)]
6533*c0909341SAndroid Build Coastguard Worker    jmp m(idct_16x64_internal_8bpc).pass2_loop
6534