; Copyright © 2018-2021, VideoLAN and dav1d authors
; Copyright © 2018, Two Orioles, LLC
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions are met:
;
; 1. Redistributions of source code must retain the above copyright notice, this
;    list of conditions and the following disclaimer.
;
; 2. Redistributions in binary form must reproduce the above copyright notice,
;    this list of conditions and the following disclaimer in the documentation
;    and/or other materials provided with the distribution.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
; ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

%include "config.asm"
%include "ext/x86/x86inc.asm"


SECTION_RODATA 16

; pshufb control masks (byte indices).
; deint_shuf moves words 0,2,4,6 to the low half and words 1,3,5,7 to the
; high half of the register.
deint_shuf:  db  0,  1,  4,  5,  8,  9, 12, 13,  2,  3,  6,  7, 10, 11, 14, 15

deint_shuf1: db  0,  1,  8,  9,  2,  3, 10, 11,  4,  5, 12, 13,  6,  7, 14, 15
deint_shuf2: db  8,  9,  0,  1, 10, 11,  2,  3, 12, 13,  4,  5, 14, 15,  6,  7

; COEF_PAIR a, b[, mode]
; Emits 16-byte tables of interleaved 16-bit coefficient pairs (each pair
; repeated 4 times) intended as pmaddwd operands:
;   pw_<a>_m<b>  = ( a, -b)   always
;   pw_<b>_<a>   = ( b,  a)   unless mode == 2
;   pw_m<a>_m<b> = (-a, -b)   only when mode != 0
%macro COEF_PAIR 2-3 0 ; !0 = m%1_m%2, 2 = no %2_%1
pw_%1_m%2:  times 4 dw  %1, -%2
%if %3 != 2
pw_%2_%1:   times 4 dw  %2,  %1
%endif
%if %3
pw_m%1_m%2: times 4 dw -%1, -%2
%endif
%endmacro

;adst4
pw_1321_3803:   times 4 dw  1321,  3803
pw_2482_m1321:  times 4 dw  2482, -1321
pw_3344_2482:   times 4 dw  3344,  2482
pw_3344_m3803:  times 4 dw  3344, -3803
pw_3344_m3344:  times 4 dw  3344, -3344
pw_0_3344:      times 4 dw     0,  3344
pw_m6688_m3803: times 4 dw -6688, -3803

COEF_PAIR 2896, 2896
COEF_PAIR 1567, 3784
COEF_PAIR  799, 4017
COEF_PAIR 3406, 2276
COEF_PAIR  401, 4076
COEF_PAIR 1931, 3612
COEF_PAIR 3166, 2598
COEF_PAIR 3920, 1189
COEF_PAIR 3784, 1567, 1
COEF_PAIR  995, 3973
COEF_PAIR 1751, 3703
COEF_PAIR 3513, 2106
COEF_PAIR 3857, 1380
COEF_PAIR 4017,  799, 1
COEF_PAIR  201, 4091
COEF_PAIR 2440, 3290
COEF_PAIR 3035, 2751
COEF_PAIR 4052,  601
COEF_PAIR 2276, 3406, 1
COEF_PAIR 4076,  401, 2
COEF_PAIR 2598, 3166, 2
COEF_PAIR 3612, 1931, 2
COEF_PAIR 1189, 3920, 2

; Broadcast constants: pd_* = 4 dwords, pw_* = 8 words.
; The pw_<c>x8 entries hold c*8.
pd_2048:    times 4 dd  2048
pw_2048:    times 8 dw  2048
pw_m2048:   times 8 dw -2048
pw_4096:    times 8 dw  4096
pw_16384:   times 8 dw  16384
pw_m16384:  times 8 dw -16384
pw_1697x16: times 8 dw  1697*16
pw_1697x8:  times 8 dw  1697*8
pw_2896x8:  times 8 dw  2896*8
pw_3344x8:  times 8 dw  3344*8
pw_8192:    times 8 dw  8192
pw_m8192:   times 8 dw -8192
pw_5:       times 8 dw  5
pw_201x8:   times 8 dw  201*8
pw_4091x8:  times 8 dw  4091*8
pw_m2751x8: times 8 dw -2751*8
pw_3035x8:  times 8 dw  3035*8
pw_1751x8:  times 8 dw  1751*8
pw_3703x8:  times 8 dw  3703*8
pw_m1380x8: times 8 dw -1380*8
pw_3857x8:  times 8 dw  3857*8
pw_995x8:   times 8 dw  995*8
pw_3973x8:  times 8 dw  3973*8
pw_m2106x8: times 8 dw -2106*8
pw_3513x8:  times 8 dw  3513*8
pw_2440x8:  times 8 dw  2440*8
pw_3290x8:  times 8 dw  3290*8
pw_m601x8:  times 8 dw -601*8
pw_4052x8:  times 8 dw  4052*8

pw_4095x8:  times 8 dw  4095*8
pw_101x8:   times 8 dw  101*8
pw_2967x8:  times 8 dw  2967*8
pw_m2824x8: times 8 dw -2824*8
pw_3745x8:  times 8 dw  3745*8
pw_1660x8:  times 8 dw  1660*8
pw_3822x8:  times 8 dw  3822*8
pw_m1474x8: times 8 dw -1474*8
pw_3996x8:  times 8 dw  3996*8
pw_897x8:   times 8 dw  897*8
pw_3461x8:  times 8 dw  3461*8
pw_m2191x8: times 8 dw -2191*8
pw_3349x8:  times 8 dw  3349*8
pw_2359x8:  times 8 dw  2359*8
pw_4036x8:  times 8 dw  4036*8
pw_m700x8:  times 8 dw -700*8
pw_4065x8:  times 8 dw  4065*8
pw_501x8:   times 8 dw  501*8
pw_3229x8:  times 8 dw  3229*8
pw_m2520x8: times 8 dw -2520*8
pw_3564x8:  times 8 dw  3564*8
pw_2019x8:  times 8 dw  2019*8
pw_3948x8:  times 8 dw  3948*8
pw_m1092x8: times 8 dw -1092*8
pw_3889x8:  times 8 dw  3889*8
pw_1285x8:  times 8 dw  1285*8
pw_3659x8:  times 8 dw  3659*8
pw_m1842x8: times 8 dw -1842*8
pw_3102x8:  times 8 dw  3102*8
pw_2675x8:  times 8 dw  2675*8
pw_4085x8:  times 8 dw  4085*8
pw_m301x8:  times 8 dw -301*8

SECTION .text

; m(x): mangled symbol name of function x for the current instruction set.
%define m(x) mangle(private_prefix %+ _ %+ x %+ SUFFIX)

; o(x): address expression for constant x. On x86-32 constants are reached
; relative to r5, which INV_TXFM_FN loads with $$.
%if ARCH_X86_64
%define o(x) x
%else
%define o(x) r5-$$+x ; PIC
%endif

; WRITE_4X4 src1, src2, tmp1, tmp2, tmp3, row1, row2, row3, row4
; Adds the 16-bit values in m<src1>/m<src2> to a 4x4 block of 8-bit pixels
; and stores the result with unsigned saturation (packuswb).
;   src1 low/high half  -> rows <row1>/<row2>
;   src2 low/high half  -> rows <row3>/<row4>
; Each row argument is a row index 0-3: bit 1 selects dstq or
; r2 = dstq + 2*stride as the base, bit 0 adds one stride.
; Clobbers r2, m<tmp1>, m<tmp2>, m<tmp3>.
%macro WRITE_4X4 9 ;src[1-2], tmp[1-3], row[1-4]
    lea         r2, [dstq+strideq*2]
%assign %%i 1
%rotate 5
%rep 4
    %if %1 & 2
        CAT_XDEFINE %%row_adr, %%i, r2 + strideq*(%1&1)
    %else
        CAT_XDEFINE %%row_adr, %%i, dstq + strideq*(%1&1)
    %endif
    %assign %%i %%i + 1
    %rotate 1
%endrep
    ; 5 + 4*1 rotations = 9, so %1..%9 are back in their original positions

    movd        m%3, [%%row_adr1]       ;dst0
    movd        m%5, [%%row_adr2]       ;dst1
    punpckldq   m%3, m%5                ;high: dst1 :low: dst0
    movd        m%4, [%%row_adr3]       ;dst2
    movd        m%5, [%%row_adr4]       ;dst3
    punpckldq   m%4, m%5                ;high: dst3 :low: dst2

    pxor        m%5, m%5
    punpcklbw   m%3, m%5                ;extend byte to word
    punpcklbw   m%4, m%5                ;extend byte to word

    paddw       m%3, m%1                ;high: dst1 + out1 ;low: dst0 + out0
    paddw       m%4, m%2                ;high: dst3 + out3 ;low: dst2 + out2

    packuswb    m%3, m%4                ;high->low: dst3 + out3, dst2 + out2, dst1 + out1, dst0 + out0

    movd        [%%row_adr1], m%3       ;store dst0 + out0
    pshuflw     m%4, m%3, q1032
    movd        [%%row_adr2], m%4       ;store dst1 + out1
    punpckhqdq  m%3, m%3
    movd        [%%row_adr3], m%3       ;store dst2 + out2
    psrlq       m%3, 32
    movd        [%%row_adr4], m%3       ;store dst3 + out3
%endmacro

; ITX4_END row1, row2, row3, row4[, rnd = 2048]
; Common tail of the 4x4 functions: unless rnd is 0, multiplies m0/m1 by the
; constant pw_<rnd> with pmulhrsw, then adds them to the destination rows
; via WRITE_4X4 and returns. Clobbers r2, m2-m4.
%macro ITX4_END 4-5 2048 ; row[1-4], rnd
%if %5
    mova        m2, [o(pw_%5)]
    pmulhrsw    m0, m2
    pmulhrsw    m1, m2
%endif

    WRITE_4X4   0, 1, 2, 3, 4, %1, %2, %3, %4
    ret
%endmacro

; ITX_MUL2X_PACK dst/src, tmp, rnd, coef1, coef2[, flags]
; m<dst/src> holds interleaved word pairs. Two pmaddwd products are formed,
; m<rnd> is added to both, both are shifted right by 12, and (unless flag 4
; is set) the results are packed back to words in m<dst/src> with the tmp
; result in the high half.
;   flags & 2: coef1/coef2 are register numbers holding the coefficients
;   flags & 1: swap which coefficient table goes to which product
; flags: 1 = swap, 2: coef_regs, 4: no_pack
%macro ITX_MUL2X_PACK 5-6 0 ; dst/src, tmp[1], rnd, coef[1-2], flags
%if %6 & 2
    pmaddwd     m%2, m%4, m%1
    pmaddwd     m%1, m%5
%elif %6 & 1
    pmaddwd     m%2, m%1, [o(pw_%5_%4)]
    pmaddwd     m%1, [o(pw_%4_m%5)]
%else
    pmaddwd     m%2, m%1, [o(pw_%4_m%5)]
    pmaddwd     m%1, [o(pw_%5_%4)]
%endif
    paddd       m%2, m%3
    paddd       m%1, m%3
    psrad       m%2, 12
    psrad       m%1, 12
    ; NASM evaluates '&' before '==', i.e. this tests (flags & 4) == 0
%if %6 & 4 == 0
    packssdw    m%1, m%2
%endif
%endmacro

; IDCT4_1D_PACKED
; One 4-point inverse DCT pass on packed data.
;   in:  m0 = in0 (low)  | in1 (high),  m1 = in2 (low)  | in3 (high)
;   out: m0 = out0 (low) | out1 (high), m1 = out3 (low) | out2 (high)
; Clobbers m2, m3. The optional argument is accepted but not used.
%macro IDCT4_1D_PACKED 0-1 ;pw_2896x8
    mova        m3, [o(pd_2048)]
    punpckhwd   m2, m0, m1              ;unpacked in1 in3
    punpcklwd   m0, m1                  ;unpacked in0 in2
    ITX_MUL2X_PACK 2, 1, 3, 1567, 3784
    ITX_MUL2X_PACK 0, 1, 3, 2896, 2896
    psubsw      m1, m0, m2              ;high: out2 ;low: out3
    paddsw      m0, m2                  ;high: out1 ;low: out0
%endmacro

; INV_TXFM_FN type1, type2, size, xmm/stack
; Declares the public entry point inv_txfm_add_<type1>_<type2>_<size>_8bpc.
; It loads tx2q with the address of the .pass2 label of the <type2> internal
; function and transfers control to the <type1> internal function, whose
; first pass ends with an indirect jump through tx2q.
; For dct_dct, eob == 0 skips the internal function and continues with
; whatever code follows the macro invocation.
%macro INV_TXFM_FN 4+ ; type1, type2, size, xmm/stack
cglobal inv_txfm_add_%1_%2_%3_8bpc, 4, 6, %4, dst, stride, coeff, eob, tx2
    %define %%p1 m(i%1_%3_internal_8bpc)
%if ARCH_X86_32
    LEA         r5, $$
%endif
%if has_epilogue
%ifidn %1_%2, dct_dct
    test        eobd, eobd
    jz          %%end
%endif
    lea         tx2q, [o(m(i%2_%3_internal_8bpc).pass2)]
    call        %%p1
    RET
%%end:
%else
    lea         tx2q, [o(m(i%2_%3_internal_8bpc).pass2)]
%ifidn %1_%2, dct_dct
    test        eobd, eobd
    jnz         %%p1
%else
    ; The jmp is emitted only when %%end - %%p1 is negative; when %%p1
    ; coincides with %%end, execution simply falls through into it.
    times ((%%end - %%p1) >> 31) & 1 jmp %%p1
ALIGN function_align
%%end:
%endif
%endif
%endmacro

; INV_TXFM_4X4_FN type1, type2
; 4x4 entry point. For dct_dct it also provides the eob == 0 path: the first
; coefficient is broadcast, multiplied twice by pw_2896x8 (pmulhrsw), the
; stored coefficient is cleared, and the shared store tail at
; iadst_4x4_internal_8bpc.end2 is reused.
%macro INV_TXFM_4X4_FN 2 ; type1, type2
    INV_TXFM_FN %1, %2, 4x4, 6
%ifidn %1_%2, dct_dct
    pshuflw     m0, [coeffq], q0000
    punpcklqdq  m0, m0
    mova        m1, [o(pw_2896x8)]
    pmulhrsw    m0, m1
    mov         [coeffq], eobd          ;0
    pmulhrsw    m0, m1
    mova        m1, m0
    TAIL_CALL m(iadst_4x4_internal_8bpc).end2
%endif
%endmacro

INIT_XMM ssse3
; itx16 relies on dct_dct being the first function. If you change the order, adjust `itx8_start` in itx16.
INV_TXFM_4X4_FN dct, dct
INV_TXFM_4X4_FN dct, adst
INV_TXFM_4X4_FN dct, flipadst
INV_TXFM_4X4_FN dct, identity

; idct_4x4_internal_8bpc
; Reached from the inv_txfm_add_dct_*_4x4 entry points with tx2q already set.
;   first pass: loads the 16 coefficients from coeffq, runs IDCT4_1D_PACKED,
;               repacks the result as m0 = in0|in1, m1 = in2|in3 for the next
;               pass and jumps to tx2q.
;   .pass2:     second-pass entry; runs IDCT4_1D_PACKED on m0/m1, zeroes the
;               32-byte coefficient buffer and finishes through ITX4_END
;               (default pw_2048 rounding, add to dst, ret).
cglobal idct_4x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    mova        m0, [coeffq+16*0]       ;high: in1 ;low: in0
    mova        m1, [coeffq+16*1]       ;high: in3 ;low in2

    IDCT4_1D_PACKED

    mova        m2, [o(deint_shuf)]
    shufps      m3, m0, m1, q1331
    shufps      m0, m1, q0220
    pshufb      m0, m2                  ;high: in1 ;low: in0
    pshufb      m1, m3, m2              ;high: in3 ;low :in2
    jmp         tx2q

.pass2:
    IDCT4_1D_PACKED

    pxor        m2, m2
    mova        [coeffq+16*0], m2
    mova        [coeffq+16*1], m2       ;memset(coeff, 0, sizeof(*coeff) * sh * sw);

    ; IDCT4_1D_PACKED leaves m1 as low: out3, high: out2, hence rows 3, 2
    ITX4_END    0, 1, 3, 2

INV_TXFM_4X4_FN adst, dct
INV_TXFM_4X4_FN adst, adst
INV_TXFM_4X4_FN adst, flipadst
INV_TXFM_4X4_FN adst, identity

; iadst_4x4_internal_8bpc
;   first pass: loads coefficients, runs .main, re-interleaves the result as
;               m0 = in0|in1, m1 = in2|in3 and jumps to tx2q.
;   .pass2:     runs .main, then falls into .end.
;   .end:       zeroes the 32-byte coefficient buffer (also the jump target
;               of the identity second pass).
;   .end2:      ITX4_END store tail (also used by the dct_dct eob == 0 path).
;   .main:      4-point inverse ADST on m0 = in0|in1, m1 = in2|in3;
;               returns m0 = out0|out1, m1 = out2|out3 (low|high).
;               Clobbers m2-m5.
cglobal iadst_4x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    mova        m0, [coeffq+16*0]
    mova        m1, [coeffq+16*1]
    call        .main
    punpckhwd   m2, m0, m1
    punpcklwd   m0, m1
    punpckhwd   m1, m0, m2              ;high: in3 ;low :in2
    punpcklwd   m0, m2                  ;high: in1 ;low: in0
    jmp         tx2q

.pass2:
    call        .main

.end:
    pxor        m2, m2
    mova        [coeffq+16*0], m2
    mova        [coeffq+16*1], m2

.end2:
    ITX4_END    0, 1, 2, 3

ALIGN function_align
cglobal_label .main
    punpcklwd   m2, m0, m1              ;unpacked in0 in2
    punpckhwd   m0, m1                  ;unpacked in1 in3
    mova        m3, m0
    pmaddwd     m1, m2, [o(pw_3344_m3344)] ;3344 * in0 - 3344 * in2
    pmaddwd     m0, [o(pw_0_3344)]      ;3344 * in3
    paddd       m1, m0                  ;t2
    pmaddwd     m0, m2, [o(pw_1321_3803)]  ;1321 * in0 + 3803 * in2
    pmaddwd     m2, [o(pw_2482_m1321)]  ;2482 * in0 - 1321 * in2
    pmaddwd     m4, m3, [o(pw_3344_2482)]  ;3344 * in1 + 2482 * in3
    pmaddwd     m5, m3, [o(pw_3344_m3803)] ;3344 * in1 - 3803 * in3
    paddd       m4, m0                  ;t0 + t3
    pmaddwd     m3, [o(pw_m6688_m3803)] ;-2 * 3344 * in1 - 3803 * in3
    mova        m0, [o(pd_2048)]
    paddd       m1, m0                  ;t2 + 2048
    paddd       m2, m0
    paddd       m0, m4                  ;t0 + t3 + 2048
    paddd       m5, m2                  ;t1 + t3 + 2048
    paddd       m2, m4
    paddd       m2, m3                  ;t0 + t1 - t3 + 2048
    REPX        {psrad x, 12}, m1, m0, m5, m2
    packssdw    m0, m5                  ;high: out1 ;low: out0
    packssdw    m1, m2                  ;high: out3 ;low: out2
    ret

INV_TXFM_4X4_FN flipadst, dct
INV_TXFM_4X4_FN flipadst, adst
INV_TXFM_4X4_FN flipadst, flipadst
INV_TXFM_4X4_FN flipadst, identity

; iflipadst_4x4_internal_8bpc
; Same arithmetic as the ADST (shares iadst_4x4_internal_8bpc.main) with the
; output order reversed: the first pass interleaves m1 before m0, and the
; second pass stores with row order 3, 2, 1, 0.
cglobal iflipadst_4x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    mova        m0, [coeffq+16*0]
    mova        m1, [coeffq+16*1]
    call        m(iadst_4x4_internal_8bpc).main
    punpcklwd   m2, m1, m0
    punpckhwd   m1, m0
    punpcklwd   m0, m1, m2              ;high: in3 ;low :in2
    punpckhwd   m1, m2                  ;high: in1 ;low: in0
    jmp         tx2q

.pass2:
    call        m(iadst_4x4_internal_8bpc).main

.end:
    pxor        m2, m2
    mova        [coeffq+16*0], m2
    mova        [coeffq+16*1], m2

.end2:
    ITX4_END    3, 2, 1, 0

INV_TXFM_4X4_FN identity, dct
INV_TXFM_4X4_FN identity, adst
INV_TXFM_4X4_FN identity, flipadst
INV_TXFM_4X4_FN identity, identity

; iidentity_4x4_internal_8bpc
; Identity transform: each pass computes x + pmulhrsw(x, 1697*8) with a
; saturating add. The first pass additionally re-interleaves the words for
; the next pass; the second pass reuses the ADST clear-and-store tail.
cglobal iidentity_4x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    mova        m0, [coeffq+16*0]
    mova        m1, [coeffq+16*1]
    mova        m3, [o(pw_1697x8)]
    pmulhrsw    m2, m0, m3
    pmulhrsw    m3, m1
    paddsw      m0, m2
    paddsw      m1, m3
    punpckhwd   m2, m0, m1
    punpcklwd   m0, m1
    punpckhwd   m1, m0, m2              ;high: in3 ;low :in2
    punpcklwd   m0, m2                  ;high: in1 ;low: in0
    jmp         tx2q

.pass2:
    mova        m3, [o(pw_1697x8)]
    pmulhrsw    m2, m3, m0
    pmulhrsw    m3, m1
    paddsw      m0, m2
    paddsw      m1, m3
    jmp         m(iadst_4x4_internal_8bpc).end

; IWHT4_1D_PACKED
; One 4-point inverse Walsh-Hadamard pass on m0/m1 (four words per half).
; Results are left in m0 (high: out0), m1 (low: out2, high: out1) and
; m2 (low: out3). Clobbers m3.
%macro IWHT4_1D_PACKED 0
    punpckhqdq  m3, m0, m1              ;low: in1 high: in3
    punpcklqdq  m0, m1                  ;low: in0 high: in2
    psubw       m2, m0, m3              ;low: in0 - in1 high: in2 - in3
    paddw       m0, m3                  ;low: in0 + in1 high: in2 + in3
    punpckhqdq  m2, m2                  ;t2 t2
    punpcklqdq  m0, m0                  ;t0 t0
    psubw       m1, m0, m2
    psraw       m1, 1                   ;t4 t4
    psubw       m1, m3                  ;low: t1/out2 high: t3/out1
    psubw       m0, m1                  ;high: out0
    paddw       m2, m1                  ;low: out3
%endmacro

; inv_txfm_add_wht_wht_4x4_8bpc(dst, stride, coeff)
; 4x4 inverse WHT in both directions, SSE2 only. Loads and clears the
; coefficient buffer, pre-shifts the input right by 2, runs two
; IWHT4_1D_PACKED passes with a word/dword re-interleave in between, and
; adds the result to dst through ITX4_END with rounding disabled (rnd = 0).
INIT_XMM sse2
cglobal inv_txfm_add_wht_wht_4x4_8bpc, 3, 3, 4, dst, stride, coeff
    mova        m0, [coeffq+16*0]
    mova        m1, [coeffq+16*1]
    pxor        m2, m2
    mova        [coeffq+16*0], m2
    mova        [coeffq+16*1], m2
    psraw       m0, 2
    psraw       m1, 2
    IWHT4_1D_PACKED
    punpckhwd   m0, m1
    punpcklwd   m3, m1, m2
    punpckhdq   m1, m0, m3
    punpckldq   m0, m3
    IWHT4_1D_PACKED
    shufpd      m0, m2, 0x01
    ITX4_END    0, 3, 2, 1, 0

; IDCT8_1D_PACKED
; One 8-point inverse DCT pass on packed data in m0-m3.
;   out: m0 = out0|out1, m1 = out3|out2, m2 = out4|out5, m3 = out7|out6
;        (low|high). Clobbers m4 and m6 (m6 = pd_2048).
%macro IDCT8_1D_PACKED 0
    mova        m6, [o(pd_2048)]
    punpckhwd   m4, m0, m3              ;unpacked in1 in7
    punpcklwd   m0, m2                  ;unpacked in0 in4
    punpckhwd   m2, m1                  ;unpacked in5 in3
    punpcklwd   m1, m3                  ;unpacked in2 in6
    ITX_MUL2X_PACK 4, 3, 6,  799, 4017  ;low: t7a high: t4a
    ITX_MUL2X_PACK 2, 3, 6, 3406, 2276  ;low: t6a high: t5a
    ITX_MUL2X_PACK 1, 3, 6, 1567, 3784  ;low: t3 high: t2
    psubsw      m3, m4, m2              ;low: t6a high: t5a
    paddsw      m4, m2                  ;low: t7 high: t4
    pshufb      m3, [o(deint_shuf1)]
    ITX_MUL2X_PACK 0, 2, 6, 2896, 2896  ;low: t0 high: t1
    ITX_MUL2X_PACK 3, 2, 6, 2896, 2896  ;low: t6 high: t5
    psubsw      m2, m0, m1              ;low: tmp3 high: tmp2
    paddsw      m0, m1                  ;low: tmp0 high: tmp1
    punpcklqdq  m1, m4, m3              ;low: t7 high: t6
    punpckhqdq  m4, m3                  ;low: t4 high: t5
    psubsw      m3, m0, m1              ;low: out7 high: out6
    paddsw      m0, m1                  ;low: out0 high: out1
    paddsw      m1, m2, m4              ;low: out3 high: out2
    psubsw      m2, m4                  ;low: out4 high: out5
%endmacro

; ITX_MULSUB_2W dst/src1, dst/src2, tmp1, tmp2, rnd, coef1, coef2[, dst2_in_tmp1]
; Butterfly rotation on two registers of 8 words each:
;dst1 = (src1 * coef1 - src2 * coef2 + rnd) >> 12
;dst2 = (src1 * coef2 + src2 * coef1 + rnd) >> 12
; dst1 is returned in m<src1>; dst2 in m<src2>, or in m<tmp1> when the last
; argument is non-zero. When coef2 < 8, coef1/coef2 are taken as numbers of
; registers that already hold the coefficient pairs; otherwise the
; pw_<coef2>_<coef1> and pw_<coef1>_m<coef2> tables are used.
%macro ITX_MULSUB_2W 7-8 0 ; dst/src[1-2], tmp[1-2], rnd, coef[1-2], dst2_in_tmp1
    punpckhwd   m%4, m%1, m%2
    punpcklwd   m%1, m%2
%if %7 < 8
    pmaddwd     m%2, m%7, m%1
    pmaddwd     m%3, m%7, m%4
%else
    mova        m%2, [o(pw_%7_%6)]
%if %8
    pmaddwd     m%3, m%1, m%2
    pmaddwd     m%2, m%4
%else
    pmaddwd     m%3, m%4, m%2
    pmaddwd     m%2, m%1
%endif
%endif
    paddd       m%3, m%5
    paddd       m%2, m%5
    psrad       m%3, 12
    psrad       m%2, 12
%if %8
    packssdw    m%3, m%2
%else
    packssdw    m%2, m%3                ;dst2
%endif
%if %7 < 8
    pmaddwd     m%4, m%6
    pmaddwd     m%1, m%6
%elif %8
    mova        m%2, [o(pw_%6_m%7)]
    pmaddwd     m%4, m%2
    pmaddwd     m%1, m%2
%else
    mova        m%3, [o(pw_%6_m%7)]
    pmaddwd     m%4, m%3
    pmaddwd     m%1, m%3
%endif
    paddd       m%4, m%5
    paddd       m%1, m%5
    psrad       m%4, 12
    psrad       m%1, 12
    packssdw    m%1, m%4                ;dst1
%endmacro

; IDCT4_1D src1, src2, src3, src4, tmp1, tmp2, pd_2048
; 4-point inverse DCT with one full register per input; out0..out3 are
; returned in m<src1>..m<src4>. Clobbers m<tmp1>, m<tmp2>.
%macro IDCT4_1D 7 ; src[1-4], tmp[1-2], pd_2048
    ITX_MULSUB_2W %2, %4, %5, %6, %7, 1567, 3784, 1 ;t2, t3
    ITX_MULSUB_2W %1, %3, %4, %6, %7, 2896, 2896, 1 ;t1, t0
    psubsw      m%3, m%1, m%2           ;out2
    paddsw      m%2, m%1                ;out1
    paddsw      m%1, m%5, m%4           ;out0
    psubsw      m%4, m%5                ;out3
%endmacro

; WRITE_4X8 row1, row2, row3, row4
; Stores a 4-wide, 8-row block: m0/m1 go to the first four rows, then dstq
; is advanced by four rows and m2/m3 go to the next four.
; Clobbers r2, m4-m6; dstq is left advanced by 4*stride.
%macro WRITE_4X8 4 ;row[1-4]
    WRITE_4X4   0, 1, 4, 5, 6, %1, %2, %3, %4
    lea         dstq, [dstq+strideq*4]
    WRITE_4X4   2, 3, 4, 5, 6, %1, %2, %3, %4
%endmacro

; INV_4X8
; Word/dword re-interleave of m0-m3 so that each register ends up holding
; two consecutive inputs (m0 = in0|in1 ... m3 = in6|in7). Clobbers m4.
%macro INV_4X8 0
    punpckhwd   m4, m2, m3
    punpcklwd   m2, m3
    punpckhwd   m3, m0, m1
    punpcklwd   m0, m1
    punpckhdq   m1, m0, m2              ;low: in2 high: in3
    punpckldq   m0, m2                  ;low: in0 high: in1
    punpckldq   m2, m3, m4              ;low: in4 high: in5
    punpckhdq   m3, m4                  ;low: in6 high: in7
%endmacro

%macro
INV_TXFM_4X8_FN 2 ; type1, type2 540*c0909341SAndroid Build Coastguard Worker INV_TXFM_FN %1, %2, 4x8, 8 541*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct 542*c0909341SAndroid Build Coastguard Worker pshuflw m0, [coeffq], q0000 543*c0909341SAndroid Build Coastguard Worker punpcklqdq m0, m0 544*c0909341SAndroid Build Coastguard Worker mova m1, [o(pw_2896x8)] 545*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m1 546*c0909341SAndroid Build Coastguard Worker mov [coeffq], eobd 547*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m1 548*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m1 549*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, [o(pw_2048)] 550*c0909341SAndroid Build Coastguard Worker mova m1, m0 551*c0909341SAndroid Build Coastguard Worker mova m2, m0 552*c0909341SAndroid Build Coastguard Worker mova m3, m0 553*c0909341SAndroid Build Coastguard Worker TAIL_CALL m(iadst_4x8_internal_8bpc).end3 554*c0909341SAndroid Build Coastguard Worker%endif 555*c0909341SAndroid Build Coastguard Worker%endmacro 556*c0909341SAndroid Build Coastguard Worker 557*c0909341SAndroid Build Coastguard WorkerINIT_XMM ssse3 558*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN dct, dct 559*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN dct, adst 560*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN dct, flipadst 561*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN dct, identity 562*c0909341SAndroid Build Coastguard Worker 563*c0909341SAndroid Build Coastguard Workercglobal idct_4x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2 564*c0909341SAndroid Build Coastguard Worker mova m3, [o(pw_2896x8)] 565*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m3, [coeffq+16*0] 566*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m3, [coeffq+16*1] 567*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m3, [coeffq+16*2] 568*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, [coeffq+16*3] 569*c0909341SAndroid 
Build Coastguard Worker 570*c0909341SAndroid Build Coastguard Worker.pass1: 571*c0909341SAndroid Build Coastguard Worker call m(idct_8x4_internal_8bpc).main 572*c0909341SAndroid Build Coastguard Worker jmp m(iadst_4x8_internal_8bpc).pass1_end 573*c0909341SAndroid Build Coastguard Worker 574*c0909341SAndroid Build Coastguard Worker.pass2: 575*c0909341SAndroid Build Coastguard Worker call .main 576*c0909341SAndroid Build Coastguard Worker shufps m1, m1, q1032 577*c0909341SAndroid Build Coastguard Worker shufps m3, m3, q1032 578*c0909341SAndroid Build Coastguard Worker mova m4, [o(pw_2048)] 579*c0909341SAndroid Build Coastguard Worker jmp m(iadst_4x8_internal_8bpc).end2 580*c0909341SAndroid Build Coastguard Worker 581*c0909341SAndroid Build Coastguard WorkerALIGN function_align 582*c0909341SAndroid Build Coastguard Workercglobal_label .main 583*c0909341SAndroid Build Coastguard Worker IDCT8_1D_PACKED 584*c0909341SAndroid Build Coastguard Worker ret 585*c0909341SAndroid Build Coastguard Worker 586*c0909341SAndroid Build Coastguard Worker 587*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN adst, dct 588*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN adst, adst 589*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN adst, flipadst 590*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN adst, identity 591*c0909341SAndroid Build Coastguard Worker 592*c0909341SAndroid Build Coastguard Workercglobal iadst_4x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2 593*c0909341SAndroid Build Coastguard Worker mova m3, [o(pw_2896x8)] 594*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m3, [coeffq+16*0] 595*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m3, [coeffq+16*1] 596*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m3, [coeffq+16*2] 597*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, [coeffq+16*3] 598*c0909341SAndroid Build Coastguard Worker 599*c0909341SAndroid Build Coastguard Worker.pass1: 600*c0909341SAndroid Build 
Coastguard Worker call m(iadst_8x4_internal_8bpc).main 601*c0909341SAndroid Build Coastguard Worker 602*c0909341SAndroid Build Coastguard Worker.pass1_end: 603*c0909341SAndroid Build Coastguard Worker INV_4X8 604*c0909341SAndroid Build Coastguard Worker jmp tx2q 605*c0909341SAndroid Build Coastguard Worker 606*c0909341SAndroid Build Coastguard Worker.pass2: 607*c0909341SAndroid Build Coastguard Worker shufps m0, m0, q1032 608*c0909341SAndroid Build Coastguard Worker shufps m1, m1, q1032 609*c0909341SAndroid Build Coastguard Worker call .main 610*c0909341SAndroid Build Coastguard Worker mova m4, [o(pw_2048)] 611*c0909341SAndroid Build Coastguard Worker pxor m5, m5 612*c0909341SAndroid Build Coastguard Worker psubw m5, m4 613*c0909341SAndroid Build Coastguard Worker 614*c0909341SAndroid Build Coastguard Worker.end: 615*c0909341SAndroid Build Coastguard Worker punpcklqdq m4, m5 616*c0909341SAndroid Build Coastguard Worker 617*c0909341SAndroid Build Coastguard Worker.end2: 618*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m4 619*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m4 620*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m4 621*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m4 622*c0909341SAndroid Build Coastguard Worker pxor m5, m5 623*c0909341SAndroid Build Coastguard Worker mova [coeffq+16*0], m5 624*c0909341SAndroid Build Coastguard Worker mova [coeffq+16*1], m5 625*c0909341SAndroid Build Coastguard Worker mova [coeffq+16*2], m5 626*c0909341SAndroid Build Coastguard Worker mova [coeffq+16*3], m5 627*c0909341SAndroid Build Coastguard Worker 628*c0909341SAndroid Build Coastguard Worker.end3: 629*c0909341SAndroid Build Coastguard Worker WRITE_4X8 0, 1, 2, 3 630*c0909341SAndroid Build Coastguard Worker RET 631*c0909341SAndroid Build Coastguard Worker 632*c0909341SAndroid Build Coastguard WorkerALIGN function_align 633*c0909341SAndroid Build Coastguard Workercglobal_label .main 634*c0909341SAndroid Build Coastguard Worker mova m6, 
[o(pd_2048)] 635*c0909341SAndroid Build Coastguard Worker punpckhwd m4, m3, m0 ;unpacked in7 in0 636*c0909341SAndroid Build Coastguard Worker punpckhwd m5, m2, m1 ;unpacked in5 in2 637*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m2 ;unpacked in3 in4 638*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m3 ;unpacked in1 in6 639*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 4, 2, 6, 401, 4076 ;low: t0a high: t1a 640*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 5, 2, 6, 1931, 3612 ;low: t2a high: t3a 641*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 1, 2, 6, 3166, 2598 ;low: t4a high: t5a 642*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 0, 2, 6, 3920, 1189 ;low: t6a high: t7a 643*c0909341SAndroid Build Coastguard Worker 644*c0909341SAndroid Build Coastguard Worker psubsw m3, m4, m1 ;low: t4 high: t5 645*c0909341SAndroid Build Coastguard Worker paddsw m4, m1 ;low: t0 high: t1 646*c0909341SAndroid Build Coastguard Worker psubsw m2, m5, m0 ;low: t6 high: t7 647*c0909341SAndroid Build Coastguard Worker paddsw m5, m0 ;low: t2 high: t3 648*c0909341SAndroid Build Coastguard Worker 649*c0909341SAndroid Build Coastguard Worker shufps m1, m3, m2, q1032 650*c0909341SAndroid Build Coastguard Worker punpckhwd m2, m1 651*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m1 652*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 3, 0, 6, 1567, 3784, 1 ;low: t5a high: t4a 653*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 2, 0, 6, 3784, 1567 ;low: t7a high: t6a 654*c0909341SAndroid Build Coastguard Worker 655*c0909341SAndroid Build Coastguard Worker psubsw m1, m4, m5 ;low: t2 high: t3 656*c0909341SAndroid Build Coastguard Worker paddsw m4, m5 ;low: out0 high: -out7 657*c0909341SAndroid Build Coastguard Worker psubsw m5, m3, m2 ;low: t7 high: t6 658*c0909341SAndroid Build Coastguard Worker paddsw m3, m2 ;low: out6 high: -out1 659*c0909341SAndroid Build Coastguard Worker shufps m0, m4, m3, q3210 ;low: out0 
high: -out1 660*c0909341SAndroid Build Coastguard Worker shufps m3, m4, q3210 ;low: out6 high: -out7 661*c0909341SAndroid Build Coastguard Worker 662*c0909341SAndroid Build Coastguard Worker mova m2, [o(pw_2896_m2896)] 663*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_2896_2896)] 664*c0909341SAndroid Build Coastguard Worker shufps m4, m1, m5, q1032 ;low: t3 high: t7 665*c0909341SAndroid Build Coastguard Worker shufps m1, m5, q3210 ;low: t2 high: t6 666*c0909341SAndroid Build Coastguard Worker punpcklwd m5, m1, m4 667*c0909341SAndroid Build Coastguard Worker punpckhwd m1, m4 668*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m2, m1 ;-out5 669*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m5 ; out4 670*c0909341SAndroid Build Coastguard Worker pmaddwd m1, m7 ; out2 671*c0909341SAndroid Build Coastguard Worker pmaddwd m5, m7 ;-out3 672*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m6}, m4, m2, m1, m5 673*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12}, m4, m2, m1, m5 674*c0909341SAndroid Build Coastguard Worker packssdw m1, m5 ;low: out2 high: -out3 675*c0909341SAndroid Build Coastguard Worker packssdw m2, m4 ;low: out4 high: -out5 676*c0909341SAndroid Build Coastguard Worker ret 677*c0909341SAndroid Build Coastguard Worker 678*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN flipadst, dct 679*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN flipadst, adst 680*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN flipadst, flipadst 681*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN flipadst, identity 682*c0909341SAndroid Build Coastguard Worker 683*c0909341SAndroid Build Coastguard Workercglobal iflipadst_4x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2 684*c0909341SAndroid Build Coastguard Worker mova m3, [o(pw_2896x8)] 685*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m3, [coeffq+16*0] 686*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m3, [coeffq+16*1] 687*c0909341SAndroid 
Build Coastguard Worker pmulhrsw m2, m3, [coeffq+16*2] 688*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, [coeffq+16*3] 689*c0909341SAndroid Build Coastguard Worker 690*c0909341SAndroid Build Coastguard Worker.pass1: 691*c0909341SAndroid Build Coastguard Worker call m(iadst_8x4_internal_8bpc).main 692*c0909341SAndroid Build Coastguard Worker 693*c0909341SAndroid Build Coastguard Worker punpcklwd m4, m3, m2 694*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m2 695*c0909341SAndroid Build Coastguard Worker punpcklwd m5, m1, m0 696*c0909341SAndroid Build Coastguard Worker punpckhwd m1, m0 697*c0909341SAndroid Build Coastguard Worker punpckldq m2, m3, m1 ;low: in4 high: in5 698*c0909341SAndroid Build Coastguard Worker punpckhdq m3, m1 ;low: in6 high: in7 699*c0909341SAndroid Build Coastguard Worker punpckldq m0, m4, m5 ;low: in0 high: in1 700*c0909341SAndroid Build Coastguard Worker punpckhdq m1, m4, m5 ;low: in2 high: in3 701*c0909341SAndroid Build Coastguard Worker jmp tx2q 702*c0909341SAndroid Build Coastguard Worker 703*c0909341SAndroid Build Coastguard Worker.pass2: 704*c0909341SAndroid Build Coastguard Worker shufps m0, m0, q1032 705*c0909341SAndroid Build Coastguard Worker shufps m1, m1, q1032 706*c0909341SAndroid Build Coastguard Worker call m(iadst_4x8_internal_8bpc).main 707*c0909341SAndroid Build Coastguard Worker 708*c0909341SAndroid Build Coastguard Worker mova m4, m0 709*c0909341SAndroid Build Coastguard Worker mova m5, m1 710*c0909341SAndroid Build Coastguard Worker pshufd m0, m3, q1032 711*c0909341SAndroid Build Coastguard Worker pshufd m1, m2, q1032 712*c0909341SAndroid Build Coastguard Worker pshufd m2, m5, q1032 713*c0909341SAndroid Build Coastguard Worker pshufd m3, m4, q1032 714*c0909341SAndroid Build Coastguard Worker mova m5, [o(pw_2048)] 715*c0909341SAndroid Build Coastguard Worker pxor m4, m4 716*c0909341SAndroid Build Coastguard Worker psubw m4, m5 717*c0909341SAndroid Build Coastguard Worker jmp m(iadst_4x8_internal_8bpc).end 

INV_TXFM_4X8_FN identity, dct
INV_TXFM_4X8_FN identity, adst
INV_TXFM_4X8_FN identity, flipadst
INV_TXFM_4X8_FN identity, identity

;------------------------------------------------------------------------------
; iidentity_4x8_internal_8bpc
;
; Identity "transform" for 4x8 blocks.
; Entry:  loads the four 16-byte coefficient rows, each multiplied by
;         pw_2896x8 with pmulhrsw, into m0..m3.
; .pass1: x += pmulhrsw(x, pw_1697x8) for each of m0..m3 (saturating add),
;         then joins iadst_4x8_internal_8bpc.pass1_end for the regroup and the
;         jump to the second pass. Clobbers m4..m7.
; .pass2: only selects the final multiplier (m4 = pw_4096) and joins
;         iadst_4x8_internal_8bpc.end2, which scales m0..m3 by m4, clears the
;         coefficients and writes the block out.
;------------------------------------------------------------------------------
cglobal iidentity_4x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    mova            m3, [o(pw_2896x8)]
    pmulhrsw        m0, m3, [coeffq+16*0]
    pmulhrsw        m1, m3, [coeffq+16*1]
    pmulhrsw        m2, m3, [coeffq+16*2]
    pmulhrsw        m3, [coeffq+16*3]

.pass1:
    mova            m7, [o(pw_1697x8)]
    pmulhrsw        m4, m7, m0
    pmulhrsw        m5, m7, m1
    pmulhrsw        m6, m7, m2
    pmulhrsw        m7, m3
    paddsw          m0, m4
    paddsw          m1, m5
    paddsw          m2, m6
    paddsw          m3, m7
    jmp m(iadst_4x8_internal_8bpc).pass1_end

.pass2:
    mova            m4, [o(pw_4096)]
    jmp m(iadst_4x8_internal_8bpc).end2


;------------------------------------------------------------------------------
; WRITE_8X2 coef1, coef2, tmp1, tmp2, tmp3
;
; Adds two rows of eight residual words to two rows of eight destination
; pixels at [dstq] and [dstq+strideq]:
;   * loads 8 bytes per row and zero-extends them to words (m%5 is zeroed
;     here and used as the unpack partner),
;   * adds the residuals; %1/%2 may each be a register number (used as m%N) or
;     any other operand, which is passed to paddw as written,
;   * packs both rows back to bytes with unsigned saturation and stores them.
; Clobbers m%3, m%4, m%5.
;------------------------------------------------------------------------------
%macro WRITE_8X2 5 ;coefs[1-2], tmp[1-3]
    movq            m%3, [dstq ]
    movq            m%4, [dstq+strideq]
    pxor            m%5, m%5
    punpcklbw       m%3, m%5                        ;extend byte to word
    punpcklbw       m%4, m%5                        ;extend byte to word
%ifnum %1
    paddw           m%3, m%1
%else
    paddw           m%3, %1
%endif
%ifnum %2
    paddw           m%4, m%2
%else
    paddw           m%4, %2
%endif
    packuswb        m%3, m%4
    movq            [dstq ], m%3
    punpckhqdq      m%3, m%3                        ; bring the second row down to the low qword
    movq            [dstq+strideq], m%3
%endmacro

;------------------------------------------------------------------------------
; WRITE_8X4 coef1, coef2, coef3, coef4, tmp1, tmp2, tmp3
;
; Two WRITE_8X2 invocations covering four rows; dstq is advanced by two
; strides between them and is left pointing at the third row.
;------------------------------------------------------------------------------
%macro WRITE_8X4 7 ;coefs[1-4], tmp[1-3]
    WRITE_8X2       %1, %2, %5, %6, %7
    lea             dstq, [dstq+strideq*2]
    WRITE_8X2       %3, %4, %5, %6, %7
%endmacro

;------------------------------------------------------------------------------
; INV_TXFM_8X4_FN type1, type2
;
; Instantiates the 8x4 entry point for the (type1, type2) transform pair via
; INV_TXFM_FN (defined outside this chunk). For dct_dct it additionally emits
; a shortcut path: coefficient word 0 is broadcast to all lanes, scaled three
; times by pw_2896x8 and once by pw_2048 (pmulhrsw), replicated into m0..m3,
; and control tail-calls iadst_8x4_internal_8bpc.end2, which clears the
; coefficients and writes the block out.
;------------------------------------------------------------------------------
%macro INV_TXFM_8X4_FN 2 ; type1, type2
    INV_TXFM_FN     %1, %2, 8x4, 8
%ifidn %1_%2, dct_dct
    pshuflw         m0, [coeffq], q0000
    punpcklqdq      m0, m0
    mova            m1, [o(pw_2896x8)]
    pmulhrsw        m0, m1
    pmulhrsw        m0, m1
    mova            m2, [o(pw_2048)]
    pmulhrsw        m0, m1
    pmulhrsw        m0, m2
    mova            m1, m0
    mova            m2, m0
    mova            m3, m0
    TAIL_CALL m(iadst_8x4_internal_8bpc).end2
%endif
%endmacro

INV_TXFM_8X4_FN dct, dct
INV_TXFM_8X4_FN dct, adst
INV_TXFM_8X4_FN dct, flipadst
INV_TXFM_8X4_FN dct, identity

;------------------------------------------------------------------------------
; idct_8x4_internal_8bpc
;
; First pass (falls through from the entry): loads the four coefficient rows
;         scaled by pw_2896x8, runs the packed 8-point IDCT
;         (idct_4x8_internal_8bpc.main), then reorders the packed outputs with
;         the deint_shuf1/deint_shuf2 byte shuffles and dword/qword unpacks
;         into four full rows m0..m3 = in0..in3 for the second pass, and jumps
;         there through tx2q. Clobbers m4, m5.
; .pass2: runs .main and joins iadst_8x4_internal_8bpc.end (scale, clear,
;         write).
; .main:  callable 4-point IDCT on m0..m3 (IDCT4_1D with m4/m5 as scratch and
;         m6 = pd_2048).
;------------------------------------------------------------------------------
cglobal idct_8x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    mova            m3, [o(pw_2896x8)]
    pmulhrsw        m0, m3, [coeffq+16*0]
    pmulhrsw        m1, m3, [coeffq+16*1]
    pmulhrsw        m2, m3, [coeffq+16*2]
    pmulhrsw        m3, [coeffq+16*3]

    call m(idct_4x8_internal_8bpc).main

    mova            m4, [o(deint_shuf1)]
    mova            m5, [o(deint_shuf2)]
    pshufb          m0, m4
    pshufb          m1, m5
    pshufb          m2, m4
    pshufb          m3, m5
    punpckhdq       m4, m0, m1
    punpckldq       m0, m1
    punpckhdq       m5, m2, m3
    punpckldq       m2, m3
    punpckhqdq      m1, m0, m2                      ;in1
    punpcklqdq      m0, m2                          ;in0
    punpckhqdq      m3, m4, m5                      ;in3
    punpcklqdq      m2, m4, m5                      ;in2
    jmp             tx2q

.pass2:
    call .main
    jmp m(iadst_8x4_internal_8bpc).end

ALIGN function_align
cglobal_label .main
    mova            m6, [o(pd_2048)]
    IDCT4_1D        0, 1, 2, 3, 4, 5, 6
    ret

INV_TXFM_8X4_FN adst, dct
INV_TXFM_8X4_FN adst, adst
INV_TXFM_8X4_FN adst, flipadst
INV_TXFM_8X4_FN adst, identity

;------------------------------------------------------------------------------
; iadst_8x4_internal_8bpc
;
; First pass (falls through from the entry): loads the four coefficient rows
;         scaled by pw_2896x8, swaps the 64-bit halves of m0/m1 and runs the
;         packed 8-point ADST (iadst_4x8_internal_8bpc.main). That kernel
;         returns the odd outputs negated in the high halves, so the words
;         interleaved from the high halves are negated again (0 - x,
;         saturating) while the result is regrouped into rows
;         m0..m3 = in0..in3. Jumps to the second pass through tx2q.
; .pass2: runs .main.
; .end:   multiplies m0..m3 by pw_2048 (pmulhrsw).
; .end2:  zeroes the four coefficient rows in memory.
; .end3:  WRITE_8X4 with m4..m6 as scratch, then returns.
;         .end and .end2 are shared tails entered from other 8x4 code in this
;         chunk.
; .main:  callable 4-point inverse ADST over eight columns.
;         In:  m0..m3 = in0..in3 (eight words each)
;         Out: m0..m3 = out0..out3
;         The low and high four columns are processed as two dword halves and
;         packed back together at the end. Clobbers m4..m7.
;------------------------------------------------------------------------------
cglobal iadst_8x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    mova            m3, [o(pw_2896x8)]
    pmulhrsw        m0, m3, [coeffq+16*0]
    pmulhrsw        m1, m3, [coeffq+16*1]
    pmulhrsw        m2, m3, [coeffq+16*2]
    pmulhrsw        m3, [coeffq+16*3]

    shufps          m0, m0, q1032
    shufps          m1, m1, q1032
    call m(iadst_4x8_internal_8bpc).main

    punpckhwd       m4, m0, m1
    punpcklwd       m0, m1
    punpckhwd       m1, m2, m3
    punpcklwd       m2, m3
    pxor            m5, m5
    psubsw          m3, m5, m1                      ; negate: high halves came back as -out
    psubsw          m5, m4                          ; negate: high halves came back as -out
    punpckhdq       m4, m5, m3
    punpckldq       m5, m3
    punpckhdq       m3, m0, m2
    punpckldq       m0, m2
    punpckhwd       m1, m0, m5                      ;in1
    punpcklwd       m0, m5                          ;in0
    punpcklwd       m2, m3, m4                      ;in2
    punpckhwd       m3, m4                          ;in3
    jmp             tx2q

.pass2:
    call .main

.end:
    mova            m4, [o(pw_2048)]
    pmulhrsw        m0, m4
    pmulhrsw        m1, m4
    pmulhrsw        m2, m4
    pmulhrsw        m3, m4

.end2:
    pxor            m6, m6
    mova            [coeffq+16*0], m6
    mova            [coeffq+16*1], m6
    mova            [coeffq+16*2], m6
    mova            [coeffq+16*3], m6
.end3:
    WRITE_8X4       0, 1, 2, 3, 4, 5, 6
    RET

ALIGN function_align
cglobal_label .main
    punpckhwd       m6, m0, m2                      ;unpacked in0 in2
    punpcklwd       m0, m2                          ;unpacked in0 in2
    punpckhwd       m7, m1, m3                      ;unpacked in1 in3
    punpcklwd       m1, m3                          ;unpacked in1 in3

    mova            m2, [o(pw_3344_m3344)]
    mova            m4, [o(pw_0_3344)]
    pmaddwd         m3, m2, m6                      ;3344 * in0 - 3344 * in2
    pmaddwd         m5, m4, m7                      ;3344 * in3
    pmaddwd         m2, m0
    pmaddwd         m4, m1
    paddd           m3, m5
    paddd           m2, m4
    mova            m4, [o(pd_2048)]
    paddd           m3, m4                          ;t2 + 2048
    paddd           m2, m4
    psrad           m3, 12
    psrad           m2, 12
    packssdw        m2, m3                          ;out2

    ; low four columns (m0/m1 hold the low-half interleaves)
    pmaddwd         m4, m0, [o(pw_1321_3803)]       ;1321 * in0 + 3803 * in2
    pmaddwd         m0, [o(pw_2482_m1321)]          ;2482 * in0 - 1321 * in2
    pmaddwd         m3, m1, [o(pw_3344_2482)]       ;3344 * in1 + 2482 * in3
    pmaddwd         m5, m1, [o(pw_3344_m3803)]      ;3344 * in1 - 3803 * in3
    paddd           m3, m4                          ;t0 + t3

    pmaddwd         m1, [o(pw_m6688_m3803)]         ;-2 * 3344 * in1 - 3803 * in3
    mova            m4, [o(pd_2048)]
    paddd           m0, m4
    paddd           m4, m3                          ;t0 + t3 + 2048
    paddd           m5, m0                          ;t1 + t3 + 2048
    paddd           m3, m0
    paddd           m3, m1                          ;t0 + t1 - t3 + 2048

    psrad           m4, 12                          ;out0
    psrad           m5, 12                          ;out1
    psrad           m3, 12                          ;out3
    packssdw        m0, m4, m5                      ;low: out0 high: out1

    ; high four columns (m6/m7 hold the high-half interleaves)
    pmaddwd         m4, m6, [o(pw_1321_3803)]       ;1321 * in0 + 3803 * in2
    pmaddwd         m6, [o(pw_2482_m1321)]          ;2482 * in0 - 1321 * in2
    pmaddwd         m1, m7, [o(pw_3344_2482)]       ;3344 * in1 + 2482 * in3
    pmaddwd         m5, m7, [o(pw_3344_m3803)]      ;3344 * in1 - 3803 * in3
    paddd           m1, m4                          ;t0 + t3
    pmaddwd         m7, [o(pw_m6688_m3803)]         ;-2 * 3344 * in1 - 3803 * in3

    mova            m4, [o(pd_2048)]
    paddd           m6, m4
    paddd           m4, m1                          ;t0 + t3 + 2048
    paddd           m5, m6                          ;t1 + t3 + 2048
    paddd           m1, m6
    paddd           m1, m7                          ;t0 + t1 - t3 + 2048

    psrad           m4, 12                          ;out0
    psrad           m5, 12                          ;out1
    psrad           m1, 12                          ;out3
    packssdw        m3, m1                          ;out3
    packssdw        m4, m5                          ;low: out0 high: out1

    punpckhqdq      m1, m0, m4                      ;out1
    punpcklqdq      m0, m4                          ;out0
    ret

INV_TXFM_8X4_FN flipadst, dct
INV_TXFM_8X4_FN flipadst, adst
INV_TXFM_8X4_FN flipadst, flipadst
INV_TXFM_8X4_FN flipadst, identity

;------------------------------------------------------------------------------
; iflipadst_8x4_internal_8bpc
;
; Flipped variant of iadst_8x4_internal_8bpc; it reuses the ADST kernels and
; only changes the ordering of the results.
; First pass (falls through from the entry): same load, half swap and call to
;         iadst_4x8_internal_8bpc.main as the plain ADST, but the regroup
;         takes the source registers in reverse order (m3, m2, m1, m0). The
;         words interleaved from the high halves are negated again, as in the
;         plain ADST, before rows m0..m3 = in0..in3 are formed. Jumps to the
;         second pass through tx2q.
; .pass2: runs iadst_8x4_internal_8bpc.main, reverses the row order
;         (m0<->m3, m1<->m2; m4/m5 are scratch) and joins
;         iadst_8x4_internal_8bpc.end.
;------------------------------------------------------------------------------
cglobal iflipadst_8x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    mova            m3, [o(pw_2896x8)]
    pmulhrsw        m0, m3, [coeffq+16*0]
    pmulhrsw        m1, m3, [coeffq+16*1]
    pmulhrsw        m2, m3, [coeffq+16*2]
    pmulhrsw        m3, [coeffq+16*3]

    shufps          m0, m0, q1032
    shufps          m1, m1, q1032
    call m(iadst_4x8_internal_8bpc).main

    punpckhwd       m5, m3, m2
    punpcklwd       m3, m2
    punpckhwd       m2, m1, m0
    punpcklwd       m1, m0

    pxor            m0, m0
    psubsw          m4, m0, m2                      ; negate: high halves came back as -out
    psubsw          m0, m5                          ; negate: high halves came back as -out
    punpckhdq       m2, m0, m4
    punpckldq       m0, m4
    punpckhdq       m4, m3, m1
    punpckldq       m3, m1
    punpckhwd       m1, m0, m3                      ;in1
    punpcklwd       m0, m3                          ;in0
    punpckhwd       m3, m2, m4                      ;in3
    punpcklwd       m2, m4                          ;in2
    jmp             tx2q

.pass2:
    call m(iadst_8x4_internal_8bpc).main
    mova            m4, m0
    mova            m5, m1
    mova            m0, m3
    mova            m1, m2
    mova            m2, m5
    mova            m3, m4
    jmp m(iadst_8x4_internal_8bpc).end

INV_TXFM_8X4_FN identity, dct
INV_TXFM_8X4_FN identity, adst
INV_TXFM_8X4_FN identity, flipadst
INV_TXFM_8X4_FN identity, identity

;------------------------------------------------------------------------------
; iidentity_8x4_internal_8bpc
;
; Identity "transform" for 8x4 blocks.
; First pass (falls through from the entry): loads the four coefficient rows
;         scaled by pw_2896x8, doubles them with a saturating add, regroups
;         them with word/dword unpacks into rows m0..m3 = in0..in3, and jumps
;         to the second pass through tx2q. Clobbers m4, m5.
; .pass2: x += pmulhrsw(x, pw_1697x8) for each of m0..m3 (saturating add;
;         clobbers m4..m7), then joins iadst_8x4_internal_8bpc.end.
;------------------------------------------------------------------------------
cglobal iidentity_8x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    mova            m3, [o(pw_2896x8)]
    pmulhrsw        m0, m3, [coeffq+16*0]
    pmulhrsw        m1, m3, [coeffq+16*1]
    pmulhrsw        m2, m3, [coeffq+16*2]
    pmulhrsw        m3, [coeffq+16*3]
    paddsw          m0, m0
    paddsw          m1, m1
    paddsw          m2, m2
    paddsw          m3, m3

    punpckhwd       m4, m0, m1
    punpcklwd       m0, m1
    punpckhwd       m1, m2, m3
    punpcklwd       m2, m3
    punpckhdq       m5, m4, m1
    punpckldq       m4, m1
    punpckhdq       m3, m0, m2
    punpckldq       m0, m2
    punpckhwd       m1, m0, m4                      ;in1
    punpcklwd       m0, m4                          ;in0
    punpcklwd       m2, m3, m5                      ;in2
    punpckhwd       m3, m5                          ;in3
    jmp             tx2q

.pass2:
    mova            m7, [o(pw_1697x8)]
    pmulhrsw        m4, m7, m0
    pmulhrsw        m5, m7, m1
    pmulhrsw        m6, m7, m2
    pmulhrsw        m7, m3
    paddsw          m0, m4
    paddsw          m1, m5
    paddsw          m2, m6
    paddsw          m3, m7
    jmp m(iadst_8x4_internal_8bpc).end

; The macro below continues past the end of this chunk; only its visible part
; is reproduced here.
%macro INV_TXFM_8X8_FN 2 ; type1, type2
    INV_TXFM_FN     %1, %2, 8x8, 8, 16*4
%ifidn %1_%2, dct_dct
    pshuflw         m0, [coeffq], q0000
    punpcklwd       m0, m0
    mova            m1, [o(pw_2896x8)]
    pmulhrsw        m0, m1
    mova            m2, [o(pw_16384)]
    mov             [coeffq], eobd
    pmulhrsw        m0, m2
    psrlw           m2, 3
    pmulhrsw        m0, m1
pmulhrsw m0, m2 1051*c0909341SAndroid Build Coastguard Worker.end: 1052*c0909341SAndroid Build Coastguard Worker mov r3d, 2 1053*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(m(inv_txfm_add_dct_dct_8x8_8bpc).end3)] 1054*c0909341SAndroid Build Coastguard Worker.loop: 1055*c0909341SAndroid Build Coastguard Worker WRITE_8X4 0, 0, 0, 0, 1, 2, 3 1056*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 1057*c0909341SAndroid Build Coastguard Worker dec r3d 1058*c0909341SAndroid Build Coastguard Worker jg .loop 1059*c0909341SAndroid Build Coastguard Worker jmp tx2q 1060*c0909341SAndroid Build Coastguard Worker.end3: 1061*c0909341SAndroid Build Coastguard Worker RET 1062*c0909341SAndroid Build Coastguard Worker%endif 1063*c0909341SAndroid Build Coastguard Worker%endmacro 1064*c0909341SAndroid Build Coastguard Worker 1065*c0909341SAndroid Build Coastguard Worker%macro LOAD_8ROWS 2-3 0 ; src, stride, is_rect2 1066*c0909341SAndroid Build Coastguard Worker%if %3 1067*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_2896x8)] 1068*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m7, [%1+%2*0] 1069*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m7, [%1+%2*1] 1070*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m7, [%1+%2*2] 1071*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m7, [%1+%2*3] 1072*c0909341SAndroid Build Coastguard Worker pmulhrsw m4, m7, [%1+%2*4] 1073*c0909341SAndroid Build Coastguard Worker pmulhrsw m5, m7, [%1+%2*5] 1074*c0909341SAndroid Build Coastguard Worker pmulhrsw m6, m7, [%1+%2*6] 1075*c0909341SAndroid Build Coastguard Worker pmulhrsw m7, [%1+%2*7] 1076*c0909341SAndroid Build Coastguard Worker%else 1077*c0909341SAndroid Build Coastguard Worker mova m0, [%1+%2*0] 1078*c0909341SAndroid Build Coastguard Worker mova m1, [%1+%2*1] 1079*c0909341SAndroid Build Coastguard Worker mova m2, [%1+%2*2] 1080*c0909341SAndroid Build Coastguard Worker mova m3, [%1+%2*3] 1081*c0909341SAndroid Build Coastguard Worker mova 
m4, [%1+%2*4] 1082*c0909341SAndroid Build Coastguard Worker mova m5, [%1+%2*5] 1083*c0909341SAndroid Build Coastguard Worker mova m6, [%1+%2*6] 1084*c0909341SAndroid Build Coastguard Worker mova m7, [%1+%2*7] 1085*c0909341SAndroid Build Coastguard Worker%endif 1086*c0909341SAndroid Build Coastguard Worker%endmacro 1087*c0909341SAndroid Build Coastguard Worker 1088*c0909341SAndroid Build Coastguard Worker%macro IDCT8_1D_ODDHALF 7 ; src[1-4], tmp[1-2], pd_2048 1089*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W %1, %4, %5, %6, %7, 799, 4017 ;t4a, t7a 1090*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W %3, %2, %5, %6, %7, 3406, 2276, 1 ;t5a, t6a 1091*c0909341SAndroid Build Coastguard Worker psubsw m%2, m%4, m%5 ;t6a 1092*c0909341SAndroid Build Coastguard Worker paddsw m%4, m%5 ;t7 1093*c0909341SAndroid Build Coastguard Worker psubsw m%5, m%1, m%3 ;t5a 1094*c0909341SAndroid Build Coastguard Worker paddsw m%1, m%3 ;t4 1095*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W %2, %5, %3, %6, %7, 2896, 2896, 1 ;t5, t6 1096*c0909341SAndroid Build Coastguard Worker%endmacro 1097*c0909341SAndroid Build Coastguard Worker 1098*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, dct 1099*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, adst 1100*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, flipadst 1101*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, identity 1102*c0909341SAndroid Build Coastguard Worker 1103*c0909341SAndroid Build Coastguard Workercglobal idct_8x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2 1104*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq, 16 1105*c0909341SAndroid Build Coastguard Worker 1106*c0909341SAndroid Build Coastguard Worker.pass1: 1107*c0909341SAndroid Build Coastguard Worker call .main 1108*c0909341SAndroid Build Coastguard Worker 1109*c0909341SAndroid Build Coastguard Worker.pass1_end: 1110*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_16384)] 
1111*c0909341SAndroid Build Coastguard Worker 1112*c0909341SAndroid Build Coastguard Worker.pass1_end1: 1113*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m7}, m0, m2, m4, m6 1114*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*1], m6 1115*c0909341SAndroid Build Coastguard Worker 1116*c0909341SAndroid Build Coastguard Worker.pass1_end2: 1117*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m7}, m1, m3, m5 1118*c0909341SAndroid Build Coastguard Worker pmulhrsw m7, [rsp+gprsize+16*0] 1119*c0909341SAndroid Build Coastguard Worker 1120*c0909341SAndroid Build Coastguard Workercglobal_label .pass1_end3 1121*c0909341SAndroid Build Coastguard Worker punpcklwd m6, m1, m5 ;10 50 11 51 12 52 13 53 1122*c0909341SAndroid Build Coastguard Worker punpckhwd m1, m5 ;14 54 15 55 16 56 17 57 1123*c0909341SAndroid Build Coastguard Worker punpckhwd m5, m0, m4 ;04 44 05 45 06 46 07 47 1124*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m4 ;00 40 01 41 02 42 03 43 1125*c0909341SAndroid Build Coastguard Worker punpckhwd m4, m3, m7 ;34 74 35 75 36 76 37 77 1126*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m7 ;30 70 31 71 32 72 33 73 1127*c0909341SAndroid Build Coastguard Worker punpckhwd m7, m1, m4 ;16 36 56 76 17 37 57 77 1128*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m4 ;14 34 54 74 15 35 55 75 1129*c0909341SAndroid Build Coastguard Worker punpckhwd m4, m6, m3 ;12 32 52 72 13 33 53 73 1130*c0909341SAndroid Build Coastguard Worker punpcklwd m6, m3 ;10 30 50 70 11 31 51 71 1131*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*2], m6 1132*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+gprsize+16*1] 1133*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m2, m6 ;24 64 25 65 26 66 27 67 1134*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m6 ;20 60 21 61 22 62 23 63 1135*c0909341SAndroid Build Coastguard Worker punpckhwd m6, m5, m3 ;06 26 46 66 07 27 47 67 1136*c0909341SAndroid Build Coastguard 
Worker punpcklwd m5, m3 ;04 24 44 64 05 25 45 65 1137*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m0, m2 ;02 22 42 62 03 23 43 63 1138*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m2 ;00 20 40 60 01 21 41 61 1139*c0909341SAndroid Build Coastguard Worker 1140*c0909341SAndroid Build Coastguard Worker punpckhwd m2, m6, m7 ;07 17 27 37 47 57 67 77 1141*c0909341SAndroid Build Coastguard Worker punpcklwd m6, m7 ;06 16 26 36 46 56 66 76 1142*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m2 1143*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m3, m4 ;02 12 22 32 42 52 62 72 1144*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m4 ;03 13 23 33 43 53 63 73 1145*c0909341SAndroid Build Coastguard Worker punpcklwd m4, m5, m1 ;04 14 24 34 44 54 64 74 1146*c0909341SAndroid Build Coastguard Worker punpckhwd m5, m1 ;05 15 25 35 45 55 65 75 1147*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize+16*2] 1148*c0909341SAndroid Build Coastguard Worker punpckhwd m1, m0, m7 ;01 11 21 31 41 51 61 71 1149*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m7 ;00 10 20 30 40 50 60 70 1150*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize+16*0] 1151*c0909341SAndroid Build Coastguard Worker jmp tx2q 1152*c0909341SAndroid Build Coastguard Worker 1153*c0909341SAndroid Build Coastguard Worker.pass2: 1154*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(m(idct_8x8_internal_8bpc).end4)] 1155*c0909341SAndroid Build Coastguard Worker 1156*c0909341SAndroid Build Coastguard Worker.pass2_main: 1157*c0909341SAndroid Build Coastguard Worker call .main 1158*c0909341SAndroid Build Coastguard Worker 1159*c0909341SAndroid Build Coastguard Worker.end: 1160*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_2048)] 1161*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m7}, m0, m2, m4, m6 1162*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*1], m6 1163*c0909341SAndroid Build Coastguard Worker 
1164*c0909341SAndroid Build Coastguard Worker.end2: 1165*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m7}, m1, m3, m5 1166*c0909341SAndroid Build Coastguard Worker pmulhrsw m7, [rsp+gprsize+16*0] 1167*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*2], m5 1168*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 1169*c0909341SAndroid Build Coastguard Worker 1170*c0909341SAndroid Build Coastguard Worker.end3: 1171*c0909341SAndroid Build Coastguard Worker WRITE_8X4 0, 1, 2, 3, 5, 6, 7 1172*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 1173*c0909341SAndroid Build Coastguard Worker WRITE_8X4 4, [rsp+gprsize+16*2], [rsp+gprsize+16*1], [rsp+gprsize+16*0], 5, 6, 7 1174*c0909341SAndroid Build Coastguard Worker jmp tx2q 1175*c0909341SAndroid Build Coastguard Worker 1176*c0909341SAndroid Build Coastguard Worker.end4: 1177*c0909341SAndroid Build Coastguard Worker pxor m7, m7 1178*c0909341SAndroid Build Coastguard Worker REPX {mova [coeffq+16*x], m7}, 0, 1, 2, 3, 4, 5, 6, 7 1179*c0909341SAndroid Build Coastguard Worker ret 1180*c0909341SAndroid Build Coastguard Worker 1181*c0909341SAndroid Build Coastguard WorkerALIGN function_align 1182*c0909341SAndroid Build Coastguard Workercglobal_label .main 1183*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*0], m7 1184*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*1], m3 1185*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*2], m1 1186*c0909341SAndroid Build Coastguard Worker mova m7, [o(pd_2048)] 1187*c0909341SAndroid Build Coastguard Worker IDCT4_1D 0, 2, 4, 6, 1, 3, 7 1188*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*2] 1189*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*2], m2 1190*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*1] 1191*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*1], m4 1192*c0909341SAndroid Build Coastguard Worker mova m4, 
[rsp+gprsize*2+16*0] 1193*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*0], m6 1194*c0909341SAndroid Build Coastguard Worker IDCT8_1D_ODDHALF 3, 2, 5, 4, 1, 6, 7 1195*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+gprsize*2+16*0] 1196*c0909341SAndroid Build Coastguard Worker psubsw m7, m0, m4 ;out7 1197*c0909341SAndroid Build Coastguard Worker paddsw m0, m4 ;out0 1198*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*0], m7 1199*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*2] 1200*c0909341SAndroid Build Coastguard Worker psubsw m4, m6, m3 ;out4 1201*c0909341SAndroid Build Coastguard Worker paddsw m3, m6 ;out3 1202*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize*2+16*1] 1203*c0909341SAndroid Build Coastguard Worker psubsw m6, m1, m5 ;out6 1204*c0909341SAndroid Build Coastguard Worker paddsw m1, m5 ;out1 1205*c0909341SAndroid Build Coastguard Worker psubsw m5, m7, m2 ;out5 1206*c0909341SAndroid Build Coastguard Worker paddsw m2, m7 ;out2 1207*c0909341SAndroid Build Coastguard Worker ret 1208*c0909341SAndroid Build Coastguard Worker 1209*c0909341SAndroid Build Coastguard Worker 1210*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN adst, dct 1211*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN adst, adst 1212*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN adst, flipadst 1213*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN adst, identity 1214*c0909341SAndroid Build Coastguard Worker 1215*c0909341SAndroid Build Coastguard Workercglobal iadst_8x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2 1216*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq, 16 1217*c0909341SAndroid Build Coastguard Worker 1218*c0909341SAndroid Build Coastguard Worker.pass1: 1219*c0909341SAndroid Build Coastguard Worker call .main 1220*c0909341SAndroid Build Coastguard Worker call .main_pass1_end 1221*c0909341SAndroid Build Coastguard Worker 1222*c0909341SAndroid Build 
Coastguard Worker.pass1_end: 1223*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_16384)] 1224*c0909341SAndroid Build Coastguard Worker 1225*c0909341SAndroid Build Coastguard Worker.pass1_end1: 1226*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m7}, m0, m2, m4, m6 1227*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*1], m6 1228*c0909341SAndroid Build Coastguard Worker pxor m6, m6 1229*c0909341SAndroid Build Coastguard Worker psubw m6, m7 1230*c0909341SAndroid Build Coastguard Worker mova m7, m6 1231*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end2 1232*c0909341SAndroid Build Coastguard Worker 1233*c0909341SAndroid Build Coastguard WorkerALIGN function_align 1234*c0909341SAndroid Build Coastguard Worker.pass2: 1235*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(m(idct_8x8_internal_8bpc).end4)] 1236*c0909341SAndroid Build Coastguard Worker 1237*c0909341SAndroid Build Coastguard Worker.pass2_main: 1238*c0909341SAndroid Build Coastguard Worker call .main 1239*c0909341SAndroid Build Coastguard Worker call .main_pass2_end 1240*c0909341SAndroid Build Coastguard Worker 1241*c0909341SAndroid Build Coastguard Worker.end: 1242*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_2048)] 1243*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m7}, m0, m2, m4, m6 1244*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*1], m6 1245*c0909341SAndroid Build Coastguard Worker pxor m6, m6 1246*c0909341SAndroid Build Coastguard Worker psubw m6, m7 1247*c0909341SAndroid Build Coastguard Worker mova m7, m6 1248*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).end2 1249*c0909341SAndroid Build Coastguard Worker 1250*c0909341SAndroid Build Coastguard WorkerALIGN function_align 1251*c0909341SAndroid Build Coastguard Workercglobal_label .main 1252*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*0], m7 1253*c0909341SAndroid Build Coastguard Worker mova 
[rsp+gprsize*2+16*1], m3 1254*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*2], m4 1255*c0909341SAndroid Build Coastguard Worker mova m7, [o(pd_2048)] 1256*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 2, 3, 4, 7, 1931, 3612 ;t3a, t2a 1257*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 1, 6, 3, 4, 7, 3920, 1189 ;t7a, t6a 1258*c0909341SAndroid Build Coastguard Worker paddsw m3, m2, m6 ;t2 1259*c0909341SAndroid Build Coastguard Worker psubsw m2, m6 ;t6 1260*c0909341SAndroid Build Coastguard Worker paddsw m4, m5, m1 ;t3 1261*c0909341SAndroid Build Coastguard Worker psubsw m5, m1 ;t7 1262*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 2, 1, 6, 7, 3784, 1567 ;t6a, t7a 1263*c0909341SAndroid Build Coastguard Worker 1264*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+gprsize*2+16*2] 1265*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*2], m5 1266*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*1] 1267*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*1], m2 1268*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize*2+16*0] 1269*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*0], m3 1270*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 0, 2, 3, 7, 401, 4076 ;t1a, t0a 1271*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 1, 6, 2, 3, 7, 3166, 2598 ;t5a, t4a 1272*c0909341SAndroid Build Coastguard Worker psubsw m2, m0, m6 ;t4 1273*c0909341SAndroid Build Coastguard Worker paddsw m0, m6 ;t0 1274*c0909341SAndroid Build Coastguard Worker paddsw m3, m5, m1 ;t1 1275*c0909341SAndroid Build Coastguard Worker psubsw m5, m1 ;t5 1276*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 2, 5, 1, 6, 7, 1567, 3784 ;t5a, t4a 1277*c0909341SAndroid Build Coastguard Worker 1278*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize*2+16*0] 1279*c0909341SAndroid Build Coastguard Worker paddsw m1, m3, m4 ;-out7 1280*c0909341SAndroid Build 
Coastguard Worker psubsw m3, m4 ;t3 1281*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*0], m1 1282*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m7 ;t2 1283*c0909341SAndroid Build Coastguard Worker paddsw m0, m7 ;out0 1284*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+gprsize*2+16*2] 1285*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize*2+16*1] 1286*c0909341SAndroid Build Coastguard Worker paddsw m1, m5, m6 ;-out1 1287*c0909341SAndroid Build Coastguard Worker psubsw m5, m6 ;t6 1288*c0909341SAndroid Build Coastguard Worker paddsw m6, m2, m7 ;out6 1289*c0909341SAndroid Build Coastguard Worker psubsw m2, m7 ;t7 1290*c0909341SAndroid Build Coastguard Worker ret 1291*c0909341SAndroid Build Coastguard WorkerALIGN function_align 1292*c0909341SAndroid Build Coastguard Worker.main_pass1_end: 1293*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*1], m1 1294*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*2], m6 1295*c0909341SAndroid Build Coastguard Worker punpckhwd m1, m4, m3 1296*c0909341SAndroid Build Coastguard Worker punpcklwd m4, m3 1297*c0909341SAndroid Build Coastguard Worker punpckhwd m7, m5, m2 1298*c0909341SAndroid Build Coastguard Worker punpcklwd m5, m2 1299*c0909341SAndroid Build Coastguard Worker mova m2, [o(pw_2896_2896)] 1300*c0909341SAndroid Build Coastguard Worker mova m6, [o(pd_2048)] 1301*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m2, m7 1302*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m5 1303*c0909341SAndroid Build Coastguard Worker paddd m3, m6 1304*c0909341SAndroid Build Coastguard Worker paddd m2, m6 1305*c0909341SAndroid Build Coastguard Worker psrad m3, 12 1306*c0909341SAndroid Build Coastguard Worker psrad m2, 12 1307*c0909341SAndroid Build Coastguard Worker packssdw m2, m3 ;out2 1308*c0909341SAndroid Build Coastguard Worker mova m3, [o(pw_2896_m2896)] 1309*c0909341SAndroid Build Coastguard Worker pmaddwd m7, m3 1310*c0909341SAndroid Build 
Coastguard Worker pmaddwd m5, m3 1311*c0909341SAndroid Build Coastguard Worker paddd m7, m6 1312*c0909341SAndroid Build Coastguard Worker paddd m5, m6 1313*c0909341SAndroid Build Coastguard Worker psrad m7, 12 1314*c0909341SAndroid Build Coastguard Worker psrad m5, 12 1315*c0909341SAndroid Build Coastguard Worker packssdw m5, m7 ;-out5 1316*c0909341SAndroid Build Coastguard Worker mova m3, [o(pw_2896_2896)] 1317*c0909341SAndroid Build Coastguard Worker pmaddwd m7, m3, m1 1318*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m4 1319*c0909341SAndroid Build Coastguard Worker paddd m7, m6 1320*c0909341SAndroid Build Coastguard Worker paddd m3, m6 1321*c0909341SAndroid Build Coastguard Worker psrad m7, 12 1322*c0909341SAndroid Build Coastguard Worker psrad m3, 12 1323*c0909341SAndroid Build Coastguard Worker packssdw m3, m7 ;-out3 1324*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_2896_m2896)] 1325*c0909341SAndroid Build Coastguard Worker pmaddwd m1, m7 1326*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m7 1327*c0909341SAndroid Build Coastguard Worker paddd m1, m6 1328*c0909341SAndroid Build Coastguard Worker paddd m4, m6 1329*c0909341SAndroid Build Coastguard Worker psrad m1, 12 1330*c0909341SAndroid Build Coastguard Worker psrad m4, 12 1331*c0909341SAndroid Build Coastguard Worker packssdw m4, m1 ;-out5 1332*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*1] 1333*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+gprsize*2+16*2] 1334*c0909341SAndroid Build Coastguard Worker ret 1335*c0909341SAndroid Build Coastguard WorkerALIGN function_align 1336*c0909341SAndroid Build Coastguard Workercglobal_label .main_pass2_end 1337*c0909341SAndroid Build Coastguard Worker paddsw m7, m4, m3 ;t2 + t3 1338*c0909341SAndroid Build Coastguard Worker psubsw m4, m3 ;t2 - t3 1339*c0909341SAndroid Build Coastguard Worker paddsw m3, m5, m2 ;t6 + t7 1340*c0909341SAndroid Build Coastguard Worker psubsw m5, m2 ;t6 - t7 1341*c0909341SAndroid Build 
Coastguard Worker mova m2, [o(pw_2896x8)] 1342*c0909341SAndroid Build Coastguard Worker pmulhrsw m4, m2 ;out4 1343*c0909341SAndroid Build Coastguard Worker pmulhrsw m5, m2 ;-out5 1344*c0909341SAndroid Build Coastguard Worker pmulhrsw m7, m2 ;-out3 1345*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m3 ;out2 1346*c0909341SAndroid Build Coastguard Worker mova m3, m7 1347*c0909341SAndroid Build Coastguard Worker ret 1348*c0909341SAndroid Build Coastguard Worker 1349*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN flipadst, dct 1350*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN flipadst, adst 1351*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN flipadst, flipadst 1352*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN flipadst, identity 1353*c0909341SAndroid Build Coastguard Worker 1354*c0909341SAndroid Build Coastguard Workercglobal iflipadst_8x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2 1355*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq, 16 1356*c0909341SAndroid Build Coastguard Worker 1357*c0909341SAndroid Build Coastguard Worker.pass1: 1358*c0909341SAndroid Build Coastguard Worker call m(iadst_8x8_internal_8bpc).main 1359*c0909341SAndroid Build Coastguard Worker call m(iadst_8x8_internal_8bpc).main_pass1_end 1360*c0909341SAndroid Build Coastguard Worker 1361*c0909341SAndroid Build Coastguard Worker.pass1_end: 1362*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_m16384)] 1363*c0909341SAndroid Build Coastguard Worker 1364*c0909341SAndroid Build Coastguard Worker.pass1_end1: 1365*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m7 1366*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*1], m1 1367*c0909341SAndroid Build Coastguard Worker mova m1, m6 1368*c0909341SAndroid Build Coastguard Worker mova m6, m2 1369*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m5, m7 1370*c0909341SAndroid Build Coastguard Worker mova m5, m6 1371*c0909341SAndroid Build Coastguard Worker mova m6, 
m4 1372*c0909341SAndroid Build Coastguard Worker pmulhrsw m4, m3, m7 1373*c0909341SAndroid Build Coastguard Worker mova m3, m6 1374*c0909341SAndroid Build Coastguard Worker mova m6, m0 1375*c0909341SAndroid Build Coastguard Worker mova m0, m7 1376*c0909341SAndroid Build Coastguard Worker pxor m7, m7 1377*c0909341SAndroid Build Coastguard Worker psubw m7, m0 1378*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, [rsp+gprsize+16*0] 1379*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m7}, m1, m3, m5 1380*c0909341SAndroid Build Coastguard Worker pmulhrsw m7, m6 1381*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end3 1382*c0909341SAndroid Build Coastguard Worker 1383*c0909341SAndroid Build Coastguard WorkerALIGN function_align 1384*c0909341SAndroid Build Coastguard Worker.pass2: 1385*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(m(idct_8x8_internal_8bpc).end4)] 1386*c0909341SAndroid Build Coastguard Worker 1387*c0909341SAndroid Build Coastguard Worker.pass2_main: 1388*c0909341SAndroid Build Coastguard Worker call m(iadst_8x8_internal_8bpc).main 1389*c0909341SAndroid Build Coastguard Worker call m(iadst_8x8_internal_8bpc).main_pass2_end 1390*c0909341SAndroid Build Coastguard Worker 1391*c0909341SAndroid Build Coastguard Worker.end: 1392*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_2048)] 1393*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m7}, m0, m2, m4, m6 1394*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*2], m2 1395*c0909341SAndroid Build Coastguard Worker mova m2, m0 1396*c0909341SAndroid Build Coastguard Worker pxor m0, m0 1397*c0909341SAndroid Build Coastguard Worker psubw m0, m7 1398*c0909341SAndroid Build Coastguard Worker mova m7, m2 1399*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m0 1400*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m5, m0 1401*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*1], m1 1402*c0909341SAndroid Build 
Coastguard Worker mova m5, m4 1403*c0909341SAndroid Build Coastguard Worker mova m1, m6 1404*c0909341SAndroid Build Coastguard Worker pmulhrsw m4, m3, m0 1405*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, [rsp+gprsize+16*0] 1406*c0909341SAndroid Build Coastguard Worker mova m3, m5 1407*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 1408*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).end3 1409*c0909341SAndroid Build Coastguard Worker 1410*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN identity, dct 1411*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN identity, adst 1412*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN identity, flipadst 1413*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN identity, identity 1414*c0909341SAndroid Build Coastguard Worker 1415*c0909341SAndroid Build Coastguard Workercglobal iidentity_8x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2 1416*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq, 16 1417*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*1], m6 1418*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end3 1419*c0909341SAndroid Build Coastguard Worker 1420*c0909341SAndroid Build Coastguard WorkerALIGN function_align 1421*c0909341SAndroid Build Coastguard Worker.pass2: 1422*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(m(idct_8x8_internal_8bpc).end4)] 1423*c0909341SAndroid Build Coastguard Worker 1424*c0909341SAndroid Build Coastguard Worker.end: 1425*c0909341SAndroid Build Coastguard Worker pmulhrsw m7, [o(pw_4096)] 1426*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 1427*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_4096)] 1428*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m7}, m0, m1, m2, m3, m4, m5, m6 1429*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*2], m5 1430*c0909341SAndroid Build Coastguard Worker mova 
[rsp+gprsize+16*1], m6 1431*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).end3 1432*c0909341SAndroid Build Coastguard Worker 1433*c0909341SAndroid Build Coastguard Worker 1434*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_4X16_FN 2 ; type1, type2 1435*c0909341SAndroid Build Coastguard Worker INV_TXFM_FN %1, %2, 4x16, 8 1436*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct 1437*c0909341SAndroid Build Coastguard Worker pshuflw m0, [coeffq], q0000 1438*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m0 1439*c0909341SAndroid Build Coastguard Worker mova m1, [o(pw_2896x8)] 1440*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m1 1441*c0909341SAndroid Build Coastguard Worker mov [coeffq], eobd 1442*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, [o(pw_16384)] 1443*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m1 1444*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, [o(pw_2048)] 1445*c0909341SAndroid Build Coastguard Worker.end: 1446*c0909341SAndroid Build Coastguard Worker WRITE_4X4 0, 0, 1, 2, 3, 0, 1, 2, 3 1447*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*4] 1448*c0909341SAndroid Build Coastguard Worker WRITE_4X4 0, 0, 1, 2, 3, 0, 1, 2, 3 1449*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*4] 1450*c0909341SAndroid Build Coastguard Worker WRITE_4X4 0, 0, 1, 2, 3, 0, 1, 2, 3 1451*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*4] 1452*c0909341SAndroid Build Coastguard Worker WRITE_4X4 0, 0, 1, 2, 3, 0, 1, 2, 3 1453*c0909341SAndroid Build Coastguard Worker RET 1454*c0909341SAndroid Build Coastguard Worker%endif 1455*c0909341SAndroid Build Coastguard Worker%endmacro 1456*c0909341SAndroid Build Coastguard Worker 1457*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN dct, dct 1458*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN dct, adst 1459*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN dct, flipadst 
INV_TXFM_4X16_FN dct, identity

;------------------------------------------------------------------------------
; idct_4x16_internal_8bpc
;
; Register names: dst, stride, coeff, eob, tx2. The three zeros passed to
; cglobal are, in x86inc's argument order, the argument / GPR / vector
; register counts.
;
; Pass 1 runs the 4x8 first-pass code whose address is in r3 twice: first on
; the odd 16-byte coefficient rows (1, 3, 5, 7), then on the even rows
; (0, 2, 4, 6). tx2q serves as the continuation address for those jumps, so
; the incoming tx2q is parked on the stack meanwhile.
; .pass1 is also the entry used by iadst_4x16 and iflipadst_4x16, each with
; its own r3.
;
; NOTE(review): this relies on the code at r3 ending with `jmp tx2q`; that
; code is outside this part of the file -- confirm.
;------------------------------------------------------------------------------
cglobal idct_4x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    lea         r3, [o(m(idct_4x8_internal_8bpc).pass1)]

.pass1:
    ; First half: odd rows.
    mova        m0, [coeffq+16*1]
    mova        m1, [coeffq+16*3]
    mova        m2, [coeffq+16*5]
    mova        m3, [coeffq+16*7]
    push        tx2q                    ; keep the second-pass target
    lea         tx2q, [o(m(idct_4x16_internal_8bpc).pass1_2)]
    jmp         r3

.pass1_2:
    ; Store the first-half results in place, then process the even rows.
    mova        [coeffq+16*1], m0
    mova        [coeffq+16*3], m1
    mova        [coeffq+16*5], m2
    mova        [coeffq+16*7], m3
    mova        m0, [coeffq+16*0]
    mova        m1, [coeffq+16*2]
    mova        m2, [coeffq+16*4]
    mova        m3, [coeffq+16*6]
    lea         tx2q, [o(m(idct_4x16_internal_8bpc).pass1_end)]
    jmp         r3

.pass1_end:
    pop         tx2q                    ; restore the second-pass target

    ; m0-m3 hold the second-half results; fetch the first-half ones.
    mova        m4, [coeffq+16*1]
    mova        m5, [coeffq+16*3]
    mova        m6, [coeffq+16*5]
    mova        m7, [o(pw_16384)]
    REPX {pmulhrsw x, m7}, m0, m1, m2, m3, m4, m5, m6

    ; m0-m7 are all occupied, so the eighth vector (row 7) is scaled through
    ; the memory operand and written back.
    pmulhrsw    m7, [coeffq+16*7]
    mova        [coeffq+16*7], m7
    jmp         tx2q

.pass2:
    call m(idct_16x4_internal_8bpc).main

.end:
    mova        m7, [o(pw_2048)]
    REPX {pmulhrsw x, m7}, m0, m1, m2, m3, m4, m5, m6
    pmulhrsw    m7, [coeffq+16*7]       ; eighth vector, scaled from memory
    mova        [coeffq+16*4], m4

.end1:
    mova        [coeffq+16*5], m5
    mova        [coeffq+16*6], m6
    mov         r3, coeffq              ; coefficient pointer kept in r3 from here on (presumably WRITE_4X8 clobbers coeffq -- confirm)
    WRITE_4X8   0, 1, 3, 2

    ; Second group: the vectors spilled to rows 4-6 plus m7.
    mova        m0, [r3+16*4]
    mova        m1, [r3+16*5]
    mova        m2, [r3+16*6]
    mova        m3, m7
    lea         dstq, [dstq+strideq*4]
    WRITE_4X8   0, 1, 3, 2

.end2:
    ; Zero the eight 16-byte coefficient rows.
    pxor        m7, m7
    REPX {mova [r3+16*x], m7}, 0, 1, 2, 3, 4, 5, 6, 7
    ret

INV_TXFM_4X16_FN adst, dct
INV_TXFM_4X16_FN adst, adst
INV_TXFM_4X16_FN adst, flipadst
INV_TXFM_4X16_FN adst, identity

;------------------------------------------------------------------------------
; iadst_4x16_internal_8bpc
;
; Pass 1 reuses idct_4x16_internal_8bpc.pass1 with r3 pointing at the ADST
; 4x8 first pass. Pass 2 calls the 16x4 ADST kernels and then regroups the
; result vectors; the trailing comments record which outputs (and which sign)
; each half-register holds.
;------------------------------------------------------------------------------
cglobal iadst_4x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    lea         r3, [o(m(iadst_4x8_internal_8bpc).pass1)]
    jmp m(idct_4x16_internal_8bpc).pass1

.pass2:
    call m(iadst_16x4_internal_8bpc).main
    call m(iadst_16x4_internal_8bpc).main_pass2_end

    punpcklqdq  m6, m5, m4              ;low: -out5  high: -out7
    punpckhqdq  m4, m5                  ;low: out8   high: out10
    punpcklqdq  m5, m7, m2              ;low: out4   high: out6
    punpckhqdq  m2, m7                  ;low: -out9  high: -out11
    mova        [coeffq+16*4], m2       ; spill to rows 4/5 to free registers
    mova        [coeffq+16*5], m6
    mova        m2, [coeffq+16*6]
    mova        m6, [coeffq+16*7]
    punpckhqdq  m1, m6, m0              ;low: -out13 high: -out15
; (iadst_4x16_internal_8bpc.pass2, continued)
    punpcklqdq  m0, m6                  ;low: out0   high: out2
    punpckhqdq  m6, m3, m2              ;low: out12  high: out14
    punpcklqdq  m2, m3                  ;low: -out1  high: -out3

    mova        m7, [o(pw_2048)]

; Shared tail: iflipadst_4x16_internal_8bpc.pass2 jumps here with
; m7 = pw_m2048 instead. Each vector is multiplied by whichever of m7 or
; m3 = -m7 cancels the sign recorded in its comment above.
.end1:
    REPX {pmulhrsw x, m7}, m0, m5, m4, m6
    pxor        m3, m3
    psubw       m3, m7                  ; m3 = -m7
    mova        m7, [coeffq+16*4]       ; vector spilled to row 4
    REPX {pmulhrsw x, m3}, m2, m7, m1
    pmulhrsw    m3, [coeffq+16*5]       ; vector spilled to row 5, scaled from memory
    mova        [coeffq+16*7], m5       ; park m5 so the register can be reused

    punpckhqdq  m5, m4, m7              ;low: out10  high: out11
    punpcklqdq  m4, m7                  ;low: out8   high: out9
    punpckhqdq  m7, m6, m1              ;low: out14  high: out15
    punpcklqdq  m6, m1                  ;low: out12  high: out13
    punpckhqdq  m1, m0, m2              ;low: out2   high: out3
    punpcklqdq  m0, m2                  ;low: out0   high: out1
    mova        [coeffq+16*4], m4
    mova        m4, [coeffq+16*7]       ; the value parked above
    punpcklqdq  m2, m4, m3              ;low: out4   high: out5
    punpckhqdq  m4, m3                  ;low: out6   high: out7
    mova        m3, m4

; Output stage; iidentity_4x16_internal_8bpc.pass2 also jumps here.
; Expects m0-m3 = first group, row 4 / m5 / m6 / m7 = second group.
.end2:
    mova        [coeffq+16*5], m5
    mova        [coeffq+16*6], m6
    mov         r3, coeffq              ; coefficient pointer kept in r3 from here on (presumably WRITE_4X8 clobbers coeffq -- confirm)
    WRITE_4X8   0, 1, 2, 3

    mova        m0, [r3+16*4]
    mova        m1, [r3+16*5]
    mova        m2, [r3+16*6]
    mova        m3, m7
    lea         dstq, [dstq+strideq*4]
    WRITE_4X8   0, 1, 2, 3

.end3:
    ; Zero the eight 16-byte coefficient rows.
    pxor        m7, m7
    REPX {mova [r3+16*x], m7}, 0, 1, 2, 3, 4, 5, 6, 7
    ret


INV_TXFM_4X16_FN flipadst, dct
INV_TXFM_4X16_FN flipadst, adst
INV_TXFM_4X16_FN flipadst, flipadst
INV_TXFM_4X16_FN flipadst, identity

;------------------------------------------------------------------------------
; iflipadst_4x16_internal_8bpc
;
; Same structure as iadst_4x16_internal_8bpc: pass 1 goes through
; idct_4x16_internal_8bpc.pass1 with the flipped-ADST 4x8 first pass in r3;
; pass 2 runs the same 16x4 ADST kernels, regroups the vectors with the
; low/high unpack roles swapped, and joins iadst_4x16's .end1 with the
; negated scale constant pw_m2048.
;------------------------------------------------------------------------------
cglobal iflipadst_4x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    lea         r3, [o(m(iflipadst_4x8_internal_8bpc).pass1)]
    jmp m(idct_4x16_internal_8bpc).pass1

.pass2:
    call m(iadst_16x4_internal_8bpc).main
    call m(iadst_16x4_internal_8bpc).main_pass2_end

    punpckhqdq  m6, m5, m4              ;low: out5   high: out7
    punpcklqdq  m4, m5                  ;low: -out8  high: -out10
    punpckhqdq  m5, m7, m2              ;low: -out4  high: -out6
    punpcklqdq  m2, m7                  ;low: out9   high: out11
    mova        [coeffq+16*4], m2       ; spill to rows 4/5 to free registers
    mova        [coeffq+16*5], m6
    mova        m2, [coeffq+16*6]
    mova        m6, [coeffq+16*7]
    punpcklqdq  m1, m6, m0              ;low: out13  high: out15
    punpckhqdq  m0, m6                  ;low: -out0  high: -out2
    punpcklqdq  m6, m3, m2              ;low: -out12 high: -out14
    punpckhqdq  m2, m3                  ;low: out1   high: out3

    mova        m7, [o(pw_m2048)]
    jmp m(iadst_4x16_internal_8bpc).end1


INV_TXFM_4X16_FN identity, dct
INV_TXFM_4X16_FN identity, adst
INV_TXFM_4X16_FN identity, flipadst
INV_TXFM_4X16_FN identity, identity

;------------------------------------------------------------------------------
; IDTX16 src/dst, tmp, coef [, shift_coef]   (all arguments: register numbers)
;
; Three arguments: dst = 2*src + pmulhrsw(src, coef)
; Four arguments:  dst = src + pmulhrsw(pmulhrsw(src, coef), shift_coef)
; Both additions saturate (paddsw). tmp is overwritten.
;------------------------------------------------------------------------------
%macro IDTX16 3-4 ; src/dst, tmp, pw_1697x16, [pw_16384]
    pmulhrsw    m%2, m%3, m%1
%if %0 == 4 ; if downshifting by 1
    pmulhrsw    m%2, m%4
%else
    paddsw      m%1, m%1
%endif
    paddsw      m%1, m%2
%endmacro

;------------------------------------------------------------------------------
; iidentity_4x16_internal_8bpc
;
; Pass 1 handles the odd coefficient rows (1, 3, 5, 7) first and the even
; rows (0, 2, 4, 6) second, four vectors at a time, through the .pass1 loop
; body. The incoming tx2q is kept in r3 while tx2q itself is used as the
; continuation address.
;
; For each input vector x the loop body computes
;     t = pavgw(pmulhrsw(x, pw_1697x8), x)
; and then clears every word lane in which x was -1 (all bits set); all other
; lanes keep t.
;------------------------------------------------------------------------------
cglobal iidentity_4x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    mova        m0, [coeffq+16*1]
    mova        m6, [o(pw_1697x8)]
    mova        m1, [coeffq+16*3]
    mova        m2, [coeffq+16*5]
    mova        m3, [coeffq+16*7]
    pcmpeqw     m7, m7                  ; m7 = all ones (-1 in every word)
    mov         r3, tx2q                ; keep the second-pass target
    lea         tx2q, [o(.pass1_2)]
.pass1:
    pmulhrsw    m4, m6, m0
    pmulhrsw    m5, m6, m1
    pavgw       m4, m0
    pcmpeqw     m0, m7                  ; mask of lanes where the input is -1
    pavgw       m5, m1
    pcmpeqw     m1, m7
    pandn       m0, m4                  ; keep t except in the masked lanes
    pmulhrsw    m4, m6, m2
    pandn       m1, m5
    pmulhrsw    m5, m6, m3
    pavgw       m4, m2
    pcmpeqw     m2, m7
    pavgw       m5, m3
    pcmpeqw     m3, m7
; (iidentity_4x16_internal_8bpc.pass1, continued: finish masking m2/m3)
    pandn       m2, m4
    pandn       m3, m5
    jmp m(iadst_4x8_internal_8bpc).pass1_end    ; NOTE(review): expected to come back through tx2q -- target is outside this part of the file
.pass1_2:
    ; Store the first-half results in place, then process the even rows.
    mova        [coeffq+16*1], m0
    mova        [coeffq+16*3], m1
    mova        [coeffq+16*5], m2
    mova        [coeffq+16*7], m3
    mova        m0, [coeffq+16*0]
    mova        m1, [coeffq+16*2]
    mova        m2, [coeffq+16*4]
    mova        m3, [coeffq+16*6]
    lea         tx2q, [o(.pass1_end)]
    jmp .pass1
.pass1_end:
    ; m0-m3 hold the second-half results; fetch three of the first-half ones
    ; (the fourth stays in row 7) and continue at the saved second-pass target.
    mova        m4, [coeffq+16*1]
    mova        m5, [coeffq+16*3]
    mova        m6, [coeffq+16*5]
    jmp         r3
.pass2:
    ; Apply IDTX16 to eight vectors with only eight registers available:
    ; m6 is parked in row 6 so it can serve as the temporary for m0-m5.
    mova        m7, [o(pw_1697x16)]
    mova        [coeffq+16*6], m6
    REPX {IDTX16 x, 6, 7}, 0, 1, 2, 3, 4, 5
    mova        m6, [coeffq+16*7]
    IDTX16      6, 7, 7                 ; tmp and coefficient share m7, so the constant is overwritten here
    mova        [coeffq+16*7], m6
    mova        m6, [coeffq+16*6]
    ; Same computation for the parked vector, constant taken from memory.
    pmulhrsw    m7, m6, [o(pw_1697x16)]
    paddsw      m6, m6
    paddsw      m6, m7
    mova        m7, [o(pw_2048)]
    REPX {pmulhrsw x, m7}, m0, m1, m2, m3, m4, m5, m6
    pmulhrsw    m7, [coeffq+16*7]       ; eighth vector, scaled from memory
    mova        [coeffq+16*4], m4
    jmp m(iadst_4x16_internal_8bpc).end2


;------------------------------------------------------------------------------
; INV_TXFM_16X4_FN type1, type2
;
; Expands INV_TXFM_FN for the 16x4 block size with the given transform pair.
; For the dct/dct pair only, it also emits a shortcut: the first coefficient
; is scaled, broadcast to all eight words, and added to the destination two
; 16-pixel rows per iteration, r2d iterations in total (2 here, i.e. 4 rows).
; The bytes are widened to words for the add and packed back with unsigned
; saturation. Control leaves through tx2q, which is set to .end here.
;
; NOTE(review): r2d and tx2q are set up before the .dconly label, which
; suggests other code enters at .dconly with its own row count and return
; address -- no such caller is visible in this part of the file.
;------------------------------------------------------------------------------
%macro INV_TXFM_16X4_FN 2 ; type1, type2
    INV_TXFM_FN %1, %2, 16x4, 8
%ifidn %1_%2, dct_dct
    movd        m1, [o(pw_2896x8)]      ; only the low dword of the constant is loaded
    pmulhrsw    m0, m1, [coeffq]        ; word 0 = scaled first coefficient
    movd        m2, [o(pw_16384)]
    mov         [coeffq], eobd          ; presumably eob is 0 on this path, so this clears the coefficient -- TODO confirm
    mov         r2d, 2                  ; loop count (two rows per iteration)
    lea         tx2q, [o(m(inv_txfm_add_dct_dct_16x4_8bpc).end)]
.dconly:
    pmulhrsw    m0, m2
    movd        m2, [o(pw_2048)]        ;intentionally rip-relative
    pmulhrsw    m0, m1
    pmulhrsw    m0, m2
    pshuflw     m0, m0, q0000           ; broadcast word 0 ...
    punpcklwd   m0, m0                  ; ... to all eight words
    pxor        m5, m5                  ; zero, used to widen bytes to words
.dconly_loop:
    mova        m1, [dstq]
    mova        m3, [dstq+strideq]
    punpckhbw   m2, m1, m5
    punpcklbw   m1, m5
; (INV_TXFM_16X4_FN .dconly_loop, continued: second row, add, pack, store)
    punpckhbw   m4, m3, m5
    punpcklbw   m3, m5
    paddw       m2, m0
    paddw       m1, m0
    paddw       m4, m0
    paddw       m3, m0
    packuswb    m1, m2                  ; back to bytes with unsigned saturation
    packuswb    m3, m4
    mova        [dstq], m1
    mova        [dstq+strideq], m3
    lea         dstq, [dstq+strideq*2]
    dec         r2d
    jg .dconly_loop
    jmp         tx2q
.end:
    RET
%endif
%endmacro

; LOAD_7ROWS src, stride
; Loads seven vectors from src + stride*{0..6} into m0-m6.
%macro LOAD_7ROWS 2 ;src, stride
    mova        m0, [%1+%2*0]
    mova        m1, [%1+%2*1]
    mova        m2, [%1+%2*2]
    mova        m3, [%1+%2*3]
    mova        m4, [%1+%2*4]
    mova        m5, [%1+%2*5]
    mova        m6, [%1+%2*6]
%endmacro

; SAVE_7ROWS dst, stride
; Stores m0-m6 to dst + stride*{0..6}.
%macro SAVE_7ROWS 2 ;dst, stride
    mova        [%1+%2*0], m0
    mova        [%1+%2*1], m1
    mova        [%1+%2*2], m2
    mova        [%1+%2*3], m3
    mova        [%1+%2*4], m4
    mova        [%1+%2*5], m5
    mova        [%1+%2*6], m6
%endmacro

;------------------------------------------------------------------------------
; IDCT16_1D_PACKED_ODDHALF src1, src2, src3, src4, tmp1, tmp2, tmp3
;
; Odd-input half of a 16-point inverse DCT on packed data (two values per
; register half, as the trailing comments record). All seven arguments are
; register numbers and all seven registers are overwritten.
; Results, per the final comments:
;   src1 = t8a | t9      src2 = t11 | t10a
;   src3 = t12 | t13a    src4 = t15a | t14
;------------------------------------------------------------------------------
%macro IDCT16_1D_PACKED_ODDHALF 7  ;src[1-4], tmp[1-3]
    punpckhwd   m%5, m%4, m%1                        ;packed in13 in3
    punpcklwd   m%1, m%4                             ;packed in1  in15
    punpcklwd   m%4, m%3, m%2                        ;packed in9  in7
    punpckhwd   m%2, m%3                             ;packed in5  in11
    mova        m%7, [o(pd_2048)]
    ITX_MUL2X_PACK %1, %6, %7,  401, 4076, 1         ;low: t8a   high: t15a
    ITX_MUL2X_PACK %4, %6, %7, 3166, 2598, 1         ;low: t9a   high: t14a
    ITX_MUL2X_PACK %2, %6, %7, 1931, 3612, 1         ;low: t10a  high: t13a
    ITX_MUL2X_PACK %5, %6, %7, 3920, 1189, 1         ;low: t11a  high: t12a
    psubsw      m%6, m%1, m%4                        ;low: t9    high: t14
    paddsw      m%1, m%4                             ;low: t8    high: t15
    psubsw      m%4, m%5, m%2                        ;low: t10   high: t13
    paddsw      m%5, m%2                             ;low: t11   high: t12
    mova        m%2, [o(deint_shuf2)]
    pshufb      m%6, m%2
    pshufb      m%4, m%2
    ITX_MUL2X_PACK %6, %3, %7, 1567, 3784, 1         ;low: t9a   high: t14a
    ITX_MUL2X_PACK %4, %3, %7, m3784, 1567, 1        ;low: t10a  high: t13a
    psubsw      m%3, m%1, m%5                        ;low: t11a  high: t12a
    paddsw      m%1, m%5                             ;low: t8a   high: t15a
    psubsw      m%5, m%6, m%4                        ;low: t10   high: t13
    paddsw      m%6, m%4                             ;low: t9    high: t14
    pshufb      m%3, m%2
    pshufb      m%5, m%2
    ITX_MUL2X_PACK %3, %2, %7, 2896, 2896, 4         ;t12,  t11
    ITX_MUL2X_PACK %5, %4, %7, 2896, 2896, 4         ;t13a, t10a
    packssdw    m%2, m%4                             ;low: t11   high: t10a
    packssdw    m%3, m%5                             ;low: t12   high: t13a
    punpckhqdq  m%4, m%1, m%6                        ;low: t15a  high: t14
    punpcklqdq  m%1, m%6                             ;low: t8a   high: t9
%endmacro

INV_TXFM_16X4_FN dct, dct
INV_TXFM_16X4_FN dct, adst
INV_TXFM_16X4_FN dct, flipadst
INV_TXFM_16X4_FN dct, identity

;------------------------------------------------------------------------------
; idct_16x4_internal_8bpc
;
; Pass 1: load coefficient rows 0-6 into m0-m6 (row 7 is read from memory by
; .main), run the 16-point IDCT in .main, regroup the outputs, scale by
; pw_16384 and transpose (.pass1_end3), then jump to tx2q.
; Pass 2 (.pass2 / .pass2_end): the 8x4 second-pass code at tx2q is run
; twice, first via call for dst, then via tail jump for dst+8 using the
; vectors saved in rows 4-7.
;
; .pass1_end3 is also the join point for iadst_16x4_internal_8bpc, and
; .pass2_end is shared with its .pass2.
;------------------------------------------------------------------------------
cglobal idct_16x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    LOAD_7ROWS  coeffq, 16
    call .main

.pass1_end:
    punpckhwd   m7, m0, m2              ;packed out1,  out5
    punpcklwd   m0, m2                  ;packed out0,  out4
    punpcklwd   m2, m1, m3              ;packed out3,  out7
    punpckhwd   m1, m3                  ;packed out2,  out6
    mova        [coeffq+16*6], m7       ; spill: m7 is needed for row 7
    mova        m7, [coeffq+16*7]
    punpckhwd   m3, m4, m6              ;packed out9,  out13
    punpcklwd   m4, m6                  ;packed out8,  out12
    punpcklwd   m6, m5, m7              ;packed out11, out15
    punpckhwd   m5, m7                  ;packed out10, out14

.pass1_end2:
    mova        m7, [o(pw_16384)]
    REPX {pmulhrsw x, m7}, m0, m1, m2, m3, m4, m5, m6
    pmulhrsw    m7, [coeffq+16*6]       ; the spilled vector, scaled from memory
    mova        [coeffq+16*6], m7

.pass1_end3:
    punpckhwd   m7, m3, m6              ;packed 9, 11, 13, 15 high
    punpcklwd   m3, m6                  ;packed 9, 11, 13, 15 low
    punpckhwd   m6, m4, m5              ;packed 8, 10, 12, 14 high
    punpcklwd   m4, m5                  ;packed 8, 10, 12, 14 low
    punpckhwd   m5, m4, m3              ;8, 9, 10, 11, 12, 13, 14, 15(1)
    punpcklwd   m4, m3                  ;8, 9, 10, 11, 12, 13, 14, 15(0)
    punpckhwd   m3, m6, m7              ;8, 9, 10, 11, 12, 13, 14, 15(3)
    punpcklwd   m6, m7                  ;8, 9, 10, 11, 12, 13, 14, 15(2)
    mova        [coeffq+16*7], m3       ; fourth vector of the second group lives in row 7
    mova        m3, [coeffq+16*6]
    punpckhwd   m7, m3, m2              ;packed 1, 3, 5, 7 high
    punpcklwd   m3, m2                  ;packed 1, 3, 5, 7 low
    punpckhwd   m2, m0, m1              ;packed 0, 2, 4, 6 high
    punpcklwd   m0, m1                  ;packed 0, 2, 4, 6 low
    punpckhwd   m1, m0, m3              ;0, 1, 2, 3, 4, 5, 6, 7(1)
    punpcklwd   m0, m3                  ;0, 1, 2, 3, 4, 5, 6, 7(0)
    punpckhwd   m3, m2, m7              ;0, 1, 2, 3, 4, 5, 6, 7(3)
    punpcklwd   m2, m7                  ;0, 1, 2, 3, 4, 5, 6, 7(2)
    jmp         tx2q

.pass2:
    lea         tx2q, [o(m(idct_8x4_internal_8bpc).pass2)]

.pass2_end:
    ; Save the second group (rows 4-6; row 7 was stored in .pass1_end3) and
    ; process the first group at dst.
    mova        [coeffq+16*4], m4
    mova        [coeffq+16*5], m5
    mova        [coeffq+16*6], m6
    lea         r3, [dstq+8]            ; destination of the second group
    call        tx2q

    ; Second group: rows 4-7 become rows 0-3 of the advanced pointer.
    add         coeffq, 16*4
    mova        m0, [coeffq+16*0]
    mova        m1, [coeffq+16*1]
    mova        m2, [coeffq+16*2]
    mova        m3, [coeffq+16*3]
    mov         dstq, r3
    jmp         tx2q                    ; tail jump: its return ends this function

;------------------------------------------------------------------------------
; idct_16x4_internal_8bpc.main
;
; In:  m0-m6 = coefficient rows 0-6, [coeffq+16*7] = row 7.
; Out: m0-m6 and [coeffq+16*7], holding the output pairs named in the
;      trailing comments (row 7 gets out15 | out14).
; Uses coefficient rows 4-7 as spill space. Also called from
; idct_4x16_internal_8bpc.pass2.
;------------------------------------------------------------------------------
ALIGN function_align
cglobal_label .main
    punpckhqdq  m7, m0, m1              ;low:in1  high:in3
    punpcklqdq  m0, m1
    punpcklqdq  m1, m2, m3
    punpckhqdq  m3, m2                  ;low:in7  high:in5
    mova        [coeffq+16*4], m7       ; odd inputs are set aside for the odd half
    mova        [coeffq+16*5], m3
    mova        m7, [coeffq+16*7]
    punpcklqdq  m2, m4, m5
    punpckhqdq  m4, m5                  ;low:in9  high:in11
    punpcklqdq  m3, m6, m7
    punpckhqdq  m7, m6                  ;low:in15 high:in13
    mova        [coeffq+16*6], m4
    IDCT8_1D_PACKED
    ; Swap: bring the odd inputs back, park the even-half results m1-m3.
    mova        m6, [coeffq+16*4]
    mova        m4, [coeffq+16*5]
    mova        m5, [coeffq+16*6]
    mova        [coeffq+16*4], m1
    mova        [coeffq+16*5], m2
    mova        [coeffq+16*6], m3

    IDCT16_1D_PACKED_ODDHALF 6, 4, 5, 7, 1, 2, 3

    ; Final butterflies between the even-half and odd-half results.
    mova        m1, [coeffq+16*4]
    psubsw      m3, m0, m7              ;low:out15 high:out14
    paddsw      m0, m7                  ;low:out0  high:out1
    psubsw      m7, m1, m5              ;low:out12 high:out13
    paddsw      m1, m5                  ;low:out3  high:out2
    mova        [coeffq+16*7], m3
    mova        m2, [coeffq+16*5]
    mova        m3, [coeffq+16*6]
    psubsw      m5, m2, m4              ;low:out11 high:out10
    paddsw      m2, m4                  ;low:out4  high:out5
    psubsw      m4, m3, m6              ;low:out8  high:out9
    paddsw      m3, m6                  ;low:out7  high:out6
    mova        m6, m7
    ret

INV_TXFM_16X4_FN adst, dct
INV_TXFM_16X4_FN adst, adst
INV_TXFM_16X4_FN adst, flipadst
INV_TXFM_16X4_FN adst, identity

;------------------------------------------------------------------------------
; iadst_16x4_internal_8bpc
;
; Pass 1: load rows 0-6, run .main and .main_pass1_end, regroup the outputs
; (signs as recorded in the comments), scale with +pw_16384 or its negation
; so that the recorded signs cancel, then join
; idct_16x4_internal_8bpc.pass1_end3.
; Pass 2: idct_16x4_internal_8bpc.pass2_end with the ADST 8x4 second pass.
;------------------------------------------------------------------------------
cglobal iadst_16x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    LOAD_7ROWS  coeffq, 16
    call .main
    call .main_pass1_end

    punpckhwd   m6, m7, m0              ;packed -out11, -out15
    punpcklwd   m0, m7                  ;packed out0,   out4
    punpcklwd   m7, m3, m4              ;packed -out3,  -out7
    punpckhwd   m4, m3                  ;packed out8,   out12
    mova        m1, [coeffq+16*6]
    punpcklwd   m3, m1, m5              ;packed -out1,  -out5
    punpckhwd   m5, m1                  ;packed out10,  out14
    mova        m1, [coeffq+16*7]
    mova        [coeffq+16*6], m3       ; spill the two negated vectors
    mova        [coeffq+16*7], m7
    punpckhwd   m3, m2, m1              ;packed -out9,  -out13
    punpcklwd   m1, m2                  ;packed out2,   out6

    mova        m7, [o(pw_16384)]

.pass1_end:
    REPX {pmulhrsw x, m7}, m0, m1, m4, m5
    pxor        m2, m2
    psubw       m2, m7                  ; m2 = -m7, applied to the negated vectors
    mova        m7, [coeffq+16*6]
    REPX {pmulhrsw x, m2}, m7, m3, m6
    pmulhrsw    m2, [coeffq+16*7]
    mova        [coeffq+16*6], m7
    jmp m(idct_16x4_internal_8bpc).pass1_end3

.pass2:
    lea         tx2q, [o(m(iadst_8x4_internal_8bpc).pass2)]
    jmp m(idct_16x4_internal_8bpc).pass2_end

; iadst_16x4_internal_8bpc.main -- the body continues past this point.
ALIGN function_align
cglobal_label .main
    mova        [coeffq+16*6], m0       ; park m0; it is reloaded from row 6 later in .main
    pshufd      m0, m1, q1032
    pshufd      m2, m2, q1032
punpckhwd m1, m6, m0 ;packed in13, in2 1944*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m6 ;packed in3, in12 1945*c0909341SAndroid Build Coastguard Worker punpckhwd m7, m5, m2 ;packed in11, in4 1946*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m5 ;packed in5, in10 1947*c0909341SAndroid Build Coastguard Worker mova m6, [o(pd_2048)] 1948*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 1, 5, 6, 995, 3973 ;low:t2 high:t3 1949*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 7, 5, 6, 1751, 3703 ;low:t4 high:t5 1950*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 2, 5, 6, 3513, 2106 ;low:t10 high:t11 1951*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 0, 5, 6, 3857, 1380 ;low:t12 high:t13 1952*c0909341SAndroid Build Coastguard Worker psubsw m5, m1, m2 ;low:t10a high:t11a 1953*c0909341SAndroid Build Coastguard Worker paddsw m1, m2 ;low:t2a high:t3a 1954*c0909341SAndroid Build Coastguard Worker psubsw m2, m7, m0 ;low:t12a high:t13a 1955*c0909341SAndroid Build Coastguard Worker paddsw m7, m0 ;low:t4a high:t5a 1956*c0909341SAndroid Build Coastguard Worker punpcklqdq m0, m5 1957*c0909341SAndroid Build Coastguard Worker punpckhwd m0, m5 ;packed t10a, t11a 1958*c0909341SAndroid Build Coastguard Worker punpcklqdq m5, m2 1959*c0909341SAndroid Build Coastguard Worker punpckhwd m2, m5 ;packed t13a, t12a 1960*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 0, 5, 6, 3406, 2276 ;low:t10 high:t11 1961*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 2, 5, 6, 4017, 799, 1 ;low:t12 high:t13 1962*c0909341SAndroid Build Coastguard Worker mova [coeffq+16*4], m1 1963*c0909341SAndroid Build Coastguard Worker mova [coeffq+16*5], m7 1964*c0909341SAndroid Build Coastguard Worker mova m1, [coeffq+16*6] 1965*c0909341SAndroid Build Coastguard Worker mova m7, [coeffq+16*7] 1966*c0909341SAndroid Build Coastguard Worker pshufd m1, m1, q1032 1967*c0909341SAndroid Build Coastguard Worker pshufd m3, m3, q1032 1968*c0909341SAndroid Build 
Coastguard Worker punpckhwd m5, m7, m1 ;packed in15, in0 1969*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m7 ;packed in1, in14 1970*c0909341SAndroid Build Coastguard Worker punpckhwd m7, m4, m3 ;packed in9, in6 1971*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m4 ;packed in7, in8 1972*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 5, 4, 6, 201, 4091 ;low:t0 high:t1 1973*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 7, 4, 6, 2440, 3290 ;low:t6 high:t7 1974*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 3, 4, 6, 3035, 2751 ;low:t8 high:t9 1975*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 1, 4, 6, 4052, 601 ;low:t14 high:t15 1976*c0909341SAndroid Build Coastguard Worker psubsw m4, m5, m3 ;low:t8a high:t9a 1977*c0909341SAndroid Build Coastguard Worker paddsw m5, m3 ;low:t0a high:t1a 1978*c0909341SAndroid Build Coastguard Worker psubsw m3, m7, m1 ;low:t14a high:t15a 1979*c0909341SAndroid Build Coastguard Worker paddsw m7, m1 ;low:t6a high:t7a 1980*c0909341SAndroid Build Coastguard Worker punpcklqdq m1, m4 1981*c0909341SAndroid Build Coastguard Worker punpckhwd m1, m4 ;packed t8a, t9a 1982*c0909341SAndroid Build Coastguard Worker punpcklqdq m4, m3 1983*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m4 ;packed t15a, t14a 1984*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 1, 4, 6, 799, 4017 ;low:t8 high:t9 1985*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 3, 4, 6, 2276, 3406, 1 ;low:t14 high:t15 1986*c0909341SAndroid Build Coastguard Worker paddsw m4, m1, m2 ;low:t12a high:t13a 1987*c0909341SAndroid Build Coastguard Worker psubsw m1, m2 ;low:t8a high:t9a 1988*c0909341SAndroid Build Coastguard Worker psubsw m2, m0, m3 ;low:t14a high:t15a 1989*c0909341SAndroid Build Coastguard Worker paddsw m0, m3 ;low:t10a high:t11a 1990*c0909341SAndroid Build Coastguard Worker punpcklqdq m3, m1 1991*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m1 ;packed t12a, t13a 1992*c0909341SAndroid Build 
Coastguard Worker punpcklqdq m1, m2 1993*c0909341SAndroid Build Coastguard Worker punpckhwd m2, m1 ;packed t15a, t14a 1994*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 3, 1, 6, 1567, 3784 ;low:t12 high:t13 1995*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 2, 1, 6, 3784, 1567, 1 ;low:t14 high:t15 1996*c0909341SAndroid Build Coastguard Worker psubsw m1, m3, m2 ;low:t14a high:t15a 1997*c0909341SAndroid Build Coastguard Worker paddsw m3, m2 ;low:out2 high:-out13 1998*c0909341SAndroid Build Coastguard Worker psubsw m2, m4, m0 ;low:t10 high:t11 1999*c0909341SAndroid Build Coastguard Worker paddsw m0, m4 ;low:-out1 high:out14 2000*c0909341SAndroid Build Coastguard Worker mova [coeffq+16*6], m0 2001*c0909341SAndroid Build Coastguard Worker mova [coeffq+16*7], m3 2002*c0909341SAndroid Build Coastguard Worker mova m0, [coeffq+16*4] 2003*c0909341SAndroid Build Coastguard Worker mova m3, [coeffq+16*5] 2004*c0909341SAndroid Build Coastguard Worker psubsw m4, m5, m3 ;low:t4 high:t5 2005*c0909341SAndroid Build Coastguard Worker paddsw m5, m3 ;low:t0 high:t1 2006*c0909341SAndroid Build Coastguard Worker psubsw m3, m0, m7 ;low:t6 high:t7 2007*c0909341SAndroid Build Coastguard Worker paddsw m0, m7 ;low:t2 high:t3 2008*c0909341SAndroid Build Coastguard Worker punpcklqdq m7, m4 2009*c0909341SAndroid Build Coastguard Worker punpckhwd m7, m4 ;packed t4, t5 2010*c0909341SAndroid Build Coastguard Worker punpcklqdq m4, m3 2011*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m4 ;packed t7, t6 2012*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 7, 4, 6, 1567, 3784 ;low:t4a high:t5a 2013*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 3, 4, 6, 3784, 1567, 1 ;low:t6a high:t7a 2014*c0909341SAndroid Build Coastguard Worker psubsw m4, m5, m0 ;low:t2a high:t3a 2015*c0909341SAndroid Build Coastguard Worker paddsw m0, m5 ;low:out0 high:-out15 2016*c0909341SAndroid Build Coastguard Worker psubsw m5, m7, m3 ;low:t6 high:t7 2017*c0909341SAndroid Build 
; Tail of the routine that begins above this span: its final add and return.
    paddsw      m3, m7                  ;low:-out3 high:out12
    ret

;-----------------------------------------------------------------------------
; .main_pass1_end
; Final multiply stage used after .main in pass 1.
;
; In:    m1, m2, m4, m5 hold the value pairs noted on the pshufb lines.
;        m6 is the dword term added before every >>12.
;        NOTE(review): .main loads pd_2048 into m6; presumably it is still
;        live on entry here -- confirm for every caller.
; Out:   m2, m4, m7, m5 with the low/high layouts noted on the packssdw lines.
; Keeps: m0 and m3, which are parked in [coeffq+16*4] / [coeffq+16*5] while
;        the registers carry the two multiplier constants.
; Clobbers: m1.
;-----------------------------------------------------------------------------
ALIGN function_align
.main_pass1_end:
    mova        m7, [o(deint_shuf1)]    ; word i of the low half is placed
                                        ; next to word i of the high half
    mova        [coeffq+16*4], m0       ; park m0/m3, reloaded before ret
    mova        [coeffq+16*5], m3
    mova        m0, [o(pw_2896_m2896)]
    mova        m3, [o(pw_2896_2896)]
    pshufb      m1, m7                  ;t14a t15a
    pshufb      m2, m7                  ;t10 t11
    pshufb      m4, m7                  ;t2a t3a
    pshufb      m5, m7                  ;t6 t7
    ; Each group: pmaddwd against both constants, add m6, shift right by 12,
    ; then pack the two dword results back to saturated words.
    pmaddwd     m7, m0, m2
    pmaddwd     m2, m3
    paddd       m7, m6
    paddd       m2, m6
    psrad       m7, 12
    psrad       m2, 12
    packssdw    m2, m7                  ;low:out6 high:-out9
    pmaddwd     m7, m0, m4
    pmaddwd     m4, m3
    paddd       m7, m6
    paddd       m4, m6
    psrad       m7, 12
    psrad       m4, 12
    packssdw    m4, m7                  ;low:-out7 high:out8
    pmaddwd     m7, m3, m5
    pmaddwd     m5, m0
    paddd       m7, m6
    paddd       m5, m6
    psrad       m7, 12
    psrad       m5, 12
    packssdw    m7, m5                  ;low:out4 high:-out11
    pmaddwd     m5, m3, m1
    pmaddwd     m1, m0
    paddd       m5, m6
    paddd       m1, m6
    psrad       m5, 12
    psrad       m1, 12
    packssdw    m5, m1                  ;low:-out5 high:out10
    mova        m0, [coeffq+16*4]       ; restore the parked registers
    mova        m3, [coeffq+16*5]
    ret

;-----------------------------------------------------------------------------
; .main_pass2_end
; Pass-2 counterpart of .main_pass1_end: the same four input registers
; (m1, m2, m4, m5) are combined with saturating word add/sub and scaled with
; pmulhrsw by pw_2896x8 instead of the pmaddwd / >>12 sequence.
;
; Out:   m7, m4, m5, m2 with the layouts noted on the last four unpacks.
; Clobbers: m1, m6.
;-----------------------------------------------------------------------------
ALIGN function_align
cglobal_label .main_pass2_end
    mova        m7, [o(pw_2896x8)]
    punpckhqdq  m6, m2, m1              ;low:t11 high:t15a
    punpcklqdq  m2, m1                  ;low:t10 high:t14a
    psubsw      m1, m2, m6
    paddsw      m2, m6
    punpckhqdq  m6, m4, m5              ;low:t3a high:t7
    punpcklqdq  m4, m5                  ;low:t2a high:t6
    psubsw      m5, m4, m6
    paddsw      m4, m6
    pmulhrsw    m1, m7                  ;low:-out9 high:out10
    pmulhrsw    m2, m7                  ;low:out6 high:-out5
    pmulhrsw    m5, m7                  ;low:out8 high:-out11
    pmulhrsw    m4, m7                  ;low:-out7 high:out4
    punpckhqdq  m7, m4, m5              ;low:out4 high:-out11
    punpcklqdq  m4, m5                  ;low:-out7 high:out8
    punpckhqdq  m5, m2, m1              ;low:-out5 high:out10
    punpcklqdq  m2, m1                  ;low:out6 high:-out9
    ret


INV_TXFM_16X4_FN flipadst, dct
INV_TXFM_16X4_FN flipadst, adst
INV_TXFM_16X4_FN flipadst, flipadst
INV_TXFM_16X4_FN flipadst, identity

;-----------------------------------------------------------------------------
; iflipadst_16x4_internal_8bpc
; Pass 1 runs the iadst_16x4 .main and .main_pass1_end routines, interleaves
; their results in the order given by the "packed" comments, and finishes in
; the iadst_16x4 .pass1_end tail with m7 = pw_m16384.
; Pass 2 hands the iflipadst_8x4 pass-2 address to the idct_16x4 .pass2_end
; tail through tx2q.
;-----------------------------------------------------------------------------
cglobal iflipadst_16x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    LOAD_7ROWS  coeffq, 16
    call m(iadst_16x4_internal_8bpc).main
    call m(iadst_16x4_internal_8bpc).main_pass1_end

    punpcklwd   m6, m7, m0              ;packed out11, out15
    punpckhwd   m0, m7                  ;packed -out0, -out4
    punpckhwd   m7, m3, m4              ;packed out3, out7
    punpcklwd   m4, m3                  ;packed -out8, -out12
    mova        m1, [coeffq+16*6]
    punpckhwd   m3, m1, m5              ;packed out1, out5
    punpcklwd   m5, m1                  ;packed -out10, -out14
    mova        m1, [coeffq+16*7]
    mova        [coeffq+16*6], m3       ; coeff rows 6/7 double as spill slots
    mova        [coeffq+16*7], m7
    punpcklwd   m3, m2, m1              ;packed out9, out13
    punpckhwd   m1, m2                  ;packed -out2, -out6

    mova        m7, [o(pw_m16384)]
    jmp m(iadst_16x4_internal_8bpc).pass1_end

.pass2:
    lea         tx2q, [o(m(iflipadst_8x4_internal_8bpc).pass2)]
    jmp m(idct_16x4_internal_8bpc).pass2_end


INV_TXFM_16X4_FN identity, dct
INV_TXFM_16X4_FN identity, adst
INV_TXFM_16X4_FN identity, flipadst
INV_TXFM_16X4_FN identity, identity

;-----------------------------------------------------------------------------
; iidentity_16x4_internal_8bpc
; Pass 1 replaces each of the eight 16-byte coefficient rows x with
;     x + pmulhrsw(pmulhrsw(x, pw_1697x16), pw_16384)     (saturating add)
; Rows are handled in the order 6,5,7 / 2,3,4 / 0,1; finished rows 5-7 are
; written back to coeffq until registers free up.  The rows are then
; interleaved as noted in the "packed" comments and control passes to the
; idct_16x4 .pass1_end3 tail.
; Pass 2 hands the iidentity_8x4 pass-2 address to the idct_16x4 .pass2_end
; tail through tx2q.
;-----------------------------------------------------------------------------
cglobal iidentity_16x4_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    mova        m1, [coeffq+16*6]
    mova        m0, [coeffq+16*5]
    mova        m2, [coeffq+16*7]
    mova        m6, [o(pw_1697x16)]
    mova        m7, [o(pw_16384)]
    pmulhrsw    m4, m6, m1
    pmulhrsw    m3, m6, m0
    pmulhrsw    m5, m6, m2
    pmulhrsw    m4, m7
    pmulhrsw    m3, m7
    pmulhrsw    m5, m7
    paddsw      m1, m4
    paddsw      m0, m3
    paddsw      m5, m2
    mova        m2, [coeffq+16*2]
    mova        m3, [coeffq+16*3]
    mova        m4, [coeffq+16*4]
    mova        [coeffq+16*6], m1       ; park finished rows 6, 5, 7
    mova        [coeffq+16*5], m0
    mova        [coeffq+16*7], m5
    pmulhrsw    m0, m6, m2
    pmulhrsw    m1, m6, m3
    pmulhrsw    m5, m6, m4
    pmulhrsw    m0, m7
    pmulhrsw    m1, m7
    pmulhrsw    m5, m7
    paddsw      m2, m0
    paddsw      m3, m1
    paddsw      m4, m5
    mova        m0, [coeffq+16*0]
    mova        m1, [coeffq+16*1]
    pmulhrsw    m5, m6, m0
    pmulhrsw    m6, m1                  ; last use of the constant in m6
    pmulhrsw    m5, m7
    pmulhrsw    m6, m7
    paddsw      m0, m5
    paddsw      m1, m6
    mova        m6, [coeffq+16*6]       ; bring back parked rows 6 and 5
    mova        m5, [coeffq+16*5]
    punpckhwd   m7, m0, m2              ;packed out1, out5
    punpcklwd   m0, m2                  ;packed out0, out4
    punpckhwd   m2, m1, m3              ;packed out3, out7
    punpcklwd   m1, m3                  ;packed out2, out6
    mova        [coeffq+16*6], m7
    mova        m7, [coeffq+16*7]
    punpckhwd   m3, m4, m6              ;packed out9, out13
    punpcklwd   m4, m6                  ;packed out8, out12
    punpckhwd   m6, m5, m7              ;packed out11, out15
    punpcklwd   m5, m7                  ;packed out10, out14
    jmp m(idct_16x4_internal_8bpc).pass1_end3

.pass2:
    lea         tx2q, [o(m(iidentity_8x4_internal_8bpc).pass2)]
    jmp m(idct_16x4_internal_8bpc).pass2_end


; SAVE_8ROWS base, stride
; Stores m0..m7 to [base + stride*0] .. [base + stride*7].
%macro SAVE_8ROWS 2 ;src, stride
    mova        [%1+%2*0], m0
    mova        [%1+%2*1], m1
    mova        [%1+%2*2], m2
    mova        [%1+%2*3], m3
    mova        [%1+%2*4], m4
    mova        [%1+%2*5], m5
    mova        [%1+%2*6], m6
    mova        [%1+%2*7], m7
%endmacro

; INV_TXFM_8X16_FN type1, type2
; Expands INV_TXFM_FN for the 8x16 size.  For dct_dct only, it also emits a
; short path that splats coefficient word 0 across m0, scales it with the
; pmulhrsw chain below, and continues in the 8x8 dct_dct .loop code with
; r3d = 4 and tx2q = .end.
%macro INV_TXFM_8X16_FN 2 ; type1, type2
    INV_TXFM_FN %1, %2, 8x16, 8, 16*16
%ifidn %1_%2, dct_dct
    pshuflw     m0, [coeffq], q0000
    punpcklwd   m0, m0
    mova        m1, [o(pw_2896x8)]
    pmulhrsw    m0, m1
    mova        m2, [o(pw_16384)]
    mov         [coeffq], eobd          ; NOTE(review): presumably eobd is 0
                                        ; here, so this clears the DC
                                        ; coefficient -- confirm in INV_TXFM_FN
    pmulhrsw    m0, m1
    pmulhrsw    m0, m2
    psrlw       m2, 3                   ; pw_2048
    pmulhrsw    m0, m1
    pmulhrsw    m0, m2
    mov         r3d, 4
    lea         tx2q, [o(m(inv_txfm_add_dct_dct_8x16_8bpc).end)]
    jmp m(inv_txfm_add_dct_dct_8x8_8bpc).loop
.end:
    RET
%endif
%endmacro

INV_TXFM_8X16_FN dct, dct
INV_TXFM_8X16_FN dct, adst
INV_TXFM_8X16_FN dct, flipadst
INV_TXFM_8X16_FN dct, identity

;-----------------------------------------------------------------------------
; idct_8x16_internal_8bpc
; .pass1 is shared with the adst/flipadst 8x16 variants, which enter it with
; their own 8x8 pass-1 address in r3.  It runs that routine twice: first on
; the rows at coeffq+16*1 (stride 32), then on the rows at coeffq+16*0.
; tx2q is parked at [rsp+gprsize+16*11] while it is borrowed to return from
; the first run to .pass1_end.
; NOTE(review): this assumes the routine in r3 exits through tx2q -- confirm.
;
; Pass 2 runs the idct_8x8 .main routine on one half of the inputs and the
; idct_16x8 .main routine on the other, then goes through the idct_8x8 .end
; tail twice: once with dstq advanced by 8 rows, and once (from .end) with
; the original dstq restored from r3 and the rows saved on the stack.
; .end1 zeroes the 16 coefficient rows and returns.
;-----------------------------------------------------------------------------
cglobal idct_8x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    lea         r3, [o(m(idct_8x8_internal_8bpc).pass1)]

.pass1:
    LOAD_8ROWS  coeffq+16*1, 32, 1
    mov         [rsp+gprsize+16*11], tx2q
    lea         tx2q, [o(m(idct_8x16_internal_8bpc).pass1_end)]
    jmp         r3

.pass1_end:
    SAVE_8ROWS  coeffq+16*1, 32
    LOAD_8ROWS  coeffq+16*0, 32, 1
    mov         tx2q, [rsp+gprsize+16*11]
    jmp         r3

.pass2:
    lea         tx2q, [o(m(idct_8x16_internal_8bpc).end)]

.pass2_pre:
    mova        [coeffq+16*2 ], m1      ; park the odd registers; they are
    mova        [coeffq+16*6 ], m3      ; reloaded as m0-m3 for the second
    mova        [coeffq+16*10], m5      ; call below
    mova        [coeffq+16*14], m7
    mova        m1, m2
    mova        m2, m4
    mova        m3, m6
    mova        m4, [coeffq+16*1 ]
    mova        m5, [coeffq+16*5 ]
    mova        m6, [coeffq+16*9 ]
    mova        m7, [coeffq+16*13]

.pass2_main:
    call m(idct_8x8_internal_8bpc).main

    SAVE_7ROWS  rsp+gprsize+16*3, 16
    mova        m0, [coeffq+16*2 ]
    mova        m1, [coeffq+16*6 ]
    mova        m2, [coeffq+16*10]
    mova        m3, [coeffq+16*14]
    mova        m4, [coeffq+16*3 ]
    mova        m5, [coeffq+16*7 ]
    mova        m6, [coeffq+16*11]
    mova        m7, [coeffq+16*15]
    call m(idct_16x8_internal_8bpc).main

    mov         r3, dstq                ; keep the original dst for .end
    lea         dstq, [dstq+strideq*8]
    jmp m(idct_8x8_internal_8bpc).end

.end:
    LOAD_8ROWS  rsp+gprsize+16*3, 16
    mova        [rsp+gprsize+16*0], m7
    lea         tx2q, [o(m(idct_8x16_internal_8bpc).end1)]
    mov         dstq, r3
    jmp m(idct_8x8_internal_8bpc).end

.end1:
    pxor        m7, m7
    REPX {mova [coeffq+16*x], m7}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
    ret

INV_TXFM_8X16_FN adst, dct
INV_TXFM_8X16_FN adst, adst
INV_TXFM_8X16_FN adst, flipadst
INV_TXFM_8X16_FN adst, identity
;-----------------------------------------------------------------------------
; iadst_8x16_internal_8bpc
; Pass 1 enters the shared idct_8x16 .pass1 driver with the iadst_8x8 pass-1
; address in r3.
; Pass 2 arranges its 16 inputs between registers and the stack slots at
; rsp+gprsize+16*3 .. 16*10 (32*5 == 16*10), calls the iadst_16x8 .main and
; .main_pass2_end routines, and goes through the iadst_8x8 .end tail twice:
; once with dstq advanced by 8 rows, and once (from .end) with the original
; dstq restored from r3.  The second trip finishes in idct_8x16 .end1.
;-----------------------------------------------------------------------------
cglobal iadst_8x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    lea         r3, [o(m(iadst_8x8_internal_8bpc).pass1)]
    jmp m(idct_8x16_internal_8bpc).pass1

.pass2:
    lea         tx2q, [o(m(iadst_8x16_internal_8bpc).end)]

.pass2_pre:
    mova        [rsp+gprsize+16*7], m0
    mova        [rsp+gprsize+16*8], m1
    mova        [rsp+gprsize+16*5], m6
    mova        [rsp+gprsize+16*6], m7
    mova        m0, m2
    mova        m1, m3
    mova        m2, m4
    mova        m3, m5

.pass2_main:
    mova        m4, [coeffq+16*1 ]
    mova        m5, [coeffq+16*3 ]
    mova        m6, [coeffq+16*13]
    mova        m7, [coeffq+16*15]
    mova        [rsp+gprsize+16*3], m4
    mova        [rsp+gprsize+16*4], m5
    mova        [rsp+gprsize+16*9], m6
    mova        [rsp+gprsize+32*5], m7
    mova        m4, [coeffq+16*5 ]
    mova        m5, [coeffq+16*7 ]
    mova        m6, [coeffq+16*9 ]
    mova        m7, [coeffq+16*11]

    call m(iadst_16x8_internal_8bpc).main
    call m(iadst_16x8_internal_8bpc).main_pass2_end

    mov         r3, dstq                ; keep the original dst for .end
    lea         dstq, [dstq+strideq*8]
    jmp m(iadst_8x8_internal_8bpc).end

.end:
    LOAD_8ROWS  rsp+gprsize+16*3, 16
    mova        [rsp+gprsize+16*0], m7
    lea         tx2q, [o(m(idct_8x16_internal_8bpc).end1)]
    mov         dstq, r3
    jmp m(iadst_8x8_internal_8bpc).end


INV_TXFM_8X16_FN flipadst, dct
INV_TXFM_8X16_FN flipadst, adst
INV_TXFM_8X16_FN flipadst, flipadst
INV_TXFM_8X16_FN flipadst, identity

;-----------------------------------------------------------------------------
; iflipadst_8x16_internal_8bpc
; Same structure as the adst 8x16 variant, with the iflipadst_8x8 pass-1 and
; .end routines.  The difference in pass 2 is the destination order: dstq is
; left untouched for the first trip through the 8x8 tail, and r3 (set to
; dstq+strideq*8 in .pass2) becomes dstq for the second trip.
;-----------------------------------------------------------------------------
cglobal iflipadst_8x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    lea         r3, [o(m(iflipadst_8x8_internal_8bpc).pass1)]
    jmp m(idct_8x16_internal_8bpc).pass1

.pass2:
    lea         tx2q, [o(m(iflipadst_8x16_internal_8bpc).end)]
    lea         r3, [dstq+strideq*8]

.pass2_pre:
    mova        [rsp+gprsize+16*7], m0
    mova        [rsp+gprsize+16*8], m1
    mova        [rsp+gprsize+16*5], m6
    mova        [rsp+gprsize+16*6], m7
    mova        m0, m2
    mova        m1, m3
    mova        m2, m4
    mova        m3, m5

.pass2_main:
    mova        m4, [coeffq+16*1 ]
    mova        m5, [coeffq+16*3 ]
    mova        m6, [coeffq+16*13]
    mova        m7, [coeffq+16*15]
    mova        [rsp+gprsize+16*3], m4
    mova        [rsp+gprsize+16*4], m5
    mova        [rsp+gprsize+16*9], m6
    mova        [rsp+gprsize+32*5], m7
    mova        m4, [coeffq+16*5 ]
    mova        m5, [coeffq+16*7 ]
    mova        m6, [coeffq+16*9 ]
    mova        m7, [coeffq+16*11]

    call m(iadst_16x8_internal_8bpc).main
    call m(iadst_16x8_internal_8bpc).main_pass2_end
    jmp m(iflipadst_8x8_internal_8bpc).end

.end:
    LOAD_8ROWS  rsp+gprsize+16*3, 16
    mova        [rsp+gprsize+16*0], m7
    lea         tx2q, [o(m(idct_8x16_internal_8bpc).end1)]
    mov         dstq, r3
    jmp m(iflipadst_8x8_internal_8bpc).end


INV_TXFM_8X16_FN identity, dct
INV_TXFM_8X16_FN identity, adst
INV_TXFM_8X16_FN identity, flipadst
INV_TXFM_8X16_FN identity, identity

;-----------------------------------------------------------------------------
; iidentity_8x16_internal_8bpc
; Pass 1 sends each half of the coefficients (rows at coeffq+16*1, then at
; coeffq+16*0, stride 32) straight to the idct_8x8 .pass1_end3 tail; tx2q is
; kept in r3 while it is borrowed to return to .pass1_end.
; Pass 2 (.end) applies IDTX16 and a pmulhrsw by pw_2048 to all eight
; registers, using [rsp+gprsize+16*0..2] as spill slots because only eight
; vector registers are in play, then jumps to the idct_8x8 .end3 tail.
; .end1 loads the other half, advances dstq by two rows, and runs .end again
; with tx2q pointing at idct_8x16 .end1.
; NOTE(review): IDTX16's operand meaning is defined outside this span; the
; spill order below depends on it -- confirm before reordering anything.
;-----------------------------------------------------------------------------
cglobal iidentity_8x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    LOAD_8ROWS  coeffq+16*1, 32, 1
    mov         r3, tx2q
    lea         tx2q, [o(.pass1_end)]
    mova        [rsp+gprsize+16*1], m6
    jmp m(idct_8x8_internal_8bpc).pass1_end3

.pass1_end:
    SAVE_8ROWS  coeffq+16*1, 32
    LOAD_8ROWS  coeffq+16*0, 32, 1
    mov         tx2q, r3
    mova        [rsp+gprsize+16*1], m6
    jmp m(idct_8x8_internal_8bpc).pass1_end3

.pass2:
    lea         tx2q, [o(.end1)]

.end:
    mova        [rsp+gprsize+16*0], m7  ; free m7 for the constant
    mova        [rsp+gprsize+16*1], m6  ; free m6 as the IDTX16 temporary
    mova        m7, [o(pw_1697x16)]
    REPX {IDTX16 x, 6, 7}, 0, 1, 2, 3, 4, 5
    mova        m6, [rsp+gprsize+16*1]
    mova        [rsp+gprsize+16*2], m5  ; m5 becomes the temporary
    IDTX16      6, 5, 7
    mova        m5, [rsp+gprsize+16*0]  ; value that entered in m7
    IDTX16      5, 7, 7
    mova        m7, [o(pw_2048)]
    REPX {pmulhrsw x, m7}, m0, m1, m2, m3, m4, m5, m6
    pmulhrsw    m7, [rsp+gprsize+16*2]
    mova        [rsp+gprsize+16*0], m5
    mova        [rsp+gprsize+16*1], m6
    mova        [rsp+gprsize+16*2], m7
    jmp m(idct_8x8_internal_8bpc).end3

.end1:
    LOAD_8ROWS  coeffq+16*1, 32
    lea         tx2q, [o(m(idct_8x16_internal_8bpc).end1)]
    lea         dstq, [dstq+strideq*2]
    jmp .end


; INV_TXFM_16X8_FN type1, type2
; Expands INV_TXFM_FN for the 16x8 size.  For dct_dct only, it also emits a
; short path that scales the first coefficient twice by pw_2896x8 and
; continues in the 16x4 dct_dct .dconly code with r2d = 4, m2 = pw_16384
; and tx2q = .end.
%macro INV_TXFM_16X8_FN 2 ; type1, type2
    INV_TXFM_FN %1, %2, 16x8, 8, 16*16
%ifidn %1_%2, dct_dct
    movd        m1, [o(pw_2896x8)]
    pmulhrsw    m0, m1, [coeffq]
    movd        m2, [o(pw_16384)]
    mov         [coeffq], eobd          ; NOTE(review): presumably eobd is 0
                                        ; here, so this clears the DC
                                        ; coefficient -- confirm in INV_TXFM_FN
    pmulhrsw    m0, m1
    mov         r2d, 4
    lea         tx2q, [o(m(inv_txfm_add_dct_dct_16x8_8bpc).end)]
    jmp m(inv_txfm_add_dct_dct_16x4_8bpc).dconly
.end:
    RET
%endif
%endmacro

INV_TXFM_16X8_FN dct, dct
INV_TXFM_16X8_FN dct, adst
INV_TXFM_16X8_FN dct, flipadst
INV_TXFM_16X8_FN dct, identity

;-----------------------------------------------------------------------------
; idct_16x8_internal_8bpc
; Pass 1 runs the idct_8x8 .main routine on the rows at coeffq+16*0
; (stride 32) and saves the result at rsp+gprsize+16*3; it then runs .main
; (below) on the rows at coeffq+16*1.  Both halves go through the idct_8x8
; .pass1_end tail, the first coming back to .pass1_end with tx2q kept in r3.
; Pass 2 goes through the idct_8x8 .pass2_main code twice: once on the
; registers as they arrive, and once (from .end) on the rows reloaded from
; coeffq+16*1 with dstq = original dstq + 8.
;-----------------------------------------------------------------------------
cglobal idct_16x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    LOAD_8ROWS  coeffq+16*0, 32, 1
    call m(idct_8x8_internal_8bpc).main
    SAVE_7ROWS  rsp+gprsize+16*3, 16

    LOAD_8ROWS  coeffq+16*1, 32, 1
    call .main
    mov         r3, tx2q
    lea         tx2q, [o(.pass1_end)]
    jmp m(idct_8x8_internal_8bpc).pass1_end

.pass1_end:
    SAVE_8ROWS  coeffq+16*1, 32
    LOAD_8ROWS  rsp+gprsize+16*3, 16
    mova        [rsp+gprsize+16*0], m7
    mov         tx2q, r3
    jmp m(idct_8x8_internal_8bpc).pass1_end

.pass2:
    lea         tx2q, [o(.end)]
    lea         r3, [dstq+8]
    jmp m(idct_8x8_internal_8bpc).pass2_main

.end:
    LOAD_8ROWS  coeffq+16*1, 32
    lea         tx2q, [o(m(idct_8x16_internal_8bpc).end1)]
    mov         dstq, r3
    jmp m(idct_8x8_internal_8bpc).pass2_main


ALIGN function_align
cglobal_label .main
    mova        [rsp+gprsize*2+16*1], m2
    mova        [rsp+gprsize*2+16*2], m6
    mova        [rsp+gprsize*2+32*5], m5

    mova        m6, [o(pd_2048)]
    ITX_MULSUB_2W 0, 7, 2, 5, 6, 401, 4076 ;t8a, t15a
ITX_MULSUB_2W 4, 3, 2, 5, 6, 3166, 2598 ;t9a, t14a 2482*c0909341SAndroid Build Coastguard Worker psubsw m2, m0, m4 ;t9 2483*c0909341SAndroid Build Coastguard Worker paddsw m0, m4 ;t8 2484*c0909341SAndroid Build Coastguard Worker psubsw m4, m7, m3 ;t14 2485*c0909341SAndroid Build Coastguard Worker paddsw m7, m3 ;t15 2486*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 4, 2, 3, 5, 6, 1567, 3784 ;t9a, t14a 2487*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*1] 2488*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize*2+32*5] 2489*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*1], m2 2490*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+32*5], m4 2491*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*2] 2492*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*2], m7 2493*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 3, 5, 7, 4, 6, 1931, 3612 ;t10a, t13a 2494*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 2, 1, 7, 4, 6, 3920, 1189 ;t11a, t12a 2495*c0909341SAndroid Build Coastguard Worker psubsw m4, m2, m3 ;t10 2496*c0909341SAndroid Build Coastguard Worker paddsw m2, m3 ;t11 2497*c0909341SAndroid Build Coastguard Worker psubsw m3, m1, m5 ;t13 2498*c0909341SAndroid Build Coastguard Worker paddsw m1, m5 ;t12 2499*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 3, 4, 7, 5, 6, m3784, 1567 ;t10a, t13a 2500*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize*2+32*5] 2501*c0909341SAndroid Build Coastguard Worker psubsw m6, m0, m2 ;t11a 2502*c0909341SAndroid Build Coastguard Worker paddsw m0, m2 ;t8a 2503*c0909341SAndroid Build Coastguard Worker paddsw m2, m7, m3 ;t9 2504*c0909341SAndroid Build Coastguard Worker psubsw m7, m3 ;t10 2505*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize*2+16*0] 2506*c0909341SAndroid Build Coastguard Worker psubsw m3, m5, m0 ;out8 2507*c0909341SAndroid Build Coastguard Worker paddsw m0, m5 ;out7 
2508*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+32*5], m0 2509*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize*2+16*9] 2510*c0909341SAndroid Build Coastguard Worker psubsw m0, m5, m2 ;out9 2511*c0909341SAndroid Build Coastguard Worker paddsw m2, m5 ;out6 2512*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*0], m0 2513*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*9], m2 2514*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*1] 2515*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*2] 2516*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*1], m3 2517*c0909341SAndroid Build Coastguard Worker psubsw m5, m0, m4 ;t13 2518*c0909341SAndroid Build Coastguard Worker paddsw m0, m4 ;t14 2519*c0909341SAndroid Build Coastguard Worker mova m3, [o(pd_2048)] 2520*c0909341SAndroid Build Coastguard Worker psubsw m4, m2, m1 ;t12a 2521*c0909341SAndroid Build Coastguard Worker paddsw m1, m2 ;t15a 2522*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*2], m1 2523*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 7, 1, 2, 3, 2896, 2896 ;t10a, t13a 2524*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 4, 6, 1, 2, 3, 2896, 2896 ;t11, t12 2525*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*8] 2526*c0909341SAndroid Build Coastguard Worker psubsw m2, m3, m5 ;out10 2527*c0909341SAndroid Build Coastguard Worker paddsw m3, m5 ;out5 2528*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize*2+16*7] 2529*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*8], m3 2530*c0909341SAndroid Build Coastguard Worker psubsw m3, m5, m4 ;out11 2531*c0909341SAndroid Build Coastguard Worker paddsw m5, m4 ;out4 2532*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+gprsize*2+16*6] 2533*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*7], m5 2534*c0909341SAndroid Build Coastguard Worker paddsw m5, m4, m6 ;out3 
2535*c0909341SAndroid Build Coastguard Worker psubsw m4, m6 ;out12 2536*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+gprsize*2+16*5] 2537*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*6], m5 2538*c0909341SAndroid Build Coastguard Worker psubsw m5, m6, m7 ;out13 2539*c0909341SAndroid Build Coastguard Worker paddsw m6, m7 ;out2 2540*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize*2+16*4] 2541*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*5], m6 2542*c0909341SAndroid Build Coastguard Worker psubsw m6, m7, m0 ;out14 2543*c0909341SAndroid Build Coastguard Worker paddsw m7, m0 ;out1 2544*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*2] 2545*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*3] 2546*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*4], m7 2547*c0909341SAndroid Build Coastguard Worker psubsw m7, m0, m1 ;out15 2548*c0909341SAndroid Build Coastguard Worker paddsw m0, m1 ;out0 2549*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*3], m0 2550*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*0] 2551*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*1] 2552*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*0], m7 2553*c0909341SAndroid Build Coastguard Worker ret 2554*c0909341SAndroid Build Coastguard Worker 2555*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN adst, dct 2556*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN adst, adst 2557*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN adst, flipadst 2558*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN adst, identity 2559*c0909341SAndroid Build Coastguard Worker 2560*c0909341SAndroid Build Coastguard Workercglobal iadst_16x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2 2561*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_2896x8)] 2562*c0909341SAndroid Build Coastguard Worker pmulhrsw 
m0, m7, [coeffq+16*0 ] 2563*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m7, [coeffq+16*1 ] 2564*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m7, [coeffq+16*14] 2565*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m7, [coeffq+16*15] 2566*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*7], m0 2567*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*8], m1 2568*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*9], m2 2569*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+32*5], m3 2570*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m7, [coeffq+16*6 ] 2571*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m7, [coeffq+16*7 ] 2572*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m7, [coeffq+16*8 ] 2573*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m7, [coeffq+16*9 ] 2574*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*3], m2 2575*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*4], m3 2576*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*5], m0 2577*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*6], m1 2578*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m7, [coeffq+16*2 ] 2579*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m7, [coeffq+16*3 ] 2580*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m7, [coeffq+16*4 ] 2581*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m7, [coeffq+16*5 ] 2582*c0909341SAndroid Build Coastguard Worker pmulhrsw m4, m7, [coeffq+16*10] 2583*c0909341SAndroid Build Coastguard Worker pmulhrsw m5, m7, [coeffq+16*11] 2584*c0909341SAndroid Build Coastguard Worker pmulhrsw m6, m7, [coeffq+16*12] 2585*c0909341SAndroid Build Coastguard Worker pmulhrsw m7, [coeffq+16*13] 2586*c0909341SAndroid Build Coastguard Worker 2587*c0909341SAndroid Build Coastguard Worker call .main 2588*c0909341SAndroid Build Coastguard Worker call .main_pass1_end 2589*c0909341SAndroid Build Coastguard Worker 
mov r3, tx2q 2590*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end)] 2591*c0909341SAndroid Build Coastguard Worker jmp m(iadst_8x8_internal_8bpc).pass1_end 2592*c0909341SAndroid Build Coastguard Worker 2593*c0909341SAndroid Build Coastguard Worker.pass1_end: 2594*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+16*1, 32 2595*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*3, 16 2596*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 2597*c0909341SAndroid Build Coastguard Worker mov tx2q, r3 2598*c0909341SAndroid Build Coastguard Worker jmp m(iadst_8x8_internal_8bpc).pass1_end 2599*c0909341SAndroid Build Coastguard Worker 2600*c0909341SAndroid Build Coastguard Worker.pass2: 2601*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.end)] 2602*c0909341SAndroid Build Coastguard Worker lea r3, [dstq+8] 2603*c0909341SAndroid Build Coastguard Worker jmp m(iadst_8x8_internal_8bpc).pass2_main 2604*c0909341SAndroid Build Coastguard Worker 2605*c0909341SAndroid Build Coastguard Worker.end: 2606*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+16*1, 32 2607*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(m(idct_8x16_internal_8bpc).end1)] 2608*c0909341SAndroid Build Coastguard Worker mov dstq, r3 2609*c0909341SAndroid Build Coastguard Worker jmp m(iadst_8x8_internal_8bpc).pass2_main 2610*c0909341SAndroid Build Coastguard Worker 2611*c0909341SAndroid Build Coastguard WorkerALIGN function_align 2612*c0909341SAndroid Build Coastguard Workercglobal_label .main 2613*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*0], m1 2614*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*1], m2 2615*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*2], m6 2616*c0909341SAndroid Build Coastguard Worker 2617*c0909341SAndroid Build Coastguard Worker mova m6, [o(pd_2048)] 2618*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 7, 0, 1, 2, 6, 995, 3973 ;t3, t2 
2619*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 3, 4, 1, 2, 6, 3513, 2106 ;t11, t10 2620*c0909341SAndroid Build Coastguard Worker psubsw m1, m0, m4 ;t10a 2621*c0909341SAndroid Build Coastguard Worker paddsw m0, m4 ;t2a 2622*c0909341SAndroid Build Coastguard Worker psubsw m4, m7, m3 ;t11a 2623*c0909341SAndroid Build Coastguard Worker paddsw m3, m7 ;t3a 2624*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 1, 4, 7, 2, 6, 3406, 2276 ;t11, t10 2625*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*0] ;in3 2626*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize*2+16*1] ;in4 2627*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*0], m1 ;t11 2628*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*1], m4 ;t10 2629*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*2] ;in12 2630*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*2], m0 ;t2a 2631*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 7, 0, 4, 6, 1751, 3703 ;t5, t4 2632*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 2, 1, 0, 4, 6, 3857, 1380 ;t13, t12 2633*c0909341SAndroid Build Coastguard Worker psubsw m0, m7, m1 ;t12a 2634*c0909341SAndroid Build Coastguard Worker paddsw m1, m7 ;t4a 2635*c0909341SAndroid Build Coastguard Worker psubsw m4, m5, m2 ;t13a 2636*c0909341SAndroid Build Coastguard Worker paddsw m5, m2 ;t5a 2637*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 4, 0, 7, 2, 6, 4017, 799 ;t12, t13 2638*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*8] ;in1 2639*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize*2+16*9] ;in14 2640*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*8], m4 ;t12 2641*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*9], m0 ;t13 2642*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+gprsize*2+16*4] ;in9 2643*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*5] ;in6 
2644*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*4], m1 ;t4a 2645*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*5], m5 ;t5a 2646*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 2, 7, 1, 5, 6, 4052, 601 ;t15, t14 2647*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 4, 0, 1, 5, 6, 2440, 3290 ;t7, t6 2648*c0909341SAndroid Build Coastguard Worker psubsw m1, m0, m7 ;t14a 2649*c0909341SAndroid Build Coastguard Worker paddsw m0, m7 ;t6a 2650*c0909341SAndroid Build Coastguard Worker psubsw m5, m4, m2 ;t15a 2651*c0909341SAndroid Build Coastguard Worker paddsw m4, m2 ;t7a 2652*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 1, 7, 2, 6, 2276, 3406 ;t14, t15 2653*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*2] ;t2a 2654*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*2], m5 ;t14 2655*c0909341SAndroid Build Coastguard Worker psubsw m7, m2, m0 ;t6 2656*c0909341SAndroid Build Coastguard Worker paddsw m2, m0 ;t2 2657*c0909341SAndroid Build Coastguard Worker psubsw m0, m3, m4 ;t7 2658*c0909341SAndroid Build Coastguard Worker paddsw m3, m4 ;t3 2659*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 0, 7, 4, 5, 6, 3784, 1567 ;t6a, t7a 2660*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+gprsize*2+16*7] ;in0 2661*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize*2+32*5] ;in15 2662*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*7], m3 ;t3 2663*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+32*5], m1 ;t15 2664*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*6] ;in7 2665*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*3] ;in8 2666*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*6], m7 ;t7a 2667*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*3], m0 ;t6a 2668*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 0, 7, 6, 201, 4091 ;t1, t0 
2669*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 1, 3, 0, 7, 6, 3035, 2751 ;t9, t8 2670*c0909341SAndroid Build Coastguard Worker psubsw m0, m4, m3 ;t8a 2671*c0909341SAndroid Build Coastguard Worker paddsw m4, m3 ;t0a 2672*c0909341SAndroid Build Coastguard Worker psubsw m3, m5, m1 ;t9a 2673*c0909341SAndroid Build Coastguard Worker paddsw m5, m1 ;t1a 2674*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 0, 3, 1, 7, 6, 799, 4017 ;t9, t8 2675*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*4] ;t4a 2676*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize*2+16*5] ;t5a 2677*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*4], m3 ;t8 2678*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*5], m0 ;t9 2679*c0909341SAndroid Build Coastguard Worker psubsw m0, m4, m1 ;t4 2680*c0909341SAndroid Build Coastguard Worker paddsw m4, m1 ;t0 2681*c0909341SAndroid Build Coastguard Worker psubsw m3, m5, m7 ;t5 2682*c0909341SAndroid Build Coastguard Worker paddsw m5, m7 ;t1 2683*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 0, 3, 1, 7, 6, 1567, 3784 ;t5a, t4a 2684*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize*2+16*3] ;t6a 2685*c0909341SAndroid Build Coastguard Worker psubsw m1, m4, m2 ;t2a 2686*c0909341SAndroid Build Coastguard Worker paddsw m4, m2 ;out0 2687*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*3], m4 ;out0 2688*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+gprsize*2+16*6] ;t7a 2689*c0909341SAndroid Build Coastguard Worker psubsw m2, m3, m7 ;t6 2690*c0909341SAndroid Build Coastguard Worker paddsw m3, m7 ;-out3 2691*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*6], m3 ;-out3 2692*c0909341SAndroid Build Coastguard Worker psubsw m3, m0, m4 ;t7 2693*c0909341SAndroid Build Coastguard Worker paddsw m0, m4 ;out12 2694*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*12], m3 2695*c0909341SAndroid Build Coastguard Worker mova 
m3, [rsp+gprsize*2+16*7] ;t3 2696*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16* 7], m2 ;out4 2697*c0909341SAndroid Build Coastguard Worker psubsw m2, m5, m3 ;t3a 2698*c0909341SAndroid Build Coastguard Worker paddsw m5, m3 ;-out15 2699*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*11], m2 2700*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+32*5] ;t15 2701*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*10], m1 ;-out7 2702*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*0] ;t11 2703*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*0 ], m5 ;-out15 2704*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*1] ;t10 2705*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*1 ], m4 ;-out11 2706*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+gprsize*2+16*2] ;t14 2707*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*2 ], m0 ;out12 2708*c0909341SAndroid Build Coastguard Worker psubsw m0, m3, m4 ;t14a 2709*c0909341SAndroid Build Coastguard Worker paddsw m3, m4 ;t10a 2710*c0909341SAndroid Build Coastguard Worker psubsw m5, m1, m2 ;t15a 2711*c0909341SAndroid Build Coastguard Worker paddsw m1, m2 ;t11a 2712*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 0, 2, 4, 6, 3784, 1567 ;t14, t15 2713*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*4] ;t8 2714*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+gprsize*2+16*5] ;t9 2715*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*4], m3 ;t10a 2716*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*5], m1 ;t11a 2717*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*8] ;t12 2718*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*9] ;t13 2719*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*8], m5 ;t14 2720*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*9], m0 
;t15 2721*c0909341SAndroid Build Coastguard Worker psubsw m5, m2, m3 ;t12a 2722*c0909341SAndroid Build Coastguard Worker paddsw m2, m3 ;t8a 2723*c0909341SAndroid Build Coastguard Worker psubsw m0, m4, m1 ;t13a 2724*c0909341SAndroid Build Coastguard Worker paddsw m4, m1 ;t9a 2725*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 0, 1, 3, 6, 1567, 3784 ;t13, t12 2726*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+gprsize*2+16*4] ;t10a 2727*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*5] ;t11a 2728*c0909341SAndroid Build Coastguard Worker psubsw m3, m2, m6 ;t10 2729*c0909341SAndroid Build Coastguard Worker paddsw m2, m6 ;-out1 2730*c0909341SAndroid Build Coastguard Worker paddsw m6, m4, m1 ;out14 2731*c0909341SAndroid Build Coastguard Worker psubsw m4, m1 ;t11 2732*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*14], m4 2733*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16* 4], m2 ;-out1 2734*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+gprsize*2+16*8] ;t14 2735*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*9] ;t15 2736*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16* 9], m3 ;out6 2737*c0909341SAndroid Build Coastguard Worker psubsw m3, m0, m4 ;t14a 2738*c0909341SAndroid Build Coastguard Worker paddsw m0, m4 ;out2 2739*c0909341SAndroid Build Coastguard Worker psubsw m4, m5, m2 ;t15a 2740*c0909341SAndroid Build Coastguard Worker paddsw m5, m2 ;-out13 2741*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16* 5], m0 ;out2 2742*c0909341SAndroid Build Coastguard Worker ret 2743*c0909341SAndroid Build Coastguard WorkerALIGN function_align 2744*c0909341SAndroid Build Coastguard Worker.main_pass1_end: 2745*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*14] 2746*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*14], m5 2747*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*15], m6 
2748*c0909341SAndroid Build Coastguard Worker mova m5, [o(pw_2896_2896)] 2749*c0909341SAndroid Build Coastguard Worker mova m6, [o(pw_2896_m2896)] 2750*c0909341SAndroid Build Coastguard Worker mova m7, [o(pd_2048)] 2751*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m3, m4 2752*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m4 2753*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m5, m2 2754*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m6 2755*c0909341SAndroid Build Coastguard Worker pmaddwd m1, m5, m3 2756*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m6 2757*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m7}, m4, m2, m1, m3 2758*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12}, m4, m1, m2, m3 2759*c0909341SAndroid Build Coastguard Worker packssdw m4, m1 ;-out5 2760*c0909341SAndroid Build Coastguard Worker packssdw m2, m3 ;out10 2761*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16* 8], m4 2762*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16* 9] 2763*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m3, m0 2764*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m0 2765*c0909341SAndroid Build Coastguard Worker pmaddwd m0, m5, m1 2766*c0909341SAndroid Build Coastguard Worker pmaddwd m1, m6 2767*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m5, m3 2768*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m6 2769*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m7}, m0, m1, m4, m3 2770*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12}, m0, m4, m1, m3 2771*c0909341SAndroid Build Coastguard Worker packssdw m0, m4 ;out6 2772*c0909341SAndroid Build Coastguard Worker packssdw m1, m3 ;-out9 2773*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16* 9], m0 2774*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16* 7] 2775*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+gprsize*2+16*12] 2776*c0909341SAndroid Build 
Coastguard Worker punpcklwd m3, m0, m4 2777*c0909341SAndroid Build Coastguard Worker punpckhwd m0, m4 2778*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m5, m3 2779*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m6 2780*c0909341SAndroid Build Coastguard Worker pmaddwd m5, m0 2781*c0909341SAndroid Build Coastguard Worker pmaddwd m0, m6 2782*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m7}, m4, m3, m5, m0 2783*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12}, m4, m5, m3, m0 2784*c0909341SAndroid Build Coastguard Worker packssdw m4, m5 ;out4 2785*c0909341SAndroid Build Coastguard Worker packssdw m3, m0 ;-out11 2786*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16* 7], m4 2787*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+gprsize*2+16*10] 2788*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize*2+16*11] 2789*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m4, m5 2790*c0909341SAndroid Build Coastguard Worker punpckhwd m4, m5 2791*c0909341SAndroid Build Coastguard Worker pmaddwd m5, m0, [o(pw_2896_2896)] 2792*c0909341SAndroid Build Coastguard Worker pmaddwd m0, m6 2793*c0909341SAndroid Build Coastguard Worker pmaddwd m6, m4 2794*c0909341SAndroid Build Coastguard Worker pmaddwd m4, [o(pw_2896_2896)] 2795*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m7}, m5, m0, m6, m4 2796*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12}, m0, m6, m5, m4 2797*c0909341SAndroid Build Coastguard Worker packssdw m0, m6 ;out8 2798*c0909341SAndroid Build Coastguard Worker packssdw m5, m4 ;-out7 2799*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*10], m5 2800*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+gprsize*2+16* 2] ;out12 2801*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize*2+16*14] ;-out13 2802*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+gprsize*2+16*15] ;out14 2803*c0909341SAndroid Build Coastguard Worker ret 2804*c0909341SAndroid Build 
Coastguard WorkerALIGN function_align 2805*c0909341SAndroid Build Coastguard Workercglobal_label .main_pass2_end 2806*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_2896x8)] 2807*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16* 9] 2808*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*14] 2809*c0909341SAndroid Build Coastguard Worker paddsw m0, m1, m2 2810*c0909341SAndroid Build Coastguard Worker psubsw m1, m2 2811*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m7 ;out6 2812*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m7 ;-out9 2813*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16* 9], m0 2814*c0909341SAndroid Build Coastguard Worker psubsw m2, m3, m4 2815*c0909341SAndroid Build Coastguard Worker paddsw m3, m4 2816*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m7 ;out10 2817*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m7 ;-out5 2818*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16* 8], m3 2819*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16* 7] 2820*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+gprsize*2+16*12] 2821*c0909341SAndroid Build Coastguard Worker paddsw m0, m3, m4 2822*c0909341SAndroid Build Coastguard Worker psubsw m3, m4 2823*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m7 ;out4 2824*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m7 ;-out11 2825*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16* 7], m0 2826*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*10] 2827*c0909341SAndroid Build Coastguard Worker paddsw m4, m0, [rsp+gprsize*2+16*11] 2828*c0909341SAndroid Build Coastguard Worker psubsw m0, [rsp+gprsize*2+16*11] 2829*c0909341SAndroid Build Coastguard Worker pmulhrsw m4, m7 ;-out7 2830*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m7 ;out8 2831*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*10], m4 2832*c0909341SAndroid Build Coastguard 
Worker mova m4, [rsp+gprsize*2+16*2 ] ;out12 2833*c0909341SAndroid Build Coastguard Worker ret 2834*c0909341SAndroid Build Coastguard Worker 2835*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN flipadst, dct 2836*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN flipadst, adst 2837*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN flipadst, flipadst 2838*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN flipadst, identity 2839*c0909341SAndroid Build Coastguard Worker 2840*c0909341SAndroid Build Coastguard Workercglobal iflipadst_16x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2 2841*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_2896x8)] 2842*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m7, [coeffq+16*0 ] 2843*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m7, [coeffq+16*1 ] 2844*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m7, [coeffq+16*14] 2845*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m7, [coeffq+16*15] 2846*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*7], m0 2847*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*8], m1 2848*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*9], m2 2849*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+32*5], m3 2850*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m7, [coeffq+16*6 ] 2851*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m7, [coeffq+16*7 ] 2852*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m7, [coeffq+16*8 ] 2853*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m7, [coeffq+16*9 ] 2854*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*3], m2 2855*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*4], m3 2856*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*5], m0 2857*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*6], m1 2858*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m7, [coeffq+16*2 ] 
; NOTE(review): the instructions from here up to the INV_TXFM_16X8_FN
; invocations continue a routine whose cglobal entry lies above this point
; (its labels jump into iflipadst_8x8, so presumably the 16x8 flipadst
; kernel -- confirm). They are kept unchanged, one instruction per line.
    ; Scale the remaining coefficient rows by the factor already held in m7.
    ; m7 is multiplied last because it is both the multiplier and a row.
    pmulhrsw    m1, m7, [coeffq+16*3 ]
    pmulhrsw    m2, m7, [coeffq+16*4 ]
    pmulhrsw    m3, m7, [coeffq+16*5 ]
    pmulhrsw    m4, m7, [coeffq+16*10]
    pmulhrsw    m5, m7, [coeffq+16*11]
    pmulhrsw    m6, m7, [coeffq+16*12]
    pmulhrsw    m7, [coeffq+16*13]

    call        m(iadst_16x8_internal_8bpc).main
    call        m(iadst_16x8_internal_8bpc).main_pass1_end

    mova        m7, [rsp+gprsize+16*0]
    SAVE_8ROWS  coeffq+16*0, 32
    LOAD_8ROWS  rsp+gprsize+16*3, 16
    mova        [rsp+gprsize+16*0], m7
    ; Park the caller's tx2q in r3 and point tx2q at a local label before
    ; entering the shared 8x8 tail (presumably that tail leaves through an
    ; indirect jump via tx2q -- it is not visible here).
    mov         r3, tx2q
    lea         tx2q, [o(.pass1_end)]
    jmp         m(iflipadst_8x8_internal_8bpc).pass1_end

.pass1_end:
    SAVE_8ROWS  coeffq+16*1, 32
    LOAD_8ROWS  coeffq+16*0, 32
    mova        [rsp+gprsize+16*0], m7
    mov         tx2q, r3                ; restore the caller's continuation
    jmp         m(iflipadst_8x8_internal_8bpc).pass1_end

.pass2:
    lea         tx2q, [o(.end)]
    lea         r3, [dstq+8]            ; r3 = dst advanced by 8 bytes, used by .end
    jmp         m(iflipadst_8x8_internal_8bpc).pass2_main

.end:
    LOAD_8ROWS  coeffq+16*1, 32
    lea         tx2q, [o(m(idct_8x16_internal_8bpc).end1)]
    mov         dstq, r3
    jmp         m(iflipadst_8x8_internal_8bpc).pass2_main


INV_TXFM_16X8_FN identity, dct
INV_TXFM_16X8_FN identity, adst
INV_TXFM_16X8_FN identity, flipadst
INV_TXFM_16X8_FN identity, identity

; iidentity_16x8_internal_8bpc
;
; Identity-transform kernel for 16x8 blocks. Named arguments: dst, stride,
; coeff, eob, tx2. Falling in from the entry point runs the first pass;
; .pass2 is a separate entry for the second pass (presumably reached via tx2q
; by the INV_TXFM_16X8_FN wrappers -- confirm).
;
; First pass: .pass1 runs twice, each time on eight 16-byte rows. For each row
;     x   = pmulhrsw(row, pw_2896x8)
;     out = x + pmulhrsw(pmulhrsw(x, pw_1697x16), pw_16384)     (saturating add)
; and then jumps to idct_8x8_internal_8bpc.pass1_end3 with tx2q selecting
; where execution resumes (.pass1_end after the first run, the caller's
; original tx2q after the second).
;
; Scratch: r3 (saved tx2q in pass 1, dst+8 in pass 2), m0-m7,
;          [rsp+gprsize+16*0] and [rsp+gprsize+16*1].
cglobal iidentity_16x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    add         coeffq, 16*16
    ; Preload the four odd rows of the first group; .pass1 expects them in m4-m7.
    mova        m4, [coeffq-16*7]
    mova        m5, [coeffq-16*5]
    mova        m6, [coeffq-16*3]
    mova        m7, [coeffq-16*1]
    mov         r3, tx2q                ; keep the caller's continuation
    lea         tx2q, [o(.pass1_end)]

.pass1:
    mova        m0, [o(pw_2896x8)]
    mova        m2, [o(pw_1697x16)]
    mova        m3, [o(pw_16384)]
    sub         coeffq, 8*16
    REPX        {pmulhrsw x, m0}, m4, m5, m6, m7
    pmulhrsw    m1, m2, m4
    pmulhrsw    m1, m3
    paddsw      m1, m4 ; 1
    pmulhrsw    m4, m2, m5
    pmulhrsw    m4, m3
    paddsw      m4, m5 ; 3
    pmulhrsw    m5, m2, m6
    pmulhrsw    m5, m3
    paddsw      m5, m6 ; 5
    pmulhrsw    m6, m2, m7
    pmulhrsw    m6, m3
    paddsw      m7, m6 ; 7
    pmulhrsw    m6, m0, [coeffq+16*6]
    mova        [rsp+gprsize+16*0], m4  ; row 3 waits on the stack until m3 is free
    pmulhrsw    m4, m2, m6
    pmulhrsw    m4, m3
    paddsw      m6, m4 ; 6
    pmulhrsw    m4, m0, [coeffq+16*4]
    mova        [rsp+gprsize+16*1], m6  ; row 6 is handed over in this stack slot
    pmulhrsw    m6, m2, m4
    pmulhrsw    m6, m3
    paddsw      m4, m6 ; 4
    pmulhrsw    m6, m0, [coeffq+16*2]
    pmulhrsw    m0, [coeffq+16*0]
    pmulhrsw    m2, m6                  ; m2 (pw_1697x16) is consumed here...
    pmulhrsw    m2, m3
    paddsw      m2, m6 ; 2
    pmulhrsw    m6, m0, [o(pw_1697x16)] ; ...so row 0 reads the constant from memory
    pmulhrsw    m6, m3
    mova        m3, [rsp+gprsize+16*0]  ; row 3 back from the stack
    paddsw      m0, m6
    jmp         m(idct_8x8_internal_8bpc).pass1_end3

.pass1_end:
    ; Store the first run's m4-m7, fetch the odd rows of the second group into
    ; m4-m7, then overwrite those source slots with the first run's m0-m3.
    mova        [coeffq+16*1], m4
    mova        [coeffq+16*3], m5
    mova        [coeffq+16*5], m6
    mova        [coeffq+16*7], m7
    mova        m4, [coeffq-16*7]
    mova        m5, [coeffq-16*5]
    mova        m6, [coeffq-16*3]
    mova        m7, [coeffq-16*1]
    mova        [coeffq-16*7], m0
    mova        [coeffq-16*5], m1
    mova        [coeffq-16*3], m2
    mova        [coeffq-16*1], m3
    mov         tx2q, r3                ; second run continues at the caller's tx2q
    jmp         .pass1

.pass2:
    lea         tx2q, [o(.end)]
    lea         r3, [dstq+8]            ; r3 = dst advanced by 8 bytes, used by .end
    jmp         m(iidentity_8x8_internal_8bpc).end

.end:
    LOAD_8ROWS  coeffq+16*1, 32
    lea         tx2q, [o(m(idct_8x16_internal_8bpc).end1)]
    mov         dstq, r3
    jmp         m(iidentity_8x8_internal_8bpc).end
; INV_TXFM_16X16_FN type1, type2
;
; Instantiates the 16x16 entry point for one transform-type pair by expanding
; INV_TXFM_FN (defined elsewhere) with the arguments 16x16, 8, 16*16.
; For the dct_dct pair only, it also emits a DC-only path: the first
; coefficient is multiplied by pw_2896x8, the word at [coeffq] is overwritten
; with eobd (presumably zero on this path, which would clear the DC
; coefficient -- the guarding test is not visible here), r2d is set to 8 and
; control passes to the 16x4 .dconly code with tx2q pointing at .end (RET).


%macro INV_TXFM_16X16_FN 2 ; type1, type2
    INV_TXFM_FN %1, %2, 16x16, 8, 16*16
%ifidn %1_%2, dct_dct
    movd        m1, [o(pw_2896x8)]
    pmulhrsw    m0, m1, [coeffq]
    movd        m2, [o(pw_8192)]
    mov         [coeffq], eobd
    mov         r2d, 8
    lea         tx2q, [o(m(inv_txfm_add_dct_dct_16x16_8bpc).end)]
    jmp         m(inv_txfm_add_dct_dct_16x4_8bpc).dconly
.end:
    RET
%endif
%endmacro

INV_TXFM_16X16_FN dct, dct
INV_TXFM_16X16_FN dct, adst
INV_TXFM_16X16_FN dct, flipadst
INV_TXFM_16X16_FN dct, identity

; idct_16x16_internal_8bpc
;
; Inverse-DCT kernel for 16x16 blocks. Named arguments: dst, stride, coeff,
; eob, tx2. Falling in from the entry point runs the first pass; .pass2 is
; the second-pass entry.
;
; The first pass processes the coefficient buffer in two halves. Each half
; calls idct_8x8_internal_8bpc.main and idct_16x8_internal_8bpc.main, then
; jumps twice into idct_8x8_internal_8bpc.pass1_end1 with m7 = pw_8192.
; tx2q is repointed at a local label before every such jump except the last,
; where the caller's value (kept in r3) is restored.
;
; NOTE(review): LOAD_8ROWS / SAVE_8ROWS / SAVE_7ROWS are defined elsewhere;
; the comments below assume they move m0..m7 to/from `base + k*stride` --
; confirm against the macro definitions.
;
; Scratch: r3, m0-m7, stack slots starting at rsp+gprsize+16*0.
cglobal idct_16x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    ; First half: rows based at coeffq+16*1 and coeffq+16*3, stride 64.
    LOAD_8ROWS  coeffq+16*1, 64
    call        m(idct_8x8_internal_8bpc).main
    SAVE_7ROWS  rsp+gprsize+16*3, 16
    LOAD_8ROWS  coeffq+16*3, 64
    call        m(idct_16x8_internal_8bpc).main
    mov         r3, tx2q                ; keep the caller's continuation
    lea         tx2q, [o(.pass1_end)]
    mova        m7, [o(pw_8192)]
    jmp         m(idct_8x8_internal_8bpc).pass1_end1

.pass1_end:
    SAVE_8ROWS  coeffq+16*17, 32
    LOAD_8ROWS  rsp+gprsize+16*3, 16    ; rows parked on the stack above
    mova        [rsp+gprsize+16*0], m7
    lea         tx2q, [o(.pass1_end1)]
    mova        m7, [o(pw_8192)]
    jmp         m(idct_8x8_internal_8bpc).pass1_end1

.pass1_end1:
    SAVE_8ROWS  coeffq+16*1, 32
    ; Second half: same sequence on rows based at coeffq+16*0 and coeffq+16*2.
    LOAD_8ROWS  coeffq+16*0, 64
    call        m(idct_8x8_internal_8bpc).main
    SAVE_7ROWS  rsp+gprsize+16*3, 16
    LOAD_8ROWS  coeffq+16*2, 64
    call        m(idct_16x8_internal_8bpc).main
    lea         tx2q, [o(.pass1_end2)]
    mova        m7, [o(pw_8192)]
    jmp         m(idct_8x8_internal_8bpc).pass1_end1

.pass1_end2:
    SAVE_8ROWS  coeffq+16*16, 32
    LOAD_8ROWS  rsp+gprsize+16*3, 16
    mova        [rsp+gprsize+16*0], m7
    mov         tx2q, r3                ; last jump continues at the caller's tx2q
    mova        m7, [o(pw_8192)]
    jmp         m(idct_8x8_internal_8bpc).pass1_end1

.pass2:
    lea         tx2q, [o(.end)]
    jmp         m(idct_8x16_internal_8bpc).pass2_pre

.end:
    LOAD_8ROWS  rsp+gprsize+16*3, 16
    mova        [rsp+gprsize+16*0], m7
    lea         tx2q, [o(.end1)]
    ; r3 is not written in this routine's second pass before this point;
    ; presumably .pass2_pre leaves the destination for these rows in it.
    mov         dstq, r3
    lea         r3, [dstq+8]            ; r3 = that destination + 8 bytes, used by .end1
    jmp         m(idct_8x8_internal_8bpc).end

.end1:
    ; Zero the first 16 rows (16*16 bytes) of the coefficient buffer, then
    ; step coeffq past them to the second half.
    pxor        m7, m7
    REPX        {mova [coeffq+16*x], m7}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15

    add         coeffq, 32*8
    mov         dstq, r3

    mova        m0, [coeffq+16*0 ]
    mova        m1, [coeffq+16*4 ]
    mova        m2, [coeffq+16*8 ]
    mova        m3, [coeffq+16*12]
    mova        m4, [coeffq+16*1 ]
    mova        m5, [coeffq+16*5 ]
    mova        m6, [coeffq+16*9 ]
    mova        m7, [coeffq+16*13]
    lea         tx2q, [o(m(idct_8x16_internal_8bpc).end)]
    jmp         m(idct_8x16_internal_8bpc).pass2_main


; ITX_16X16_ADST_LOAD_ODD_COEFS
;
; Loads the sixteen odd-numbered 16-byte rows (coeffq+16*1 .. coeffq+16*31).
; Eight are copied to stack slots rsp+gprsize+16*3 .. 16*10 (32*5 == 16*10)
; and eight are left in m0-m7. Clobbers m0-m7.
%macro ITX_16X16_ADST_LOAD_ODD_COEFS 0
    mova        m0, [coeffq+16*1 ]
    mova        m1, [coeffq+16*3 ]
    mova        m2, [coeffq+16*29]
    mova        m3, [coeffq+16*31]
    mova        [rsp+gprsize+16*7], m0
    mova        [rsp+gprsize+16*8], m1
    mova        [rsp+gprsize+16*9], m2
    mova        [rsp+gprsize+32*5], m3
    mova        m0, [coeffq+16*13]
    mova        m1, [coeffq+16*15]
    mova        m2, [coeffq+16*17]
    mova        m3, [coeffq+16*19]
    mova        [rsp+gprsize+16*3], m2
    mova        [rsp+gprsize+16*4], m3
    mova        [rsp+gprsize+16*5], m0
    mova        [rsp+gprsize+16*6], m1
    mova        m0, [coeffq+16*5 ]
    mova        m1, [coeffq+16*7 ]
    mova        m2, [coeffq+16*9 ]
    mova        m3, [coeffq+16*11]
    mova        m4, [coeffq+16*21]
    mova        m5, [coeffq+16*23]
    mova        m6, [coeffq+16*25]
    mova        m7, [coeffq+16*27]
%endmacro

; ITX_16X16_ADST_LOAD_EVEN_COEFS
;
; Same layout as ITX_16X16_ADST_LOAD_ODD_COEFS, but for the even-numbered
; rows (coeffq+16*0 .. coeffq+16*30). Clobbers m0-m7.
%macro ITX_16X16_ADST_LOAD_EVEN_COEFS 0
    mova        m0, [coeffq+16*0 ]
    mova        m1, [coeffq+16*2 ]
    mova        m2, [coeffq+16*28]
    mova        m3, [coeffq+16*30]
    mova        [rsp+gprsize+16*7], m0
    mova        [rsp+gprsize+16*8], m1
    mova        [rsp+gprsize+16*9], m2
    mova        [rsp+gprsize+32*5], m3
    mova        m0, [coeffq+16*12]
    mova        m1, [coeffq+16*14]
    mova        m2, [coeffq+16*16]
    mova        m3, [coeffq+16*18]
    mova        [rsp+gprsize+16*3], m2
    mova        [rsp+gprsize+16*4], m3
    mova        [rsp+gprsize+16*5], m0
    mova        [rsp+gprsize+16*6], m1
    mova        m0, [coeffq+16*4 ]
    mova        m1, [coeffq+16*6 ]
    mova        m2, [coeffq+16*8 ]
    mova        m3, [coeffq+16*10]
    mova        m4, [coeffq+16*20]
    mova        m5, [coeffq+16*22]
    mova        m6, [coeffq+16*24]
    mova        m7, [coeffq+16*26]
%endmacro
; 16x16 entry points whose first transform is ADST.

INV_TXFM_16X16_FN adst, dct
INV_TXFM_16X16_FN adst, adst
INV_TXFM_16X16_FN adst, flipadst

; iadst_16x16_internal_8bpc
;
; Inverse-ADST kernel for 16x16 blocks. Named arguments: dst, stride, coeff,
; eob, tx2. Falling in from the entry point runs the first pass; .pass2 is
; the second-pass entry.
;
; The first pass handles the odd-numbered coefficient rows, then the
; even-numbered ones. Each half calls iadst_16x8_internal_8bpc.main and
; .main_pass1_end, then jumps twice into iadst_8x8_internal_8bpc.pass1_end1
; with m7 = pw_8192. tx2q is repointed at a local label before every such
; jump except the last, where the caller's value (kept in r3) is restored.
;
; Scratch: r3, m0-m7, stack slots starting at rsp+gprsize+16*0.
cglobal iadst_16x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    ITX_16X16_ADST_LOAD_ODD_COEFS
    call        m(iadst_16x8_internal_8bpc).main
    call        m(iadst_16x8_internal_8bpc).main_pass1_end

    mov         r3, tx2q                ; keep the caller's continuation
    lea         tx2q, [o(.pass1_end)]
    mova        m7, [o(pw_8192)]
    jmp         m(iadst_8x8_internal_8bpc).pass1_end1

.pass1_end:
    SAVE_8ROWS  coeffq+16*17, 32
    LOAD_8ROWS  rsp+gprsize+16*3, 16
    mova        [rsp+gprsize+16*0], m7
    lea         tx2q, [o(.pass1_end1)]
    mova        m7, [o(pw_8192)]
    jmp         m(iadst_8x8_internal_8bpc).pass1_end1

.pass1_end1:
    SAVE_8ROWS  coeffq+16*1, 32
    ITX_16X16_ADST_LOAD_EVEN_COEFS
    call        m(iadst_16x8_internal_8bpc).main
    call        m(iadst_16x8_internal_8bpc).main_pass1_end

    lea         tx2q, [o(.pass1_end2)]
    mova        m7, [o(pw_8192)]
    jmp         m(iadst_8x8_internal_8bpc).pass1_end1

.pass1_end2:
    SAVE_8ROWS  coeffq+16*16, 32
    LOAD_8ROWS  rsp+gprsize+16*3, 16
    mova        [rsp+gprsize+16*0], m7
    mov         tx2q, r3                ; last jump continues at the caller's tx2q
    mova        m7, [o(pw_8192)]
    jmp         m(iadst_8x8_internal_8bpc).pass1_end1

.pass2:
    lea         tx2q, [o(.end)]
    jmp         m(iadst_8x16_internal_8bpc).pass2_pre

.end:
    LOAD_8ROWS  rsp+gprsize+16*3, 16
    mova        [rsp+gprsize+16*0], m7
    lea         tx2q, [o(.end1)]
    ; r3 is not written in this routine's second pass before this point;
    ; presumably .pass2_pre leaves the destination for these rows in it.
    mov         dstq, r3
    lea         r3, [dstq+8]            ; r3 = that destination + 8 bytes, used by .end1
    jmp         m(iadst_8x8_internal_8bpc).end

.end1:
    ; Zero the first 16 rows (16*16 bytes) of the coefficient buffer, then
    ; step coeffq past them to the second half.
    pxor        m7, m7
    REPX        {mova [coeffq+16*x], m7}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15

    add         coeffq, 32*8
    mov         dstq, r3

    ; Even rows of the second half: four go to stack slots 5-8, four stay in
    ; m0-m3 for iadst_8x16_internal_8bpc.pass2_main.
    mova        m4, [coeffq+16*0 ]
    mova        m5, [coeffq+16*2 ]
    mova        m0, [coeffq+16*4 ]
    mova        m1, [coeffq+16*6 ]
    mova        m2, [coeffq+16*8 ]
    mova        m3, [coeffq+16*10]
    mova        m6, [coeffq+16*12]
    mova        m7, [coeffq+16*14]
    mova        [rsp+gprsize+16*7], m4
    mova        [rsp+gprsize+16*8], m5
    mova        [rsp+gprsize+16*5], m6
    mova        [rsp+gprsize+16*6], m7
    lea         tx2q, [o(m(iadst_8x16_internal_8bpc).end)]
    jmp         m(iadst_8x16_internal_8bpc).pass2_main


INV_TXFM_16X16_FN flipadst, dct
INV_TXFM_16X16_FN flipadst, adst
INV_TXFM_16X16_FN flipadst, flipadst

; iflipadst_16x16_internal_8bpc
;
; Flipped inverse-ADST kernel for 16x16 blocks. Named arguments: dst, stride,
; coeff, eob, tx2. It reuses iadst_16x8_internal_8bpc.main for the arithmetic.
; Compared with iadst_16x16_internal_8bpc above, it uses pw_m8192 instead of
; pw_8192 and the iflipadst_8x8 tails, and it stores its first-pass results
; to the coefficient buffer in a different order.
;
; Scratch: r3, m0-m7, stack slots starting at rsp+gprsize+16*0.
cglobal iflipadst_16x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    ITX_16X16_ADST_LOAD_ODD_COEFS
    call        m(iadst_16x8_internal_8bpc).main
    call        m(iadst_16x8_internal_8bpc).main_pass1_end

    mov         r3, tx2q                ; keep the caller's continuation
    lea         tx2q, [o(.pass1_end)]
    mova        m7, [o(pw_m8192)]
    jmp         m(iflipadst_8x8_internal_8bpc).pass1_end1

.pass1_end:
    SAVE_8ROWS  coeffq+16*1, 32
    LOAD_8ROWS  rsp+gprsize+16*3, 16
    mova        [rsp+gprsize+16*0], m7
    lea         tx2q, [o(.pass1_end1)]
    mova        m7, [o(pw_m8192)]
    jmp         m(iflipadst_8x8_internal_8bpc).pass1_end1

.pass1_end1:
    SAVE_8ROWS  coeffq+16*17, 32
    ITX_16X16_ADST_LOAD_EVEN_COEFS
    call        m(iadst_16x8_internal_8bpc).main
    call        m(iadst_16x8_internal_8bpc).main_pass1_end

    ; Unlike the plain ADST kernel, the register half is spilled to the
    ; coefficient buffer here and the stack half goes through the tail first.
    mova        m7, [rsp+gprsize+16*0]
    SAVE_8ROWS  coeffq+16*0, 32
    LOAD_8ROWS  rsp+gprsize+16*3, 16
    mova        [rsp+gprsize+16*0], m7
    lea         tx2q, [o(.pass1_end2)]
    mova        m7, [o(pw_m8192)]
    jmp         m(iflipadst_8x8_internal_8bpc).pass1_end1

.pass1_end2:
    SAVE_8ROWS  coeffq+16*16, 32
    LOAD_8ROWS  coeffq+16* 0, 32        ; the half spilled in .pass1_end1
    mova        [rsp+gprsize+16*0], m7
    mov         tx2q, r3                ; last jump continues at the caller's tx2q
    mova        m7, [o(pw_m8192)]
    jmp         m(iflipadst_8x8_internal_8bpc).pass1_end1

.pass2:
    lea         tx2q, [o(.end)]
    lea         r3, [dstq+8]            ; r3 = dst advanced by 8 bytes, used by .end1
    jmp         m(iflipadst_8x16_internal_8bpc).pass2_pre

.end:
    LOAD_8ROWS  rsp+gprsize+16*3, 16
    mova        [rsp+gprsize+16*0], m7
    lea         tx2q, [o(.end1)]
    lea         dstq, [dstq+strideq*2]
    jmp         m(iflipadst_8x8_internal_8bpc).end

.end1:
    ; Zero the first 16 rows (16*16 bytes) of the coefficient buffer, then
    ; step coeffq past them to the second half.
    pxor        m7, m7
    REPX        {mova [coeffq+16*x], m7}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15

    add         coeffq, 32*8

    mova        m4, [coeffq+16*0 ]
    mova        m5, [coeffq+16*2 ]
    mova        m0, [coeffq+16*4 ]
    mova        m1, [coeffq+16*6 ]
    mova        m2, [coeffq+16*8 ]
    mova        m3, [coeffq+16*10]
    mova        m6, [coeffq+16*12]
    mova        m7, [coeffq+16*14]
    mova        [rsp+gprsize+16*7], m4
    mova        [rsp+gprsize+16*8], m5
    mova        [rsp+gprsize+16*5], m6
    mova        [rsp+gprsize+16*6], m7

    lea         tx2q, [o(.end2)]
    mov         dstq, r3
    jmp         m(iflipadst_8x16_internal_8bpc).pass2_main

.end2:
    LOAD_8ROWS  rsp+gprsize+16*3, 16
    mova        [rsp+gprsize+16*0], m7
    lea         tx2q, [o(m(idct_8x16_internal_8bpc).end1)]
    lea         dstq, [dstq+strideq*2]
    jmp         m(iflipadst_8x8_internal_8bpc).end


; IDTX16B src/dst, tmp, pw_1697x16
;
; Arguments are register numbers. Computes
;     tmp = pmulhrsw(pw_1697x16, src) >> 1        (arithmetic shift)
;     src = pavgw(src, tmp)
; tmp may name the same register as the constant when the constant is no
; longer needed afterwards, since the first instruction overwrites tmp.
%macro IDTX16B 3 ; src/dst, tmp, pw_1697x16
    pmulhrsw    m%2, m%3, m%1
    psraw       m%2, 1
    pavgw       m%1, m%2
%endmacro

INV_TXFM_16X16_FN identity, dct
INV_TXFM_16X16_FN identity, identity

; iidentity_16x16_internal_8bpc
;
; Identity-transform kernel for 16x16 blocks. Named arguments: dst, stride,
; coeff, eob, tx2. Falling in from the entry point runs the first pass;
; .pass2 is the second-pass entry.
;
; First pass: .pass1 runs four times, each on eight rows spaced 32 bytes
; apart. Every row goes through IDTX16B, then control jumps to
; idct_8x8_internal_8bpc.pass1_end3. After each of the first three runs the
; results are written back in place with SAVE_8ROWS and coeffq is moved to the
; next group; the fourth run continues at the caller's tx2q (kept in r3).
;
; Second pass: .end runs four times (entered from .pass2, .end1, .end2 and
; .end3). It applies IDTX16 (defined elsewhere) and a pmulhrsw by pw_2048 to
; eight rows and jumps to idct_8x8_internal_8bpc.end3.
;
; Scratch: r3, m0-m7, [rsp+gprsize+16*0] .. [rsp+gprsize+16*2].
cglobal iidentity_16x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    add         coeffq, 16*17
    mov         r3, tx2q                ; keep the caller's continuation
    lea         tx2q, [o(.pass1_end)]

.pass1:
    mova        m6, [o(pw_1697x16)]
    mova        m7, [coeffq+32*6]
    mova        m0, [coeffq+32*0]
    mova        m1, [coeffq+32*1]
    mova        m2, [coeffq+32*2]
    mova        m3, [coeffq+32*3]
    mova        m4, [coeffq+32*4]
    REPX        {IDTX16B x, 5, 6}, 7, 0, 1, 2, 3, 4
    mova        m5, [coeffq+32*5]
    mova        [rsp+gprsize+16*1], m7  ; row 6 is handed over in this stack slot
    IDTX16B     5, 7, 6
    mova        m7, [coeffq+32*7]
    IDTX16B     7, 6, 6                 ; last use of the constant: m6 doubles as tmp
    jmp         m(idct_8x8_internal_8bpc).pass1_end3

.pass1_end:
    SAVE_8ROWS  coeffq, 32
    sub         coeffq, 16
    lea         tx2q, [o(.pass1_end1)]
    jmp         .pass1

.pass1_end1:
    SAVE_8ROWS  coeffq, 32
    sub         coeffq, 15*16
    lea         tx2q, [o(.pass1_end2)]
    jmp         .pass1

.pass1_end2:
    SAVE_8ROWS  coeffq, 32
    sub         coeffq, 16
    mov         tx2q, r3                ; fourth run continues at the caller's tx2q
    jmp         .pass1

.pass2:
    lea         r3, [dstq+8]            ; r3 = dst advanced by 8 bytes, used by .end2
    lea         tx2q, [o(.end1)]
    ; falls through into .end

.end:
    ; m7 and m4 are spilled so they can hold pw_1697x16 and the IDTX16 temp.
    mova        [rsp+gprsize+16*0], m7
    mova        [rsp+gprsize+16*1], m4
    mova        m7, [o(pw_1697x16)]
    REPX        {IDTX16 x, 4, 7}, 5, 6, 0, 1, 2, 3
    mova        m4, [o(pw_2048)]
    pmulhrsw    m5, m4
    pmulhrsw    m6, m4
    mova        [rsp+gprsize+16*2], m5
    mova        m5, [rsp+gprsize+16*1]  ; the row that arrived in m4
    mova        [rsp+gprsize+16*1], m6
    IDTX16      5, 6, 7
    mova        m6, [rsp+gprsize+16*0]  ; the row that arrived in m7
    IDTX16      6, 7, 7                 ; last use of the constant: m7 doubles as tmp
    REPX        {pmulhrsw x, m4}, m0, m1, m2, m3, m6
    pmulhrsw    m4, m5
    mova        [rsp+gprsize+16*0], m6
    jmp         m(idct_8x8_internal_8bpc).end3

.end1:
    LOAD_8ROWS  coeffq+16*1, 32
    lea         tx2q, [o(.end2)]
    lea         dstq, [dstq+strideq*2]
    jmp         .end

.end2:
    ; Zero the first 16 rows (16*16 bytes) of the coefficient buffer, then
    ; step coeffq past them to the second half.
    pxor        m7, m7
    REPX        {mova [coeffq+16*x], m7}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15

    add         coeffq, 32*8
    LOAD_8ROWS  coeffq, 32
    lea         tx2q, [o(.end3)]
    mov         dstq, r3
    jmp         .end

.end3:
    LOAD_8ROWS  coeffq+16*1, 32
    lea         tx2q, [o(m(idct_8x16_internal_8bpc).end1)]
    lea         dstq, [dstq+strideq*2]
    jmp         .end


; inv_txfm_add_dct_dct_8x32_8bpc
;
; Entry point for the 8x32 DCT/DCT inverse transform. Named arguments: dst,
; stride, coeff, eob, tx2; the cglobal line requests 16*36 bytes of stack.
; With eob == 0 only the DC coefficient is processed (.dconly); otherwise the
; work is done by idct_8x32_internal_8bpc.
cglobal inv_txfm_add_dct_dct_8x32_8bpc, 4, 6, 8, 16*36, dst, stride, coeff, eob, tx2
%if ARCH_X86_32
    LEA         r5, $$
%endif
    test        eobd, eobd
    jz          .dconly
    call        m(idct_8x32_internal_8bpc)
    RET

.dconly:
    movd        m1, [o(pw_2896x8)]
    pmulhrsw    m0, m1, [coeffq]
    movd        m2, [o(pw_8192)]
    mov         [coeffq], eobd          ; eobd is 0 on this path: clears the DC coefficient
    pmulhrsw    m0, m2
    psrlw       m2, 2 ;pw_2048
    pmulhrsw    m0, m1
    pmulhrsw    m0, m2
    pshuflw     m0, m0, q0000
    punpcklwd   m0, m0
    mov         r3d, 8                  ; presumably the iteration count for .loop -- confirm
    lea         tx2q, [o(.end)]
    jmp         m(inv_txfm_add_dct_dct_8x8_8bpc).loop

.end:
    RET



; idct_8x32_internal_8bpc
;
; NOTE(review): only the beginning of this routine is visible here; it
; continues past the last instruction below. The visible part is kept
; unchanged, one instruction per line.
;
; Visible behaviour: when eob <= 106 (signed compare) the two high row groups
; (based at coeffq+16*3 and coeffq+16*2) are skipped and execution starts at
; .fast. Each group is run through idct_8x8_internal_8bpc.main and
; .pass1_end1 with m7 = pw_8192, and the results are filed into stack slots;
; the trailing inNN comments name the input each slot holds.
cglobal idct_8x32_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    cmp         eobd, 106
    jle         .fast

    LOAD_8ROWS  coeffq+16*3, 64
    call        m(idct_8x8_internal_8bpc).main
    mova        m7, [o(pw_8192)]
    lea         tx2q, [o(.pass1)]
    jmp         m(idct_8x8_internal_8bpc).pass1_end1

.pass1:
    mova        [rsp+gprsize+16*9 ], m0 ;in24
    mova        [rsp+gprsize+16*10], m4 ;in28
    mova        [rsp+gprsize+16*17], m2 ;in26
    mova        [rsp+gprsize+16*18], m6 ;in30
    mova        [rsp+gprsize+16*31], m1 ;in25
    mova        [rsp+gprsize+16*30], m3 ;in27
    mova        [rsp+gprsize+16*27], m5 ;in29
    mova        [rsp+gprsize+16*34], m7 ;in31
    LOAD_8ROWS  coeffq+16*2, 64
    call        m(idct_8x8_internal_8bpc).main
    mova        m7, [o(pw_8192)]
    lea         tx2q, [o(.pass1_1)]
    jmp         m(idct_8x8_internal_8bpc).pass1_end1

.pass1_1:
    mova        [rsp+gprsize+16*7 ], m0 ;in16
    mova        [rsp+gprsize+16*8 ], m4 ;in20
    mova        [rsp+gprsize+16*15], m2 ;in18
    mova        [rsp+gprsize+16*16], m6 ;in22
    mova        [rsp+gprsize+16*33], m1 ;in17
    mova        [rsp+gprsize+16*28], m3 ;in19
    mova        [rsp+gprsize+16*29], m5 ;in21
    mova        [rsp+gprsize+16*32], m7 ;in23

.fast:
    LOAD_8ROWS  coeffq+16*1, 64
    call        m(idct_8x8_internal_8bpc).main
    mova        m7, [o(pw_8192)]
    lea         tx2q, [o(.pass1_end)]
    jmp         m(idct_8x8_internal_8bpc).pass1_end1

.pass1_end:
    mova        [rsp+gprsize+16*5 ], m0 ;in8
    mova        [rsp+gprsize+16*6 ], m4 ;in12
    mova        [rsp+gprsize+16*13], m2 ;in10
    mova        [rsp+gprsize+16*14], m6 ;in14
    mova        [rsp+gprsize+16*21], m1 ;in9
    mova        [rsp+gprsize+16*24], m3 ;in11
    mova        [rsp+gprsize+16*25], m5 ;in13
    mova        [rsp+gprsize+16*20], m7 ;in15
    LOAD_8ROWS  coeffq+16*0, 64
    call        m(idct_8x8_internal_8bpc).main
    mova        m7, [o(pw_8192)]
    lea         tx2q, [o(.pass1_end1)]
    jmp         m(idct_8x8_internal_8bpc).pass1_end1

.pass1_end1:
    mova        [rsp+gprsize+16*11], m2 ;in2
    mova        [rsp+gprsize+16*12], m6 ;in6
    mova        [rsp+gprsize+16*19], m1 ;in1
    mova        [rsp+gprsize+16*26], m3 ;in3
    mova        [rsp+gprsize+16*23], m5 ;in5
Worker mova [rsp+gprsize+16*22], m7 ;in7 3470*c0909341SAndroid Build Coastguard Worker mova m1, m4 ;in4 3471*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize+16*5 ] ;in8 3472*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize+16*6 ] ;in12 3473*c0909341SAndroid Build Coastguard Worker 3474*c0909341SAndroid Build Coastguard Worker cmp eobd, 106 3475*c0909341SAndroid Build Coastguard Worker jg .full 3476*c0909341SAndroid Build Coastguard Worker 3477*c0909341SAndroid Build Coastguard Worker pxor m4, m4 3478*c0909341SAndroid Build Coastguard Worker REPX {mova x, m4}, m5, m6, m7 3479*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).main 3480*c0909341SAndroid Build Coastguard Worker SAVE_7ROWS rsp+gprsize+16*3 , 16 3481*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize+16*11] 3482*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize+16*12] 3483*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize+16*13] 3484*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize+16*14] 3485*c0909341SAndroid Build Coastguard Worker pxor m4, m4 3486*c0909341SAndroid Build Coastguard Worker REPX {mova x, m4}, m5, m6, m7 3487*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_8bpc).main 3488*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize+16*0] 3489*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS rsp+gprsize+16*11, 16 3490*c0909341SAndroid Build Coastguard Worker 3491*c0909341SAndroid Build Coastguard Worker call .main_fast 3492*c0909341SAndroid Build Coastguard Worker jmp .pass2 3493*c0909341SAndroid Build Coastguard Worker 3494*c0909341SAndroid Build Coastguard Worker.full: 3495*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+gprsize+16*7 ] ;in16 3496*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize+16*8 ] ;in20 3497*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+gprsize+16*9 ] ;in24 3498*c0909341SAndroid Build Coastguard Worker mova m7, 
[rsp+gprsize+16*10] ;in28 3499*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).main 3500*c0909341SAndroid Build Coastguard Worker SAVE_7ROWS rsp+gprsize+16*3 , 16 3501*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*11, 16 3502*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_8bpc).main 3503*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize+16*0] 3504*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS rsp+gprsize+16*11, 16 3505*c0909341SAndroid Build Coastguard Worker call .main 3506*c0909341SAndroid Build Coastguard Worker 3507*c0909341SAndroid Build Coastguard Worker.pass2: 3508*c0909341SAndroid Build Coastguard Worker lea r3, [o(.end6)] 3509*c0909341SAndroid Build Coastguard Worker 3510*c0909341SAndroid Build Coastguard Worker.end: 3511*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0 ], m7 3512*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.end2)] 3513*c0909341SAndroid Build Coastguard Worker 3514*c0909341SAndroid Build Coastguard Worker.end1: 3515*c0909341SAndroid Build Coastguard Worker pxor m7, m7 3516*c0909341SAndroid Build Coastguard Worker REPX {mova [coeffq+16*x], m7}, 0, 1, 2, 3, 4, 5, 6, 7, \ 3517*c0909341SAndroid Build Coastguard Worker 8, 9, 10, 11, 12, 13, 14, 15, \ 3518*c0909341SAndroid Build Coastguard Worker 16, 17, 18, 19, 20, 21, 22, 23, \ 3519*c0909341SAndroid Build Coastguard Worker 24, 25, 26, 27, 28, 29, 30, 31 3520*c0909341SAndroid Build Coastguard Worker 3521*c0909341SAndroid Build Coastguard Worker jmp tx2q 3522*c0909341SAndroid Build Coastguard Worker 3523*c0909341SAndroid Build Coastguard Worker.end2: 3524*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.end3)] 3525*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).end 3526*c0909341SAndroid Build Coastguard Worker 3527*c0909341SAndroid Build Coastguard Worker.end3: 3528*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*11, 16 
3529*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0 ], m7 3530*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 3531*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.end4)] 3532*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).end 3533*c0909341SAndroid Build Coastguard Worker 3534*c0909341SAndroid Build Coastguard Worker.end4: 3535*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*19, 16 3536*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0 ], m7 3537*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 3538*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.end5)] 3539*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).end 3540*c0909341SAndroid Build Coastguard Worker 3541*c0909341SAndroid Build Coastguard Worker.end5: 3542*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*27, 16 3543*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0 ], m7 3544*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 3545*c0909341SAndroid Build Coastguard Worker mov tx2q, r3 3546*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).end 3547*c0909341SAndroid Build Coastguard Worker 3548*c0909341SAndroid Build Coastguard Worker.end6: 3549*c0909341SAndroid Build Coastguard Worker ret 3550*c0909341SAndroid Build Coastguard Worker 3551*c0909341SAndroid Build Coastguard WorkerALIGN function_align 3552*c0909341SAndroid Build Coastguard Workercglobal_label .main_veryfast 3553*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*19] ;in1 3554*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m0, [o(pw_4091x8)] ;t30,t31 3555*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, [o(pw_201x8)] ;t16,t17 3556*c0909341SAndroid Build Coastguard Worker mova m7, [o(pd_2048)] 3557*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*19], m0 ;t16 3558*c0909341SAndroid Build 
Coastguard Worker mova [rsp+gprsize*2+16*34], m3 ;t31 3559*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 3, 0, 1, 2, 7, 799, 4017 ;t17a, t30a 3560*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*20], m3 ;t17a 3561*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*33], m0 ;t30a 3562*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*22] ;in7 3563*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m1, [o(pw_3857x8)] ;t28,t29 3564*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, [o(pw_m1380x8)] ;t18,t19 3565*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*22], m1 ;t19 3566*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*31], m2 ;t28 3567*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 2, 1, 0, 3, 7, m4017, 799 ;t18a, t29a 3568*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*21], m2 ;t18a 3569*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*32], m1 ;t29a 3570*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*23] ;in5 3571*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m0, [o(pw_3973x8)] ;t26, t27 3572*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, [o(pw_995x8)] ;t20, t21 3573*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*23], m0 ;t20 3574*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*30], m3 ;t27 3575*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 3, 0, 1, 2, 7, 3406, 2276 ;t21a, t26a 3576*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*24], m3 ;t21a 3577*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*29], m0 ;t26a 3578*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*26] ;in3 3579*c0909341SAndroid Build Coastguard Worker pxor m0, m0 3580*c0909341SAndroid Build Coastguard Worker mova m3, m0 3581*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m2, [o(pw_4052x8)] 3582*c0909341SAndroid Build Coastguard 
Worker pmulhrsw m2, [o(pw_m601x8)] 3583*c0909341SAndroid Build Coastguard Worker jmp .main2 3584*c0909341SAndroid Build Coastguard Worker 3585*c0909341SAndroid Build Coastguard WorkerALIGN function_align 3586*c0909341SAndroid Build Coastguard Workercglobal_label .main_fast ;bottom half is zero 3587*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*19] ;in1 3588*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*20] ;in15 3589*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m0, [o(pw_4091x8)] ;t31a 3590*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, [o(pw_201x8)] ;t16a 3591*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m1, [o(pw_3035x8)] ;t30a 3592*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, [o(pw_m2751x8)] ;t17a 3593*c0909341SAndroid Build Coastguard Worker mova m7, [o(pd_2048)] 3594*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m1 ;t17 3595*c0909341SAndroid Build Coastguard Worker paddsw m0, m1 ;t16 3596*c0909341SAndroid Build Coastguard Worker psubsw m5, m3, m2 ;t30 3597*c0909341SAndroid Build Coastguard Worker paddsw m3, m2 ;t31 3598*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 1, 2, 7, 799, 4017 ;t17a, t30a 3599*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*19], m0 ;t16 3600*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*20], m5 ;t17a 3601*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*33], m4 ;t30a 3602*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*34], m3 ;t31 3603*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*21] ;in9 3604*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*22] ;in7 3605*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m0, [o(pw_3703x8)] 3606*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, [o(pw_1751x8)] 3607*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m1, [o(pw_3857x8)] 3608*c0909341SAndroid Build Coastguard Worker pmulhrsw 
m1, [o(pw_m1380x8)] 3609*c0909341SAndroid Build Coastguard Worker psubsw m4, m1, m0 ;t18 3610*c0909341SAndroid Build Coastguard Worker paddsw m0, m1 ;t19 3611*c0909341SAndroid Build Coastguard Worker psubsw m5, m2, m3 ;t29 3612*c0909341SAndroid Build Coastguard Worker paddsw m3, m2 ;t28 3613*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 1, 2, 7, m4017, 799 ;t18a, t29a 3614*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*21], m5 ;t18a 3615*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*22], m0 ;t19 3616*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*31], m3 ;t28 3617*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*32], m4 ;t29a 3618*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*23] ;in5 3619*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*24] ;in11 3620*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m0, [o(pw_3973x8)] 3621*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, [o(pw_995x8)] 3622*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m1, [o(pw_3513x8)] 3623*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, [o(pw_m2106x8)] 3624*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m1 ;t21 3625*c0909341SAndroid Build Coastguard Worker paddsw m0, m1 ;t20 3626*c0909341SAndroid Build Coastguard Worker psubsw m5, m3, m2 ;t26 3627*c0909341SAndroid Build Coastguard Worker paddsw m3, m2 ;t27 3628*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 1, 2, 7, 3406, 2276 ;t21a, t26a 3629*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*23], m0 ;t20 3630*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*24], m5 ;t21a 3631*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*29], m4 ;t26a 3632*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*30], m3 ;t27 3633*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*25] ;in13 3634*c0909341SAndroid Build 
Coastguard Worker mova m2, [rsp+gprsize*2+16*26] ;in3 3635*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m0, [o(pw_3290x8)] 3636*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, [o(pw_2440x8)] 3637*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m2, [o(pw_4052x8)] 3638*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, [o(pw_m601x8)] 3639*c0909341SAndroid Build Coastguard Worker jmp .main2 3640*c0909341SAndroid Build Coastguard Worker 3641*c0909341SAndroid Build Coastguard WorkerALIGN function_align 3642*c0909341SAndroid Build Coastguard Workercglobal_label .main 3643*c0909341SAndroid Build Coastguard Worker mova m7, [o(pd_2048)] 3644*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*19] ;in1 3645*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*20] ;in15 3646*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*33] ;in17 3647*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*34] ;in31 3648*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 0, 3, 4, 5, 7, 201, 4091 ;t16a, t31a 3649*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 2, 1, 4, 5, 7, 3035, 2751 ;t17a, t30a 3650*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m2 ;t17 3651*c0909341SAndroid Build Coastguard Worker paddsw m0, m2 ;t16 3652*c0909341SAndroid Build Coastguard Worker psubsw m5, m3, m1 ;t30 3653*c0909341SAndroid Build Coastguard Worker paddsw m3, m1 ;t31 3654*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 1, 2, 7, 799, 4017 ;t17a, t30a 3655*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*19], m0 ;t16 3656*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*20], m5 ;t17a 3657*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*33], m4 ;t30a 3658*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*34], m3 ;t31 3659*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*21] ;in9 3660*c0909341SAndroid Build 
Coastguard Worker mova m1, [rsp+gprsize*2+16*22] ;in7 3661*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*31] ;in25 3662*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*32] ;in23 3663*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 0, 3, 4, 5, 7, 1751, 3703 ;t18a, t29a 3664*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 2, 1, 4, 5, 7, 3857, 1380 ;t19a, t28a 3665*c0909341SAndroid Build Coastguard Worker psubsw m4, m2, m0 ;t18 3666*c0909341SAndroid Build Coastguard Worker paddsw m0, m2 ;t19 3667*c0909341SAndroid Build Coastguard Worker psubsw m5, m1, m3 ;t29 3668*c0909341SAndroid Build Coastguard Worker paddsw m3, m1 ;t28 3669*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 1, 2, 7, m4017, 799 ;t18a, t29a 3670*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*21], m5 ;t18a 3671*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*22], m0 ;t19 3672*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*31], m3 ;t28 3673*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*32], m4 ;t29a 3674*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*23] ;in5 3675*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*24] ;in11 3676*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*29] ;in21 3677*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*30] ;in27 3678*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 0, 3, 4, 5, 7, 995, 3973 ;t20a, t27a 3679*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 2, 1, 4, 5, 7, 3513, 2106 ;t21a, t26a 3680*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m2 ;t21 3681*c0909341SAndroid Build Coastguard Worker paddsw m0, m2 ;t20 3682*c0909341SAndroid Build Coastguard Worker psubsw m5, m3, m1 ;t26 3683*c0909341SAndroid Build Coastguard Worker paddsw m3, m1 ;t27 3684*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 1, 2, 7, 3406, 2276 ;t21a, 
t26a 3685*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*23], m0 ;t20 3686*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*24], m5 ;t21a 3687*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*29], m4 ;t26a 3688*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*30], m3 ;t27 3689*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*25] ;in13 3690*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*26] ;in3 3691*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*27] ;in29 3692*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*28] ;in19 3693*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 0, 3, 4, 5, 7, 2440, 3290 ;t22a, t25a 3694*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 2, 1, 4, 5, 7, 4052, 601 ;t23a, t24a 3695*c0909341SAndroid Build Coastguard Worker 3696*c0909341SAndroid Build Coastguard Worker.main2: 3697*c0909341SAndroid Build Coastguard Worker psubsw m4, m2, m0 ;t22 3698*c0909341SAndroid Build Coastguard Worker paddsw m0, m2 ;t23 3699*c0909341SAndroid Build Coastguard Worker psubsw m5, m1, m3 ;t25 3700*c0909341SAndroid Build Coastguard Worker paddsw m3, m1 ;t24 3701*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 1, 2, 7, m2276, 3406 ;t22a, t25a 3702*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*24] ;t21a 3703*c0909341SAndroid Build Coastguard Worker psubsw m1, m5, m2 ;t21 3704*c0909341SAndroid Build Coastguard Worker paddsw m5, m2 ;t22 3705*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*25], m5 ;t22 3706*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*29] ;t26a 3707*c0909341SAndroid Build Coastguard Worker psubsw m5, m4, m2 ;t26 3708*c0909341SAndroid Build Coastguard Worker paddsw m4, m2 ;t25 3709*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*28], m4 ;t25 3710*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 1, 2, 4, 7, 
m3784, 1567 ;t21a, t26a 3711*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*24], m5 ;t21a 3712*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*29], m1 ;t26a 3713*c0909341SAndroid Build Coastguard Worker 3714*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*23] ;t20 3715*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize*2+16*30] ;t27 3716*c0909341SAndroid Build Coastguard Worker psubsw m2, m0, m1 ;t20a 3717*c0909341SAndroid Build Coastguard Worker paddsw m0, m1 ;t23a 3718*c0909341SAndroid Build Coastguard Worker psubsw m6, m3, m5 ;t27a 3719*c0909341SAndroid Build Coastguard Worker paddsw m3, m5 ;t24a 3720*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 6, 2, 1, 5, 7, m3784, 1567 ;t20, t27 3721*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*26], m0 ;t23a 3722*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*27], m3 ;t24a 3723*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*30], m2 ;t27 3724*c0909341SAndroid Build Coastguard Worker 3725*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*20] ;t17a 3726*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*21] ;t18a 3727*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*32] ;t29a 3728*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*33] ;t30a 3729*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m1 ;t18 3730*c0909341SAndroid Build Coastguard Worker paddsw m0, m1 ;t17 3731*c0909341SAndroid Build Coastguard Worker psubsw m5, m3, m2 ;t29 3732*c0909341SAndroid Build Coastguard Worker paddsw m3, m2 ;t30 3733*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 1, 2, 7, 1567, 3784 ;t18a, t29a 3734*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*20], m0 ;t17 3735*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*21], m5 ;t18a 3736*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*32], 
m4 ;t29a 3737*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*33], m3 ;t30 3738*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*19] ;t16 3739*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*22] ;t19 3740*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*31] ;t28 3741*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*34] ;t31 3742*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m1 ;t19a 3743*c0909341SAndroid Build Coastguard Worker paddsw m0, m1 ;t16a 3744*c0909341SAndroid Build Coastguard Worker psubsw m5, m3, m2 ;t28a 3745*c0909341SAndroid Build Coastguard Worker paddsw m3, m2 ;t31a 3746*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 1, 2, 7, 1567, 3784 ;t19, t28 3747*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*15] ;tmp12 3748*c0909341SAndroid Build Coastguard Worker psubsw m1, m5, m6 ;t20a 3749*c0909341SAndroid Build Coastguard Worker paddsw m5, m6 ;t19a 3750*c0909341SAndroid Build Coastguard Worker psubsw m6, m2, m5 ;out19 3751*c0909341SAndroid Build Coastguard Worker paddsw m2, m5 ;out12 3752*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize*2+16*30] ;t27 3753*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*22], m6 ;out19 3754*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*15], m2 ;out12 3755*c0909341SAndroid Build Coastguard Worker psubsw m6, m4, m5 ;t27a 3756*c0909341SAndroid Build Coastguard Worker paddsw m4, m5 ;t28a 3757*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 6, 1, 2, 5, 7, 2896, 2896 ;t20, t27 3758*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*6 ] ;tmp3 3759*c0909341SAndroid Build Coastguard Worker psubsw m5, m2, m4 ;out28 3760*c0909341SAndroid Build Coastguard Worker paddsw m2, m4 ;out3 3761*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+gprsize*2+16*14] ;tmp11 3762*c0909341SAndroid Build Coastguard Worker mova 
[rsp+gprsize*2+16*31], m5 ;out28 3763*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*6 ], m2 ;out3 3764*c0909341SAndroid Build Coastguard Worker psubsw m5, m4, m6 ;out20 3765*c0909341SAndroid Build Coastguard Worker paddsw m4, m6 ;out11 3766*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*7 ] ;tmp4 3767*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*23], m5 ;out20 3768*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*14], m4 ;out11 3769*c0909341SAndroid Build Coastguard Worker psubsw m5, m2, m1 ;out27 3770*c0909341SAndroid Build Coastguard Worker paddsw m2, m1 ;out4 3771*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*26] ;t23a 3772*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+gprsize*2+16*27] ;t24a 3773*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*30], m5 ;out27 3774*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*7 ], m2 ;out4 3775*c0909341SAndroid Build Coastguard Worker psubsw m5, m0, m1 ;t23 3776*c0909341SAndroid Build Coastguard Worker paddsw m0, m1 ;t16 3777*c0909341SAndroid Build Coastguard Worker psubsw m2, m3, m4 ;t24 3778*c0909341SAndroid Build Coastguard Worker paddsw m3, m4 ;t31 3779*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 2, 5, 4, 6, 7, 2896, 2896 ;t23a, t24a 3780*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+gprsize*2+16*18] ;tmp15 3781*c0909341SAndroid Build Coastguard Worker psubsw m4, m6, m0 ;out16 3782*c0909341SAndroid Build Coastguard Worker paddsw m6, m0 ;out15 3783*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*3 ] ;tmp0 3784*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*11] ;tmp8 3785*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*18], m6 ;out15 3786*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*19], m4 ;out16 3787*c0909341SAndroid Build Coastguard Worker psubsw m6, m0, m3 ;out31 3788*c0909341SAndroid Build 
Coastguard Worker paddsw m0, m3 ;out0 3789*c0909341SAndroid Build Coastguard Worker psubsw m4, m1, m2 ;out23 3790*c0909341SAndroid Build Coastguard Worker paddsw m1, m2 ;out8 3791*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*10] ;tmp7 3792*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*34], m6 ;out31 3793*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*11], m1 ;out8 3794*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*26], m4 ;out23 3795*c0909341SAndroid Build Coastguard Worker paddsw m6, m3, m5 ;out7 3796*c0909341SAndroid Build Coastguard Worker psubsw m3, m5 ;out24 3797*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*20] ;t17 3798*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize*2+16*25] ;t22 3799*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*17] ;tmp14 3800*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*27], m3 ;out24 3801*c0909341SAndroid Build Coastguard Worker psubsw m4, m1, m5 ;t22a 3802*c0909341SAndroid Build Coastguard Worker paddsw m1, m5 ;t17a 3803*c0909341SAndroid Build Coastguard Worker psubsw m3, m2, m1 ;out17 3804*c0909341SAndroid Build Coastguard Worker paddsw m2, m1 ;out14 3805*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize*2+16*28] ;t25 3806*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*33] ;t30 3807*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*17], m2 ;out14 3808*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*20], m3 ;out17 3809*c0909341SAndroid Build Coastguard Worker psubsw m2, m1, m5 ;t25a 3810*c0909341SAndroid Build Coastguard Worker paddsw m1, m5 ;t30a 3811*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 2, 4, 3, 5, 7, 2896, 2896 ;t22, t25 3812*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize*2+16*4 ] ;tmp1 3813*c0909341SAndroid Build Coastguard Worker psubsw m3, m5, m1 ;out30 3814*c0909341SAndroid Build 
Coastguard Worker paddsw m5, m1 ;out1 3815*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*12] ;tmp9 3816*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*33], m3 ;out30 3817*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*4 ], m5 ;out1 3818*c0909341SAndroid Build Coastguard Worker psubsw m3, m1, m2 ;out22 3819*c0909341SAndroid Build Coastguard Worker paddsw m1, m2 ;out9 3820*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize*2+16*9 ] ;tmp6 3821*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*25], m3 ;out22 3822*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*12], m1 ;out9 3823*c0909341SAndroid Build Coastguard Worker psubsw m3, m5, m4 ;out25 3824*c0909341SAndroid Build Coastguard Worker paddsw m5, m4 ;out6 3825*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+gprsize*2+16*21] ;t18a 3826*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*24] ;t21a 3827*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*16] ;tmp13 3828*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*28], m3 ;out25 3829*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*9 ], m5 ;out6 3830*c0909341SAndroid Build Coastguard Worker paddsw m3, m4, m1 ;t18 3831*c0909341SAndroid Build Coastguard Worker psubsw m4, m1 ;t21 3832*c0909341SAndroid Build Coastguard Worker psubsw m5, m2, m3 ;out18 3833*c0909341SAndroid Build Coastguard Worker paddsw m2, m3 ;out13 3834*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*29] ;t26a 3835*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*32] ;t29a 3836*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*21], m5 ;out18 3837*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*16], m2 ;out13 3838*c0909341SAndroid Build Coastguard Worker psubsw m5, m3, m1 ;t26 3839*c0909341SAndroid Build Coastguard Worker paddsw m3, m1 ;t29 3840*c0909341SAndroid Build 
Coastguard Worker ITX_MULSUB_2W 5, 4, 1, 2, 7, 2896, 2896 ;t21a, t26a 3841*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*5 ] ;tmp2 3842*c0909341SAndroid Build Coastguard Worker psubsw m1, m2, m3 ;out29 3843*c0909341SAndroid Build Coastguard Worker paddsw m2, m3 ;out2 3844*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*13] ;tmp10 3845*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*32], m1 ;out29 3846*c0909341SAndroid Build Coastguard Worker psubsw m7, m3, m5 ;out21 3847*c0909341SAndroid Build Coastguard Worker paddsw m3, m5 ;out10 3848*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize*2+16*8 ] ;tmp5 3849*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*24], m7 ;out21 3850*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*13], m3 ;out10 3851*c0909341SAndroid Build Coastguard Worker psubsw m1, m5, m4 ;out26 3852*c0909341SAndroid Build Coastguard Worker paddsw m5, m4 ;out5 3853*c0909341SAndroid Build Coastguard Worker mova m7, m6 ;out7 3854*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*6 ] ;out3 3855*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+gprsize*2+16*7 ] ;out4 3856*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*29], m1 ;out26 3857*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+gprsize*2+16*9 ] ;out6 3858*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*4 ] ;out1 3859*c0909341SAndroid Build Coastguard Worker ret 3860*c0909341SAndroid Build Coastguard Worker 3861*c0909341SAndroid Build Coastguard Worker 3862*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_32x8_8bpc, 4, 6, 8, 16*36, dst, stride, coeff, eob, tx2 3863*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 3864*c0909341SAndroid Build Coastguard Worker LEA r5, $$ 3865*c0909341SAndroid Build Coastguard Worker%endif 3866*c0909341SAndroid Build Coastguard Worker test eobd, eobd 3867*c0909341SAndroid Build 
Coastguard Worker jz .dconly 3868*c0909341SAndroid Build Coastguard Worker call m(idct_32x8_internal_8bpc) 3869*c0909341SAndroid Build Coastguard Worker RET 3870*c0909341SAndroid Build Coastguard Worker 3871*c0909341SAndroid Build Coastguard Worker.dconly: 3872*c0909341SAndroid Build Coastguard Worker movd m1, [o(pw_2896x8)] 3873*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m1, [coeffq] 3874*c0909341SAndroid Build Coastguard Worker movd m2, [o(pw_8192)] 3875*c0909341SAndroid Build Coastguard Worker mov [coeffq], eobd 3876*c0909341SAndroid Build Coastguard Worker mov r3d, 8 3877*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.end)] 3878*c0909341SAndroid Build Coastguard Worker 3879*c0909341SAndroid Build Coastguard Worker.body: 3880*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m2 3881*c0909341SAndroid Build Coastguard Worker movd m2, [o(pw_2048)] ;intentionally rip-relative 3882*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m1 3883*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m2 3884*c0909341SAndroid Build Coastguard Worker pshuflw m0, m0, q0000 3885*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m0 3886*c0909341SAndroid Build Coastguard Worker pxor m5, m5 3887*c0909341SAndroid Build Coastguard Worker 3888*c0909341SAndroid Build Coastguard Worker.loop: 3889*c0909341SAndroid Build Coastguard Worker mova m1, [dstq+16*0] 3890*c0909341SAndroid Build Coastguard Worker mova m3, [dstq+16*1] 3891*c0909341SAndroid Build Coastguard Worker punpckhbw m2, m1, m5 3892*c0909341SAndroid Build Coastguard Worker punpcklbw m1, m5 3893*c0909341SAndroid Build Coastguard Worker punpckhbw m4, m3, m5 3894*c0909341SAndroid Build Coastguard Worker punpcklbw m3, m5 3895*c0909341SAndroid Build Coastguard Worker paddw m2, m0 3896*c0909341SAndroid Build Coastguard Worker paddw m1, m0 3897*c0909341SAndroid Build Coastguard Worker paddw m4, m0 3898*c0909341SAndroid Build Coastguard Worker paddw m3, m0 3899*c0909341SAndroid Build Coastguard Worker 
packuswb m1, m2 3900*c0909341SAndroid Build Coastguard Worker packuswb m3, m4 3901*c0909341SAndroid Build Coastguard Worker mova [dstq+16*0], m1 3902*c0909341SAndroid Build Coastguard Worker mova [dstq+16*1], m3 3903*c0909341SAndroid Build Coastguard Worker add dstq, strideq 3904*c0909341SAndroid Build Coastguard Worker dec r3d 3905*c0909341SAndroid Build Coastguard Worker jg .loop 3906*c0909341SAndroid Build Coastguard Worker jmp tx2q 3907*c0909341SAndroid Build Coastguard Worker 3908*c0909341SAndroid Build Coastguard Worker.end: 3909*c0909341SAndroid Build Coastguard Worker RET 3910*c0909341SAndroid Build Coastguard Worker 3911*c0909341SAndroid Build Coastguard Worker 3912*c0909341SAndroid Build Coastguard Workercglobal idct_32x8_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2 3913*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+16*0, 64 3914*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).main 3915*c0909341SAndroid Build Coastguard Worker SAVE_7ROWS rsp+gprsize+16*3, 16 3916*c0909341SAndroid Build Coastguard Worker 3917*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+16*2, 64 3918*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_8bpc).main 3919*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize+16*0] 3920*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS rsp+gprsize+16*11, 16 3921*c0909341SAndroid Build Coastguard Worker 3922*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+16*1, 32 3923*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*19], m0 ;in1 3924*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*26], m1 ;in3 3925*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*23], m2 ;in5 3926*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*22], m3 ;in7 3927*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*21], m4 ;in9 3928*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*24], m5 ;in11 
3929*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*25], m6 ;in13 3930*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*20], m7 ;in15 3931*c0909341SAndroid Build Coastguard Worker 3932*c0909341SAndroid Build Coastguard Worker cmp eobd, 106 3933*c0909341SAndroid Build Coastguard Worker jg .full 3934*c0909341SAndroid Build Coastguard Worker call m(idct_8x32_internal_8bpc).main_fast 3935*c0909341SAndroid Build Coastguard Worker jmp .pass2 3936*c0909341SAndroid Build Coastguard Worker 3937*c0909341SAndroid Build Coastguard Worker.full: 3938*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+16*17, 32 3939*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*33], m0 ;in17 3940*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*28], m1 ;in19 3941*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*29], m2 ;in21 3942*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*32], m3 ;in23 3943*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*31], m4 ;in25 3944*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*30], m5 ;in27 3945*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*27], m6 ;in29 3946*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*34], m7 ;in31 3947*c0909341SAndroid Build Coastguard Worker call m(idct_8x32_internal_8bpc).main 3948*c0909341SAndroid Build Coastguard Worker 3949*c0909341SAndroid Build Coastguard Worker.pass2: 3950*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0 ], m7 3951*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.end)] 3952*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x32_internal_8bpc).end1 3953*c0909341SAndroid Build Coastguard Worker 3954*c0909341SAndroid Build Coastguard Worker.end: 3955*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_8192)] 3956*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.end1)] 3957*c0909341SAndroid Build Coastguard Worker jmp 
m(idct_8x8_internal_8bpc).pass1_end1 3958*c0909341SAndroid Build Coastguard Worker 3959*c0909341SAndroid Build Coastguard Worker.end1: 3960*c0909341SAndroid Build Coastguard Worker lea r3, [dstq+8] 3961*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.end2)] 3962*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass2_main 3963*c0909341SAndroid Build Coastguard Worker 3964*c0909341SAndroid Build Coastguard Worker.end2: 3965*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*11, 16 3966*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0 ], m7 3967*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_8192)] 3968*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.end3)] 3969*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end1 3970*c0909341SAndroid Build Coastguard Worker 3971*c0909341SAndroid Build Coastguard Worker.end3: 3972*c0909341SAndroid Build Coastguard Worker mov dstq, r3 3973*c0909341SAndroid Build Coastguard Worker add r3, 8 3974*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.end4)] 3975*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass2_main 3976*c0909341SAndroid Build Coastguard Worker 3977*c0909341SAndroid Build Coastguard Worker.end4: 3978*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*19, 16 3979*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0 ], m7 3980*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_8192)] 3981*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.end5)] 3982*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end1 3983*c0909341SAndroid Build Coastguard Worker 3984*c0909341SAndroid Build Coastguard Worker.end5: 3985*c0909341SAndroid Build Coastguard Worker mov dstq, r3 3986*c0909341SAndroid Build Coastguard Worker add r3, 8 3987*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.end6)] 3988*c0909341SAndroid Build Coastguard Worker jmp 
m(idct_8x8_internal_8bpc).pass2_main 3989*c0909341SAndroid Build Coastguard Worker 3990*c0909341SAndroid Build Coastguard Worker.end6: 3991*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*27, 16 3992*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0 ], m7 3993*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_8192)] 3994*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.end7)] 3995*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end1 3996*c0909341SAndroid Build Coastguard Worker 3997*c0909341SAndroid Build Coastguard Worker.end7: 3998*c0909341SAndroid Build Coastguard Worker mov dstq, r3 3999*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.end8)] 4000*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass2_main 4001*c0909341SAndroid Build Coastguard Worker 4002*c0909341SAndroid Build Coastguard Worker.end8: 4003*c0909341SAndroid Build Coastguard Worker ret 4004*c0909341SAndroid Build Coastguard Worker 4005*c0909341SAndroid Build Coastguard Worker 4006*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_8x32_8bpc, 4, 6, 8, 16*4, dst, stride, coeff, eob, tx2 4007*c0909341SAndroid Build Coastguard Worker mov r5d, 4 4008*c0909341SAndroid Build Coastguard Worker mov tx2d, 2 4009*c0909341SAndroid Build Coastguard Worker cmp eobd, 107 4010*c0909341SAndroid Build Coastguard Worker cmovns tx2d, r5d 4011*c0909341SAndroid Build Coastguard Worker mov r3d, tx2d 4012*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 4013*c0909341SAndroid Build Coastguard Worker LEA r5, $$ 4014*c0909341SAndroid Build Coastguard Worker%endif 4015*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(m(idct_32x8_internal_8bpc).end8)] 4016*c0909341SAndroid Build Coastguard Worker.loop: 4017*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+16*0, 64 4018*c0909341SAndroid Build Coastguard Worker paddsw m6, [o(pw_5)] 4019*c0909341SAndroid Build Coastguard Worker 
mova [rsp+16*1], m6 4020*c0909341SAndroid Build Coastguard Worker mova m6, [o(pw_5)] 4021*c0909341SAndroid Build Coastguard Worker REPX {paddsw x, m6}, m0, m1, m2, m3, m4, m5, m7 4022*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).pass1_end3 4023*c0909341SAndroid Build Coastguard Worker REPX {psraw x, 3 }, m0, m1, m2, m3, m4, m5, m6, m7 4024*c0909341SAndroid Build Coastguard Worker mova [rsp+16*2], m5 4025*c0909341SAndroid Build Coastguard Worker mova [rsp+16*1], m6 4026*c0909341SAndroid Build Coastguard Worker mova [rsp+16*0], m7 4027*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).end3 4028*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 4029*c0909341SAndroid Build Coastguard Worker pxor m7, m7 4030*c0909341SAndroid Build Coastguard Worker REPX {mova [coeffq+64*x], m7}, 0, 1, 2, 3, 4, 5, 6, 7 4031*c0909341SAndroid Build Coastguard Worker add coeffq, 16 4032*c0909341SAndroid Build Coastguard Worker dec r3d 4033*c0909341SAndroid Build Coastguard Worker jg .loop 4034*c0909341SAndroid Build Coastguard Worker RET 4035*c0909341SAndroid Build Coastguard Worker 4036*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_32x8_8bpc, 4, 6, 8, 16*4, dst, stride, coeff, eob, tx2 4037*c0909341SAndroid Build Coastguard Worker mov r5d, 4 4038*c0909341SAndroid Build Coastguard Worker mov tx2d, 2 4039*c0909341SAndroid Build Coastguard Worker cmp eobd, 107 4040*c0909341SAndroid Build Coastguard Worker cmovns tx2d, r5d 4041*c0909341SAndroid Build Coastguard Worker mov r3d, tx2d 4042*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 4043*c0909341SAndroid Build Coastguard Worker LEA r5, $$ 4044*c0909341SAndroid Build Coastguard Worker%endif 4045*c0909341SAndroid Build Coastguard Worker 4046*c0909341SAndroid Build Coastguard Worker.loop: 4047*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+16*0, 16 4048*c0909341SAndroid Build Coastguard Worker pmulhrsw m6, [o(pw_4096)] 
4049*c0909341SAndroid Build Coastguard Worker mova [rsp+16*1], m6 4050*c0909341SAndroid Build Coastguard Worker mova m6, [o(pw_4096)] 4051*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m6}, m0, m1, m2, m3, m4, m5, m7 4052*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(m(idct_32x8_internal_8bpc).end8)] 4053*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).pass1_end3 4054*c0909341SAndroid Build Coastguard Worker 4055*c0909341SAndroid Build Coastguard Worker mov [rsp+16*3], dstq 4056*c0909341SAndroid Build Coastguard Worker mova [rsp+16*2], m5 4057*c0909341SAndroid Build Coastguard Worker mova [rsp+16*1], m6 4058*c0909341SAndroid Build Coastguard Worker mova [rsp+16*0], m7 4059*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(m(idct_8x8_internal_8bpc).end4)] 4060*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).end3 4061*c0909341SAndroid Build Coastguard Worker 4062*c0909341SAndroid Build Coastguard Worker add coeffq, 16*8 4063*c0909341SAndroid Build Coastguard Worker mov dstq, [rsp+16*3] 4064*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+8] 4065*c0909341SAndroid Build Coastguard Worker dec r3d 4066*c0909341SAndroid Build Coastguard Worker jg .loop 4067*c0909341SAndroid Build Coastguard Worker jnc .loop 4068*c0909341SAndroid Build Coastguard Worker RET 4069*c0909341SAndroid Build Coastguard Worker 4070*c0909341SAndroid Build Coastguard Worker 4071*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_16x32_8bpc, 4, 6, 8, 16*36, dst, stride, coeff, eob, tx2 4072*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 4073*c0909341SAndroid Build Coastguard Worker LEA r5, $$ 4074*c0909341SAndroid Build Coastguard Worker%endif 4075*c0909341SAndroid Build Coastguard Worker test eobd, eobd 4076*c0909341SAndroid Build Coastguard Worker jz .dconly 4077*c0909341SAndroid Build Coastguard Worker call m(idct_16x32_internal_8bpc) 4078*c0909341SAndroid Build Coastguard Worker.end: 
4079*c0909341SAndroid Build Coastguard Worker RET 4080*c0909341SAndroid Build Coastguard Worker 4081*c0909341SAndroid Build Coastguard Worker.dconly: 4082*c0909341SAndroid Build Coastguard Worker movd m1, [o(pw_2896x8)] 4083*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m1, [coeffq] 4084*c0909341SAndroid Build Coastguard Worker movd m2, [o(pw_16384)] 4085*c0909341SAndroid Build Coastguard Worker mov [coeffq], eobd 4086*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m1 4087*c0909341SAndroid Build Coastguard Worker mov r2d, 16 4088*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.end)] 4089*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_16x4_8bpc).dconly 4090*c0909341SAndroid Build Coastguard Worker 4091*c0909341SAndroid Build Coastguard Worker 4092*c0909341SAndroid Build Coastguard Workercglobal idct_16x32_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2 4093*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+16*1, 128, 1 4094*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).main 4095*c0909341SAndroid Build Coastguard Worker SAVE_7ROWS rsp+gprsize+16*3, 16 4096*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+16*5, 128, 1 4097*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_8bpc).main 4098*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end)] 4099*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end 4100*c0909341SAndroid Build Coastguard Worker 4101*c0909341SAndroid Build Coastguard Worker.pass1_end: 4102*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+16*33, 64 ;in8~in15 4103*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*3, 16 4104*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 4105*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end1)] 4106*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end 4107*c0909341SAndroid Build 
Coastguard Worker 4108*c0909341SAndroid Build Coastguard Worker.pass1_end1: 4109*c0909341SAndroid Build Coastguard Worker mova [coeffq+16*1 ], m0 ;in8 4110*c0909341SAndroid Build Coastguard Worker mova [coeffq+16*5 ], m4 ;in12 4111*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*13], m2 ;in10 4112*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*14], m6 ;in14 4113*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*21], m1 ;in9 4114*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*24], m3 ;in11 4115*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*25], m5 ;in13 4116*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*20], m7 ;in15 4117*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+16*0, 128, 1 4118*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).main 4119*c0909341SAndroid Build Coastguard Worker SAVE_7ROWS rsp+gprsize+16*3, 16 4120*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+16*4, 128, 1 4121*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_8bpc).main 4122*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end2)] 4123*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end 4124*c0909341SAndroid Build Coastguard Worker 4125*c0909341SAndroid Build Coastguard Worker.pass1_end2: 4126*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+16*32, 64 ;in0~in7 4127*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*3, 16 4128*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 4129*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end3)] 4130*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end 4131*c0909341SAndroid Build Coastguard Worker 4132*c0909341SAndroid Build Coastguard Worker.pass1_end3: 4133*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*11], m2 ;in2 4134*c0909341SAndroid Build Coastguard Worker 
mova [rsp+gprsize+16*12], m6 ;in6 4135*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*19], m1 ;in1 4136*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*26], m3 ;in3 4137*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*23], m5 ;in5 4138*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*22], m7 ;in7 4139*c0909341SAndroid Build Coastguard Worker 4140*c0909341SAndroid Build Coastguard Worker cmp eobd, 150 4141*c0909341SAndroid Build Coastguard Worker jg .full 4142*c0909341SAndroid Build Coastguard Worker 4143*c0909341SAndroid Build Coastguard Worker mova m1, m4 ;in4 4144*c0909341SAndroid Build Coastguard Worker mova m2, [coeffq+16*1 ] ;in8 4145*c0909341SAndroid Build Coastguard Worker mova m3, [coeffq+16*5 ] ;in12 4146*c0909341SAndroid Build Coastguard Worker pxor m4, m4 4147*c0909341SAndroid Build Coastguard Worker REPX {mova x, m4}, m5, m6, m7 4148*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).main 4149*c0909341SAndroid Build Coastguard Worker SAVE_7ROWS rsp+gprsize+16*3, 16 4150*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize+16*11] ;in2 4151*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize+16*12] ;in6 4152*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize+16*13] ;in10 4153*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize+16*14] ;in14 4154*c0909341SAndroid Build Coastguard Worker pxor m4, m4 4155*c0909341SAndroid Build Coastguard Worker REPX {mova x, m4}, m5, m6, m7 4156*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_8bpc).main 4157*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize+16*0] 4158*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS rsp+gprsize+16*11, 16 4159*c0909341SAndroid Build Coastguard Worker 4160*c0909341SAndroid Build Coastguard Worker call m(idct_8x32_internal_8bpc).main_fast 4161*c0909341SAndroid Build Coastguard Worker jmp .pass2 4162*c0909341SAndroid Build Coastguard Worker 
4163*c0909341SAndroid Build Coastguard Worker.full: 4164*c0909341SAndroid Build Coastguard Worker mova [coeffq+16*0 ], m0 ;in0 4165*c0909341SAndroid Build Coastguard Worker mova [coeffq+16*4 ], m4 ;in4 4166*c0909341SAndroid Build Coastguard Worker 4167*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+16*2, 128, 1 4168*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).main 4169*c0909341SAndroid Build Coastguard Worker SAVE_7ROWS rsp+gprsize+16*3, 16 4170*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+16*6, 128, 1 4171*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_8bpc).main 4172*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end4)] 4173*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end 4174*c0909341SAndroid Build Coastguard Worker 4175*c0909341SAndroid Build Coastguard Worker.pass1_end4: 4176*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+16*34, 64 ;in16~in23 4177*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*3, 16 4178*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 4179*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end5)] 4180*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end 4181*c0909341SAndroid Build Coastguard Worker 4182*c0909341SAndroid Build Coastguard Worker.pass1_end5: 4183*c0909341SAndroid Build Coastguard Worker mova [coeffq+16*2 ], m0 ;in16 4184*c0909341SAndroid Build Coastguard Worker mova [coeffq+16*6 ], m4 ;in20 4185*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*15], m2 ;in18 4186*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*16], m6 ;in22 4187*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*33], m1 ;in17 4188*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*28], m3 ;in19 4189*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*29], m5 ;in21 4190*c0909341SAndroid Build 
Coastguard Worker mova [rsp+gprsize+16*32], m7 ;in23 4191*c0909341SAndroid Build Coastguard Worker 4192*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+16*3, 128, 1 4193*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).main 4194*c0909341SAndroid Build Coastguard Worker SAVE_7ROWS rsp+gprsize+16*3, 16 4195*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+16*7, 128, 1 4196*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_8bpc).main 4197*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end6)] 4198*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end 4199*c0909341SAndroid Build Coastguard Worker 4200*c0909341SAndroid Build Coastguard Worker.pass1_end6: 4201*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+16*35, 64 ;in24~in31 4202*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*3, 16 4203*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 4204*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end7)] 4205*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end 4206*c0909341SAndroid Build Coastguard Worker 4207*c0909341SAndroid Build Coastguard Worker.pass1_end7: 4208*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*17], m2 ;in26 4209*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*18], m6 ;in30 4210*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*31], m1 ;in25 4211*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*30], m3 ;in27 4212*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*27], m5 ;in29 4213*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*34], m7 ;in31 4214*c0909341SAndroid Build Coastguard Worker 4215*c0909341SAndroid Build Coastguard Worker mova m6, m0 ;in24 4216*c0909341SAndroid Build Coastguard Worker mova m7, m4 ;in28 4217*c0909341SAndroid Build Coastguard Worker mova m0, [coeffq+16*0 ] ;in0 
4218*c0909341SAndroid Build Coastguard Worker mova m1, [coeffq+16*4 ] ;in4 4219*c0909341SAndroid Build Coastguard Worker mova m2, [coeffq+16*1 ] ;in8 4220*c0909341SAndroid Build Coastguard Worker mova m3, [coeffq+16*5 ] ;in12 4221*c0909341SAndroid Build Coastguard Worker mova m4, [coeffq+16*2 ] ;in16 4222*c0909341SAndroid Build Coastguard Worker mova m5, [coeffq+16*6 ] ;in20 4223*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).main 4224*c0909341SAndroid Build Coastguard Worker SAVE_7ROWS rsp+gprsize+16*3 , 16 4225*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*11, 16 4226*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_8bpc).main 4227*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize+16*0] 4228*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS rsp+gprsize+16*11, 16 4229*c0909341SAndroid Build Coastguard Worker 4230*c0909341SAndroid Build Coastguard Worker call m(idct_8x32_internal_8bpc).main 4231*c0909341SAndroid Build Coastguard Worker 4232*c0909341SAndroid Build Coastguard Worker.pass2: 4233*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*1+16*35], eobd 4234*c0909341SAndroid Build Coastguard Worker lea r3, [dstq+8] 4235*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*2+16*35], r3 4236*c0909341SAndroid Build Coastguard Worker lea r3, [o(.end)] 4237*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x32_internal_8bpc).end 4238*c0909341SAndroid Build Coastguard Worker 4239*c0909341SAndroid Build Coastguard Worker.end: 4240*c0909341SAndroid Build Coastguard Worker mov dstq, [rsp+gprsize*2+16*35] 4241*c0909341SAndroid Build Coastguard Worker mov eobd, [rsp+gprsize*1+16*35] 4242*c0909341SAndroid Build Coastguard Worker add coeffq, 16*32 4243*c0909341SAndroid Build Coastguard Worker 4244*c0909341SAndroid Build Coastguard Worker mova m0, [coeffq+16*4 ] ;in1 4245*c0909341SAndroid Build Coastguard Worker mova m1, [coeffq+16*12] ;in3 4246*c0909341SAndroid Build Coastguard 
Worker mova m2, [coeffq+16*20] ;in5 4247*c0909341SAndroid Build Coastguard Worker mova m3, [coeffq+16*28] ;in7 4248*c0909341SAndroid Build Coastguard Worker mova m4, [coeffq+16*5 ] ;in9 4249*c0909341SAndroid Build Coastguard Worker mova m5, [coeffq+16*13] ;in11 4250*c0909341SAndroid Build Coastguard Worker mova m6, [coeffq+16*21] ;in13 4251*c0909341SAndroid Build Coastguard Worker mova m7, [coeffq+16*29] ;in15 4252*c0909341SAndroid Build Coastguard Worker 4253*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*19], m0 ;in1 4254*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*26], m1 ;in3 4255*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*23], m2 ;in5 4256*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*22], m3 ;in7 4257*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*21], m4 ;in9 4258*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*24], m5 ;in11 4259*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*25], m6 ;in13 4260*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*20], m7 ;in15 4261*c0909341SAndroid Build Coastguard Worker 4262*c0909341SAndroid Build Coastguard Worker mova m0, [coeffq+16*0 ] ;in0 4263*c0909341SAndroid Build Coastguard Worker mova m1, [coeffq+16*16] ;in4 4264*c0909341SAndroid Build Coastguard Worker mova m2, [coeffq+16*1 ] ;in8 4265*c0909341SAndroid Build Coastguard Worker mova m3, [coeffq+16*17] ;in12 4266*c0909341SAndroid Build Coastguard Worker 4267*c0909341SAndroid Build Coastguard Worker cmp eobd, 150 4268*c0909341SAndroid Build Coastguard Worker jg .full1 4269*c0909341SAndroid Build Coastguard Worker 4270*c0909341SAndroid Build Coastguard Worker pxor m4, m4 4271*c0909341SAndroid Build Coastguard Worker REPX {mova x, m4}, m5, m6, m7 4272*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).main 4273*c0909341SAndroid Build Coastguard Worker SAVE_7ROWS rsp+gprsize+16*3, 16 4274*c0909341SAndroid Build Coastguard Worker 
4275*c0909341SAndroid Build Coastguard Worker mova m0, [coeffq+16*8 ] ;in2 4276*c0909341SAndroid Build Coastguard Worker mova m1, [coeffq+16*24] ;in6 4277*c0909341SAndroid Build Coastguard Worker mova m2, [coeffq+16*9 ] ;in10 4278*c0909341SAndroid Build Coastguard Worker mova m3, [coeffq+16*25] ;in14 4279*c0909341SAndroid Build Coastguard Worker pxor m4, m4 4280*c0909341SAndroid Build Coastguard Worker REPX {mova x, m4}, m5, m6, m7 4281*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_8bpc).main 4282*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize+16*0] 4283*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS rsp+gprsize+16*11, 16 4284*c0909341SAndroid Build Coastguard Worker 4285*c0909341SAndroid Build Coastguard Worker call m(idct_8x32_internal_8bpc).main_fast 4286*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x32_internal_8bpc).pass2 4287*c0909341SAndroid Build Coastguard Worker 4288*c0909341SAndroid Build Coastguard Worker.full1: 4289*c0909341SAndroid Build Coastguard Worker mova m4, [coeffq+16*2 ] ;in16 4290*c0909341SAndroid Build Coastguard Worker mova m5, [coeffq+16*18] ;in20 4291*c0909341SAndroid Build Coastguard Worker mova m6, [coeffq+16*3 ] ;in24 4292*c0909341SAndroid Build Coastguard Worker mova m7, [coeffq+16*19] ;in26 4293*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).main 4294*c0909341SAndroid Build Coastguard Worker SAVE_7ROWS rsp+gprsize+16*3, 16 4295*c0909341SAndroid Build Coastguard Worker 4296*c0909341SAndroid Build Coastguard Worker mova m0, [coeffq+16*8 ] ;in2 4297*c0909341SAndroid Build Coastguard Worker mova m1, [coeffq+16*24] ;in6 4298*c0909341SAndroid Build Coastguard Worker mova m2, [coeffq+16*9 ] ;in10 4299*c0909341SAndroid Build Coastguard Worker mova m3, [coeffq+16*25] ;in14 4300*c0909341SAndroid Build Coastguard Worker mova m4, [coeffq+16*10] ;in18 4301*c0909341SAndroid Build Coastguard Worker mova m5, [coeffq+16*26] ;in22 4302*c0909341SAndroid Build Coastguard Worker 
mova m6, [coeffq+16*11] ;in26 4303*c0909341SAndroid Build Coastguard Worker mova m7, [coeffq+16*27] ;in30 4304*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_8bpc).main 4305*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize+16*0] 4306*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS rsp+gprsize+16*11, 16 4307*c0909341SAndroid Build Coastguard Worker 4308*c0909341SAndroid Build Coastguard Worker mova m0, [coeffq+16*6 ] ;in17 4309*c0909341SAndroid Build Coastguard Worker mova m1, [coeffq+16*14] ;in19 4310*c0909341SAndroid Build Coastguard Worker mova m2, [coeffq+16*22] ;in21 4311*c0909341SAndroid Build Coastguard Worker mova m3, [coeffq+16*30] ;in23 4312*c0909341SAndroid Build Coastguard Worker mova m4, [coeffq+16*7 ] ;in25 4313*c0909341SAndroid Build Coastguard Worker mova m5, [coeffq+16*15] ;in27 4314*c0909341SAndroid Build Coastguard Worker mova m6, [coeffq+16*23] ;in29 4315*c0909341SAndroid Build Coastguard Worker mova m7, [coeffq+16*31] ;in31 4316*c0909341SAndroid Build Coastguard Worker 4317*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*33], m0 ;in17 4318*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*28], m1 ;in19 4319*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*29], m2 ;in21 4320*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*32], m3 ;in23 4321*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*31], m4 ;in25 4322*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*30], m5 ;in27 4323*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*27], m6 ;in29 4324*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*34], m7 ;in31 4325*c0909341SAndroid Build Coastguard Worker 4326*c0909341SAndroid Build Coastguard Worker call m(idct_8x32_internal_8bpc).main 4327*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x32_internal_8bpc).pass2 4328*c0909341SAndroid Build Coastguard Worker 4329*c0909341SAndroid Build Coastguard Worker 
; One "next strip" step of inv_txfm_add_dct_dct_32x16_8bpc: move coeffq
; forward by 16*16 bytes, set dst to r3+8, reload eight rows from the
; stack, and run them through the 8x8 pass1_end code followed by the
; 8x16 pass2 code.
;   %1 = index of the first 16-byte stack slot to reload the rows from
; NOTE(review): r3 is presumably left pointing at the previous strip's dst
; by the called code - confirm against idct_8x16_internal_8bpc.pass2.
%macro ITX_32X16_NEXT_STRIP 1
    add         coeffq, 16*16
    lea         dstq, [r3+8]
    LOAD_8ROWS  rsp+16*%1, 16
    mova        [rsp+16*0], m7
    lea         tx2q, [o(m(idct_32x16_internal_8bpc).end)]
    call        m(idct_8x8_internal_8bpc).pass1_end
    call        m(idct_8x16_internal_8bpc).pass2
%endmacro

;-----------------------------------------------------------------------
; inv_txfm_add_dct_dct_32x16_8bpc(dst, stride, coeff, eob)
;
; Entry point for the 32x16 DCT/DCT inverse transform + add.
;   eob == 0 : DC-only; prepares m0-m2, a row count of 16 and a
;              continuation, then reuses the 32x8 routine's .body loop.
;   eob != 0 : idct_32x16_internal_8bpc followed by the 8x16 pass2 for
;              the first strip, then three more strips taken from stack
;              slots 11, 19 and 27.
;-----------------------------------------------------------------------
cglobal inv_txfm_add_dct_dct_32x16_8bpc, 4, 6, 8, 16*36, dst, stride, coeff, eob, tx2
%if ARCH_X86_32
    LEA         r5, $$
%endif
    test        eobd, eobd
    jz .dconly

    call        m(idct_32x16_internal_8bpc)
    call        m(idct_8x16_internal_8bpc).pass2

    ITX_32X16_NEXT_STRIP 11
    ITX_32X16_NEXT_STRIP 19
    ITX_32X16_NEXT_STRIP 27
    RET

.dconly:
    movd        m1, [o(pw_2896x8)]
    pmulhrsw    m0, m1, [coeffq]
    movd        m2, [o(pw_16384)]
    mov         [coeffq], eobd          ; eobd is 0 on this path: clears the DC coefficient
    pmulhrsw    m0, m1
    mov         r3d, 16                 ; row count for the shared 32-byte-wide loop
    lea         tx2q, [o(m(inv_txfm_add_dct_dct_32x8_8bpc).end)]
    jmp         m(inv_txfm_add_dct_dct_32x8_8bpc).body


cglobal idct_32x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    add         coeffq, 16
    lea         r3, [o(.pass1_end1)]
.pass1:
    LOAD_8ROWS  coeffq+16*0, 128, 1
    call        m(idct_8x8_internal_8bpc).main
    SAVE_7ROWS  rsp+gprsize+16*3, 16

    LOAD_8ROWS  coeffq+16*4, 128, 1
    call        m(idct_16x8_internal_8bpc).main
    mova        m7, [rsp+gprsize+16*0]
4387*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS rsp+gprsize+16*11, 16 4388*c0909341SAndroid Build Coastguard Worker 4389*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+16*2, 64, 1 4390*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*19], m0 ;in1 4391*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*26], m1 ;in3 4392*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*23], m2 ;in5 4393*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*22], m3 ;in7 4394*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*21], m4 ;in9 4395*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*24], m5 ;in11 4396*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*25], m6 ;in13 4397*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*20], m7 ;in15 4398*c0909341SAndroid Build Coastguard Worker 4399*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+16*34, 64, 1 4400*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*33], m0 ;in17 4401*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*28], m1 ;in19 4402*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*29], m2 ;in21 4403*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*32], m3 ;in23 4404*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*31], m4 ;in25 4405*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*30], m5 ;in27 4406*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*27], m6 ;in29 4407*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*34], m7 ;in31 4408*c0909341SAndroid Build Coastguard Worker call m(idct_8x32_internal_8bpc).main 4409*c0909341SAndroid Build Coastguard Worker 4410*c0909341SAndroid Build Coastguard Worker.pass1_end: 4411*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0 ], m7 4412*c0909341SAndroid Build Coastguard Worker mov tx2q, r3 4413*c0909341SAndroid Build Coastguard Worker jmp 
m(idct_8x8_internal_8bpc).pass1_end 4414*c0909341SAndroid Build Coastguard Worker 4415*c0909341SAndroid Build Coastguard Worker.pass1_end1: 4416*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+16*0, 32 4417*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*11, 16 4418*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0 ], m7 4419*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end2)] 4420*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end 4421*c0909341SAndroid Build Coastguard Worker 4422*c0909341SAndroid Build Coastguard Worker.pass1_end2: 4423*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+16*16, 32 4424*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*19, 16 4425*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0 ], m7 4426*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end3)] 4427*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end 4428*c0909341SAndroid Build Coastguard Worker 4429*c0909341SAndroid Build Coastguard Worker.pass1_end3: 4430*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+16*32, 32 4431*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*27, 16 4432*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0 ], m7 4433*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end4)] 4434*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end 4435*c0909341SAndroid Build Coastguard Worker 4436*c0909341SAndroid Build Coastguard Worker.pass1_end4: 4437*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+16*48, 32 4438*c0909341SAndroid Build Coastguard Worker 4439*c0909341SAndroid Build Coastguard Worker sub coeffq, 16 4440*c0909341SAndroid Build Coastguard Worker lea r3, [o(.end)] 4441*c0909341SAndroid Build Coastguard Worker jmp .pass1 4442*c0909341SAndroid Build Coastguard Worker 4443*c0909341SAndroid Build Coastguard 
Worker.end: 4444*c0909341SAndroid Build Coastguard Worker ret 4445*c0909341SAndroid Build Coastguard Worker 4446*c0909341SAndroid Build Coastguard Worker 4447*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_16x32_8bpc, 4, 6, 8, 16*4, dst, stride, coeff, eob, tx2 4448*c0909341SAndroid Build Coastguard Worker mov r4d, eobd 4449*c0909341SAndroid Build Coastguard Worker cmp eobd, 43 ;if (eob > 43) 4450*c0909341SAndroid Build Coastguard Worker sbb r3d, r3d ; iteration_count++ 4451*c0909341SAndroid Build Coastguard Worker cmp r4d, 150 ;if (eob > 150) 4452*c0909341SAndroid Build Coastguard Worker sbb r3d, 0 ; iteration_count++ 4453*c0909341SAndroid Build Coastguard Worker cmp r4d, 278 ;if (eob > 278) 4454*c0909341SAndroid Build Coastguard Worker sbb r3d, -4 ; iteration_count++ 4455*c0909341SAndroid Build Coastguard Worker 4456*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 4457*c0909341SAndroid Build Coastguard Worker LEA r5, $$ 4458*c0909341SAndroid Build Coastguard Worker%endif 4459*c0909341SAndroid Build Coastguard Worker lea r4, [dstq+8] 4460*c0909341SAndroid Build Coastguard Worker mov [rsp+16*3], r4 4461*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize+16*3], r3d 4462*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*2+16*3], coeffq 4463*c0909341SAndroid Build Coastguard Worker 4464*c0909341SAndroid Build Coastguard Worker.loop: 4465*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq, 64, 1 4466*c0909341SAndroid Build Coastguard Worker mova [rsp+16*1], m6 4467*c0909341SAndroid Build Coastguard Worker pxor m6, m6 4468*c0909341SAndroid Build Coastguard Worker REPX {mova [coeffq+64*x], m6}, 0, 1, 2, 3, 4, 5, 6, 7 4469*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(m(idct_32x16_internal_8bpc).end)] 4470*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).pass1_end3 4471*c0909341SAndroid Build Coastguard Worker mova [rsp+16*0], m2 4472*c0909341SAndroid Build Coastguard Worker mova 
[rsp+16*1], m3 4473*c0909341SAndroid Build Coastguard Worker mova [rsp+16*2], m4 4474*c0909341SAndroid Build Coastguard Worker mova m3, [o(pw_1697x16)] 4475*c0909341SAndroid Build Coastguard Worker mova m4, [o(pw_16384)] 4476*c0909341SAndroid Build Coastguard Worker REPX {IDTX16 x, 2, 3, 4}, 5, 6, 7, 0, 1 4477*c0909341SAndroid Build Coastguard Worker mova m2, [o(pw_8192)] 4478*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m2}, m5, m6, m7, m0, m1 4479*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+16*0] 4480*c0909341SAndroid Build Coastguard Worker mova [rsp+16*0], m7 4481*c0909341SAndroid Build Coastguard Worker IDTX16 2, 7, 3, 4 4482*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+16*2] 4483*c0909341SAndroid Build Coastguard Worker mova [rsp+16*2], m5 4484*c0909341SAndroid Build Coastguard Worker IDTX16 7, 5, 3, 4 4485*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+16*1] 4486*c0909341SAndroid Build Coastguard Worker mova [rsp+16*1], m6 4487*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m5 4488*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m4 4489*c0909341SAndroid Build Coastguard Worker psrlw m4, 1 ; pw_8192 4490*c0909341SAndroid Build Coastguard Worker paddsw m3, m5 4491*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m4 4492*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m4 4493*c0909341SAndroid Build Coastguard Worker pmulhrsw m4, m7 4494*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).end3 4495*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 4496*c0909341SAndroid Build Coastguard Worker add coeffq, 16 4497*c0909341SAndroid Build Coastguard Worker dec r3d 4498*c0909341SAndroid Build Coastguard Worker jg .loop 4499*c0909341SAndroid Build Coastguard Worker mov coeffq, [rsp+gprsize*2+16*3] 4500*c0909341SAndroid Build Coastguard Worker add coeffq, 64*8 4501*c0909341SAndroid Build Coastguard Worker mov r3d, [rsp+gprsize+16*3] 4502*c0909341SAndroid Build 
Coastguard Worker xor dstq, dstq 4503*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize+16*3], dstq 4504*c0909341SAndroid Build Coastguard Worker mov dstq, [rsp+16*3] 4505*c0909341SAndroid Build Coastguard Worker test r3d, r3d 4506*c0909341SAndroid Build Coastguard Worker jnz .loop 4507*c0909341SAndroid Build Coastguard Worker RET 4508*c0909341SAndroid Build Coastguard Worker 4509*c0909341SAndroid Build Coastguard Worker 4510*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_32x16_8bpc, 4, 6, 8, 16*4, dst, stride, coeff, eob, tx2 4511*c0909341SAndroid Build Coastguard Worker mov r4d, 12 ;0100b 4512*c0909341SAndroid Build Coastguard Worker mov r5d, 136 ;1000 1000b 4513*c0909341SAndroid Build Coastguard Worker cmp eobd, 44 ;if (eob > 43) 4514*c0909341SAndroid Build Coastguard Worker cmovns r4d, r5d ; iteration_count+2 4515*c0909341SAndroid Build Coastguard Worker cmp eobd, 151 ;if (eob > 150) 4516*c0909341SAndroid Build Coastguard Worker mov r3d, 34952 ;1000 1000 1000 1000b 4517*c0909341SAndroid Build Coastguard Worker cmovs r3d, r4d ; iteration_count += 4 4518*c0909341SAndroid Build Coastguard Worker 4519*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 4520*c0909341SAndroid Build Coastguard Worker LEA r5, $$ 4521*c0909341SAndroid Build Coastguard Worker%endif 4522*c0909341SAndroid Build Coastguard Worker lea r4, [dstq+8] 4523*c0909341SAndroid Build Coastguard Worker mov [rsp+16*3], r4 4524*c0909341SAndroid Build Coastguard Worker 4525*c0909341SAndroid Build Coastguard Worker.loop: 4526*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq, 32, 1 4527*c0909341SAndroid Build Coastguard Worker REPX {paddsw x, x}, m0, m1, m2, m3, m4, m5, m6, m7 4528*c0909341SAndroid Build Coastguard Worker mova [rsp+16*1], m6 4529*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(m(idct_32x16_internal_8bpc).end)] 4530*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).pass1_end3 4531*c0909341SAndroid Build 
Coastguard Worker mova [rsp+16*1], m5 4532*c0909341SAndroid Build Coastguard Worker mova [rsp+16*2], m6 4533*c0909341SAndroid Build Coastguard Worker mova m6, [o(pw_1697x16)] 4534*c0909341SAndroid Build Coastguard Worker REPX {IDTX16 x, 5, 6}, 7, 0, 1, 2, 3, 4 4535*c0909341SAndroid Build Coastguard Worker pmulhrsw m7, [o(pw_2048)] 4536*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+16*1] 4537*c0909341SAndroid Build Coastguard Worker mova [rsp+16*0], m7 4538*c0909341SAndroid Build Coastguard Worker IDTX16 5, 7, 6 4539*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+16*2] 4540*c0909341SAndroid Build Coastguard Worker IDTX16 7, 6, 6 4541*c0909341SAndroid Build Coastguard Worker mova m6, [o(pw_2048)] 4542*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m6}, m0, m1, m2, m3, m4, m5, m7 4543*c0909341SAndroid Build Coastguard Worker mova [rsp+16*2], m5 4544*c0909341SAndroid Build Coastguard Worker mova [rsp+16*1], m7 4545*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).end3 4546*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 4547*c0909341SAndroid Build Coastguard Worker pxor m7, m7 4548*c0909341SAndroid Build Coastguard Worker REPX {mova [coeffq+32*x], m7}, 0, 1, 2, 3, 4, 5, 6, 7 4549*c0909341SAndroid Build Coastguard Worker 4550*c0909341SAndroid Build Coastguard Worker.loop_end: 4551*c0909341SAndroid Build Coastguard Worker add coeffq, 16 4552*c0909341SAndroid Build Coastguard Worker shr r3d, 2 4553*c0909341SAndroid Build Coastguard Worker jz .ret 4554*c0909341SAndroid Build Coastguard Worker test r3d, 2 4555*c0909341SAndroid Build Coastguard Worker jnz .loop 4556*c0909341SAndroid Build Coastguard Worker mov r4d, r3d 4557*c0909341SAndroid Build Coastguard Worker and r4d, 1 4558*c0909341SAndroid Build Coastguard Worker lea coeffq, [coeffq+r4*8+32*7] 4559*c0909341SAndroid Build Coastguard Worker mov dstq, [rsp+16*3] 4560*c0909341SAndroid Build Coastguard Worker lea r4, [dstq+8] 4561*c0909341SAndroid 
Build Coastguard Worker mov [rsp+16*3], r4 4562*c0909341SAndroid Build Coastguard Worker jmp .loop 4563*c0909341SAndroid Build Coastguard Worker 4564*c0909341SAndroid Build Coastguard Worker.ret: 4565*c0909341SAndroid Build Coastguard Worker RET 4566*c0909341SAndroid Build Coastguard Worker 4567*c0909341SAndroid Build Coastguard Worker 4568*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_32x32_8bpc, 4, 6, 8, 16*36, dst, stride, coeff, eob, tx2 4569*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 4570*c0909341SAndroid Build Coastguard Worker LEA r5, $$ 4571*c0909341SAndroid Build Coastguard Worker%endif 4572*c0909341SAndroid Build Coastguard Worker test eobd, eobd 4573*c0909341SAndroid Build Coastguard Worker jz .dconly 4574*c0909341SAndroid Build Coastguard Worker 4575*c0909341SAndroid Build Coastguard Worker call m(idct_32x32_internal_8bpc) 4576*c0909341SAndroid Build Coastguard Worker RET 4577*c0909341SAndroid Build Coastguard Worker 4578*c0909341SAndroid Build Coastguard Worker.dconly: 4579*c0909341SAndroid Build Coastguard Worker movd m1, [o(pw_2896x8)] 4580*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m1, [coeffq] 4581*c0909341SAndroid Build Coastguard Worker movd m2, [o(pw_8192)] 4582*c0909341SAndroid Build Coastguard Worker mov [coeffq], eobd 4583*c0909341SAndroid Build Coastguard Worker mov r3d, 32 4584*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(m(inv_txfm_add_dct_dct_32x8_8bpc).end)] 4585*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_32x8_8bpc).body 4586*c0909341SAndroid Build Coastguard Worker 4587*c0909341SAndroid Build Coastguard Worker 4588*c0909341SAndroid Build Coastguard Workercglobal idct_32x32_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2 4589*c0909341SAndroid Build Coastguard Worker mov r4d, 2 4590*c0909341SAndroid Build Coastguard Worker sub eobd, 136 4591*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*1+16*35], eobd 4592*c0909341SAndroid Build Coastguard 
Worker mov r3d, 4 4593*c0909341SAndroid Build Coastguard Worker cmovs r3d, r4d 4594*c0909341SAndroid Build Coastguard Worker 4595*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 4596*c0909341SAndroid Build Coastguard Worker LEA r5, $$ 4597*c0909341SAndroid Build Coastguard Worker%endif 4598*c0909341SAndroid Build Coastguard Worker 4599*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*2+16*35], coeffq 4600*c0909341SAndroid Build Coastguard Worker 4601*c0909341SAndroid Build Coastguard Worker.pass1_loop: 4602*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+64*1, 64*2 4603*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*19], m0 ;in1 4604*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*26], m1 ;in3 4605*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*23], m2 ;in5 4606*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*22], m3 ;in7 4607*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*21], m4 ;in9 4608*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*24], m5 ;in11 4609*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*25], m6 ;in13 4610*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*20], m7 ;in15 4611*c0909341SAndroid Build Coastguard Worker 4612*c0909341SAndroid Build Coastguard Worker mov tx2d, [rsp+gprsize*1+16*35] 4613*c0909341SAndroid Build Coastguard Worker test tx2d, tx2d 4614*c0909341SAndroid Build Coastguard Worker jl .fast 4615*c0909341SAndroid Build Coastguard Worker 4616*c0909341SAndroid Build Coastguard Worker.full: 4617*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+64*0, 64*4 4618*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).main 4619*c0909341SAndroid Build Coastguard Worker SAVE_7ROWS rsp+gprsize+16*3, 16 4620*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+64*2, 64*4 4621*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_8bpc).main 4622*c0909341SAndroid 
Build Coastguard Worker mova m7, [rsp+gprsize+16*0] 4623*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS rsp+gprsize+16*11, 16 4624*c0909341SAndroid Build Coastguard Worker 4625*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+64*17, 64*2 4626*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*33], m0 ;in17 4627*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*28], m1 ;in19 4628*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*29], m2 ;in21 4629*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*32], m3 ;in23 4630*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*31], m4 ;in25 4631*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*30], m5 ;in27 4632*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*27], m6 ;in29 4633*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*34], m7 ;in31 4634*c0909341SAndroid Build Coastguard Worker 4635*c0909341SAndroid Build Coastguard Worker call m(idct_8x32_internal_8bpc).main 4636*c0909341SAndroid Build Coastguard Worker jmp .pass1_end 4637*c0909341SAndroid Build Coastguard Worker 4638*c0909341SAndroid Build Coastguard Worker.fast: 4639*c0909341SAndroid Build Coastguard Worker mova m0, [coeffq+256*0] 4640*c0909341SAndroid Build Coastguard Worker mova m1, [coeffq+256*1] 4641*c0909341SAndroid Build Coastguard Worker mova m2, [coeffq+256*2] 4642*c0909341SAndroid Build Coastguard Worker mova m3, [coeffq+256*3] 4643*c0909341SAndroid Build Coastguard Worker pxor m4, m4 4644*c0909341SAndroid Build Coastguard Worker REPX {mova x, m4}, m5, m6, m7 4645*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).main 4646*c0909341SAndroid Build Coastguard Worker 4647*c0909341SAndroid Build Coastguard Worker SAVE_7ROWS rsp+gprsize+16*3, 16 4648*c0909341SAndroid Build Coastguard Worker mova m0, [coeffq+128*1] 4649*c0909341SAndroid Build Coastguard Worker mova m1, [coeffq+128*3] 4650*c0909341SAndroid Build Coastguard Worker mova 
m2, [coeffq+128*5] 4651*c0909341SAndroid Build Coastguard Worker mova m3, [coeffq+128*7] 4652*c0909341SAndroid Build Coastguard Worker pxor m4, m4 4653*c0909341SAndroid Build Coastguard Worker REPX {mova x, m4}, m5, m6, m7 4654*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_8bpc).main 4655*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize+16*0] 4656*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS rsp+gprsize+16*11, 16 4657*c0909341SAndroid Build Coastguard Worker 4658*c0909341SAndroid Build Coastguard Worker call m(idct_8x32_internal_8bpc).main_fast 4659*c0909341SAndroid Build Coastguard Worker 4660*c0909341SAndroid Build Coastguard Worker.pass1_end: 4661*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 4662*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_8192)] 4663*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end1)] 4664*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end1 4665*c0909341SAndroid Build Coastguard Worker 4666*c0909341SAndroid Build Coastguard Worker.pass1_end1: 4667*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+64*0, 64 4668*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*11, 16 4669*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 4670*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_8192)] 4671*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end2)] 4672*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end1 4673*c0909341SAndroid Build Coastguard Worker 4674*c0909341SAndroid Build Coastguard Worker.pass1_end2: 4675*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+64*8, 64 4676*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*19, 16 4677*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 4678*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_8192)] 4679*c0909341SAndroid Build Coastguard Worker 
lea tx2q, [o(.pass1_end3)] 4680*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end1 4681*c0909341SAndroid Build Coastguard Worker 4682*c0909341SAndroid Build Coastguard Worker.pass1_end3: 4683*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+64*16, 64 4684*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*27, 16 4685*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 4686*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_8192)] 4687*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end4)] 4688*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end1 4689*c0909341SAndroid Build Coastguard Worker 4690*c0909341SAndroid Build Coastguard Worker.pass1_end4: 4691*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+64*24, 64 4692*c0909341SAndroid Build Coastguard Worker 4693*c0909341SAndroid Build Coastguard Worker add coeffq, 16 4694*c0909341SAndroid Build Coastguard Worker dec r3d 4695*c0909341SAndroid Build Coastguard Worker jg .pass1_loop 4696*c0909341SAndroid Build Coastguard Worker 4697*c0909341SAndroid Build Coastguard Worker 4698*c0909341SAndroid Build Coastguard Worker.pass2: 4699*c0909341SAndroid Build Coastguard Worker mov coeffq, [rsp+gprsize*2+16*35] 4700*c0909341SAndroid Build Coastguard Worker mov r3d, 4 4701*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass2_end)] 4702*c0909341SAndroid Build Coastguard Worker 4703*c0909341SAndroid Build Coastguard Worker.pass2_loop: 4704*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*3+16*35], r3d 4705*c0909341SAndroid Build Coastguard Worker lea r3, [dstq+8] 4706*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*2+16*35], r3 4707*c0909341SAndroid Build Coastguard Worker 4708*c0909341SAndroid Build Coastguard Worker mova m0, [coeffq+16*4 ] 4709*c0909341SAndroid Build Coastguard Worker mova m1, [coeffq+16*12] 4710*c0909341SAndroid Build Coastguard Worker mova m2, 
[coeffq+16*20] 4711*c0909341SAndroid Build Coastguard Worker mova m3, [coeffq+16*28] 4712*c0909341SAndroid Build Coastguard Worker mova m4, [coeffq+16*5 ] 4713*c0909341SAndroid Build Coastguard Worker mova m5, [coeffq+16*13] 4714*c0909341SAndroid Build Coastguard Worker mova m6, [coeffq+16*21] 4715*c0909341SAndroid Build Coastguard Worker mova m7, [coeffq+16*29] 4716*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*19], m0 ;in1 4717*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*26], m1 ;in3 4718*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*23], m2 ;in5 4719*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*22], m3 ;in7 4720*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*21], m4 ;in9 4721*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*24], m5 ;in11 4722*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*25], m6 ;in13 4723*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*20], m7 ;in15 4724*c0909341SAndroid Build Coastguard Worker 4725*c0909341SAndroid Build Coastguard Worker mov eobd, [rsp+gprsize*1+16*35] 4726*c0909341SAndroid Build Coastguard Worker test eobd, eobd 4727*c0909341SAndroid Build Coastguard Worker jl .fast1 4728*c0909341SAndroid Build Coastguard Worker 4729*c0909341SAndroid Build Coastguard Worker.full1: 4730*c0909341SAndroid Build Coastguard Worker mova m0, [coeffq+16*0 ] 4731*c0909341SAndroid Build Coastguard Worker mova m1, [coeffq+16*16] 4732*c0909341SAndroid Build Coastguard Worker mova m2, [coeffq+16*1 ] 4733*c0909341SAndroid Build Coastguard Worker mova m3, [coeffq+16*17] 4734*c0909341SAndroid Build Coastguard Worker mova m4, [coeffq+16*2 ] 4735*c0909341SAndroid Build Coastguard Worker mova m5, [coeffq+16*18] 4736*c0909341SAndroid Build Coastguard Worker mova m6, [coeffq+16*3 ] 4737*c0909341SAndroid Build Coastguard Worker mova m7, [coeffq+16*19] 4738*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).main 
4739*c0909341SAndroid Build Coastguard Worker SAVE_7ROWS rsp+gprsize+16*3, 16 4740*c0909341SAndroid Build Coastguard Worker 4741*c0909341SAndroid Build Coastguard Worker mova m0, [coeffq+16*8 ] 4742*c0909341SAndroid Build Coastguard Worker mova m1, [coeffq+16*24] 4743*c0909341SAndroid Build Coastguard Worker mova m2, [coeffq+16*9 ] 4744*c0909341SAndroid Build Coastguard Worker mova m3, [coeffq+16*25] 4745*c0909341SAndroid Build Coastguard Worker mova m4, [coeffq+16*10] 4746*c0909341SAndroid Build Coastguard Worker mova m5, [coeffq+16*26] 4747*c0909341SAndroid Build Coastguard Worker mova m6, [coeffq+16*11] 4748*c0909341SAndroid Build Coastguard Worker mova m7, [coeffq+16*27] 4749*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_8bpc).main 4750*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize+16*0] 4751*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS rsp+gprsize+16*11, 16 4752*c0909341SAndroid Build Coastguard Worker 4753*c0909341SAndroid Build Coastguard Worker mova m0, [coeffq+16*6 ] 4754*c0909341SAndroid Build Coastguard Worker mova m1, [coeffq+16*14] 4755*c0909341SAndroid Build Coastguard Worker mova m2, [coeffq+16*22] 4756*c0909341SAndroid Build Coastguard Worker mova m3, [coeffq+16*30] 4757*c0909341SAndroid Build Coastguard Worker mova m4, [coeffq+16*7 ] 4758*c0909341SAndroid Build Coastguard Worker mova m5, [coeffq+16*15] 4759*c0909341SAndroid Build Coastguard Worker mova m6, [coeffq+16*23] 4760*c0909341SAndroid Build Coastguard Worker mova m7, [coeffq+16*31] 4761*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*33], m0 ;in17 4762*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*28], m1 ;in19 4763*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*29], m2 ;in21 4764*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*32], m3 ;in23 4765*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*31], m4 ;in25 4766*c0909341SAndroid Build Coastguard Worker mova 
[rsp+gprsize+16*30], m5 ;in27 4767*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*27], m6 ;in29 4768*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*34], m7 ;in31 4769*c0909341SAndroid Build Coastguard Worker 4770*c0909341SAndroid Build Coastguard Worker call m(idct_8x32_internal_8bpc).main 4771*c0909341SAndroid Build Coastguard Worker jmp tx2q 4772*c0909341SAndroid Build Coastguard Worker 4773*c0909341SAndroid Build Coastguard Worker.fast1: 4774*c0909341SAndroid Build Coastguard Worker mova m0, [coeffq+16*0 ] 4775*c0909341SAndroid Build Coastguard Worker mova m1, [coeffq+16*16] 4776*c0909341SAndroid Build Coastguard Worker mova m2, [coeffq+16*1 ] 4777*c0909341SAndroid Build Coastguard Worker mova m3, [coeffq+16*17] 4778*c0909341SAndroid Build Coastguard Worker pxor m4, m4 4779*c0909341SAndroid Build Coastguard Worker REPX {mova x, m4}, m5, m6, m7 4780*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).main 4781*c0909341SAndroid Build Coastguard Worker SAVE_7ROWS rsp+gprsize+16*3, 16 4782*c0909341SAndroid Build Coastguard Worker 4783*c0909341SAndroid Build Coastguard Worker mova m0, [coeffq+16*8 ] 4784*c0909341SAndroid Build Coastguard Worker mova m1, [coeffq+16*24] 4785*c0909341SAndroid Build Coastguard Worker mova m2, [coeffq+16*9 ] 4786*c0909341SAndroid Build Coastguard Worker mova m3, [coeffq+16*25] 4787*c0909341SAndroid Build Coastguard Worker pxor m4, m4 4788*c0909341SAndroid Build Coastguard Worker REPX {mova x, m4}, m5, m6, m7 4789*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_8bpc).main 4790*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize+16*0] 4791*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS rsp+gprsize+16*11, 16 4792*c0909341SAndroid Build Coastguard Worker 4793*c0909341SAndroid Build Coastguard Worker call m(idct_8x32_internal_8bpc).main_fast 4794*c0909341SAndroid Build Coastguard Worker jmp tx2q 4795*c0909341SAndroid Build Coastguard Worker 
4796*c0909341SAndroid Build Coastguard Worker.pass2_end: 4797*c0909341SAndroid Build Coastguard Worker lea r3, [o(.pass2_end1)] 4798*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x32_internal_8bpc).end 4799*c0909341SAndroid Build Coastguard Worker 4800*c0909341SAndroid Build Coastguard Worker.pass2_end1: 4801*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass2_end)] 4802*c0909341SAndroid Build Coastguard Worker add coeffq, 16*32 4803*c0909341SAndroid Build Coastguard Worker mov dstq, [rsp+gprsize*2+16*35] 4804*c0909341SAndroid Build Coastguard Worker mov r3d, [rsp+gprsize*3+16*35] 4805*c0909341SAndroid Build Coastguard Worker dec r3d 4806*c0909341SAndroid Build Coastguard Worker jg .pass2_loop 4807*c0909341SAndroid Build Coastguard Worker 4808*c0909341SAndroid Build Coastguard Worker ret 4809*c0909341SAndroid Build Coastguard Worker 4810*c0909341SAndroid Build Coastguard Worker 4811*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_32x32_8bpc, 4, 6, 8, 16*5, dst, stride, coeff, eob, tx2 4812*c0909341SAndroid Build Coastguard Worker mov r4d, 2 4813*c0909341SAndroid Build Coastguard Worker cmp eobd, 136 4814*c0909341SAndroid Build Coastguard Worker mov r3d, 4 4815*c0909341SAndroid Build Coastguard Worker cmovs r3d, r4d 4816*c0909341SAndroid Build Coastguard Worker 4817*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 4818*c0909341SAndroid Build Coastguard Worker LEA r5, $$ 4819*c0909341SAndroid Build Coastguard Worker%endif 4820*c0909341SAndroid Build Coastguard Worker 4821*c0909341SAndroid Build Coastguard Worker lea r4, [dstq+8] 4822*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*0+16*3], r4 4823*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*1+16*3], r3d 4824*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*2+16*3], r3d 4825*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*3+16*3], coeffq 4826*c0909341SAndroid Build Coastguard Worker 4827*c0909341SAndroid Build Coastguard 
Worker.loop: 4828*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq, 64 4829*c0909341SAndroid Build Coastguard Worker mova [rsp+16*1], m6 4830*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(m(idct_32x16_internal_8bpc).end)] 4831*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).pass1_end3 4832*c0909341SAndroid Build Coastguard Worker pmulhrsw m7, [o(pw_8192)] 4833*c0909341SAndroid Build Coastguard Worker mova [rsp+16*0], m7 4834*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_8192)] 4835*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m7}, m0, m1, m2, m3, m4, m5, m6 4836*c0909341SAndroid Build Coastguard Worker mova [rsp+16*1], m6 4837*c0909341SAndroid Build Coastguard Worker mova [rsp+16*2], m5 4838*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).end3 4839*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 4840*c0909341SAndroid Build Coastguard Worker 4841*c0909341SAndroid Build Coastguard Worker pxor m7, m7 4842*c0909341SAndroid Build Coastguard Worker REPX {mova [coeffq+64*x], m7}, 0, 1, 2, 3, 4, 5, 6, 7 4843*c0909341SAndroid Build Coastguard Worker 4844*c0909341SAndroid Build Coastguard Worker add coeffq, 16 4845*c0909341SAndroid Build Coastguard Worker dec r3d 4846*c0909341SAndroid Build Coastguard Worker jg .loop 4847*c0909341SAndroid Build Coastguard Worker 4848*c0909341SAndroid Build Coastguard Worker mov r4d, [rsp+gprsize*2+16*3] 4849*c0909341SAndroid Build Coastguard Worker dec r4d 4850*c0909341SAndroid Build Coastguard Worker jle .ret 4851*c0909341SAndroid Build Coastguard Worker 4852*c0909341SAndroid Build Coastguard Worker mov dstq, [rsp+gprsize*0+16*3] 4853*c0909341SAndroid Build Coastguard Worker mov coeffq, [rsp+gprsize*3+16*3] 4854*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*2+16*3], r4 4855*c0909341SAndroid Build Coastguard Worker lea r3, [dstq+8] 4856*c0909341SAndroid Build Coastguard Worker add coeffq, 64*8 4857*c0909341SAndroid Build 
; NOTE: this span was recovered from a collapsed listing; blame-viewer residue
; fused into every source line has been removed and one statement per line
; restored. Instructions, operands and their order are unchanged.

; ---- tail of the loop whose `.loop` label lies above this span --------------
; Prepares the next run of `.loop` (r3 = dstq+8 and coeffq += 64*8 were set on
; the lines immediately above) and re-enters it.
    mov     [rsp+gprsize*0+16*3], r3        ; stack slot 0 <- r3
    mov     r3d, [rsp+gprsize*1+16*3]       ; reload the `.loop` counter from slot 1
    mov     [rsp+gprsize*3+16*3], coeffq    ; stack slot 3 <- updated coeffq
    jmp .loop

.ret:
    RET


; -----------------------------------------------------------------------------
; inv_txfm_add_dct_dct_16x64_8bpc
;
; cglobal parameters (x86inc convention): 4 arguments, 6 GPRs, 8 vector
; registers, 16*68 bytes of stack; named registers dst, stride, coeff, eob, tx2.
;
; Flow:
;   eob == 0 -> .dconly: set up m0/m1/m2, r2d and tx2q, then tail-jump into the
;               .dconly code of inv_txfm_add_dct_dct_16x4_8bpc.
;   eob != 0 -> call idct_16x64_internal_8bpc, then return.
; -----------------------------------------------------------------------------
cglobal inv_txfm_add_dct_dct_16x64_8bpc, 4, 6, 8, 16*68, dst, stride, coeff, eob, tx2
%if ARCH_X86_32
    LEA     r5, $$                          ; 32-bit only: r5 = section base (presumably the base used by o(); confirm)
%endif
    test    eobd, eobd
    jz .dconly
    call m(idct_16x64_internal_8bpc)
.end:
    RET

.dconly:
    movd    m1, [o(pw_2896x8)]
    pmulhrsw m0, m1, [coeffq]               ; m0 = pmulhrsw(m1, first coefficients)
    movd    m2, [o(pw_8192)]
    mov     [coeffq], eobd                  ; eobd is 0 on this path: clears the first coefficient dword
    mov     r2d, 32                         ; NOTE(review): presumably a count consumed by the shared .dconly code - confirm there
    lea     tx2q, [o(.end)]                 ; tx2q = address of .end above (presumably where the shared code resumes; confirm)
    jmp m(inv_txfm_add_dct_dct_16x4_8bpc).dconly


; -----------------------------------------------------------------------------
; idct_16x64_internal_8bpc
;
; Declared with 0 args / 0 GPRs / 0 vector regs and no stack size; it is entered
; with `call` from the wrapper above. The body continues below this point.
; -----------------------------------------------------------------------------
cglobal idct_16x64_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    mov     r4d, 2                          ; alternative pass-1 count, selected by the cmovs that follows
; NOTE: this span was recovered from a collapsed listing; blame-viewer residue
; fused into every source line has been removed and one statement per line
; restored. Instructions, operands and their order are unchanged.
;
; idct_16x64_internal_8bpc, continued (r4d == 2 on entry to this span).
;
; Stack slots used here:
;   [rsp+gprsize*1+16*67]  eob - 151; its sign selects the reduced paths
;   [rsp+gprsize*2+16*67]  coeffq during pass 1, afterwards dstq+8
;   [rsp+gprsize*3+16*67]  pass-2 loop counter
;
; LOAD_8ROWS / SAVE_8ROWS / SAVE_7ROWS / REPX and the m(...) targets are defined
; elsewhere in the file.

    sub     eobd, 151
    mov     [rsp+gprsize*1+16*67], eobd     ; keep eob-151 for the pass-2 sign test
    mov     r3d, 4
    cmovs   r3d, r4d                        ; r3d = 2 pass-1 iterations if eob < 151, else 4

%if ARCH_X86_32
    LEA     r5, $$
%endif

    mov     [rsp+gprsize*2+16*67], coeffq   ; remember the coefficient base for pass 2

; ---- pass 1: r3d iterations, coeffq advances by 16 bytes per iteration ------
.pass1_loop:
    LOAD_8ROWS coeffq+64*0, 64*2
    call m(idct_8x8_internal_8bpc).main
    SAVE_7ROWS rsp+gprsize+16*3, 16
    LOAD_8ROWS coeffq+64*1, 64*2
    call m(idct_16x8_internal_8bpc).main
    mova    m7, [o(pw_8192)]
    lea     tx2q, [o(.pass1_end)]           ; continuation for the shared tail (presumably reached via tx2q; confirm)
    jmp m(idct_8x8_internal_8bpc).pass1_end1

.pass1_end:
    SAVE_8ROWS coeffq+64*8, 64
    LOAD_8ROWS rsp+gprsize+16*3, 16         ; reload the rows parked on the stack above
    mova    [rsp+gprsize+16*0], m7
    mova    m7, [o(pw_8192)]
    lea     tx2q, [o(.pass1_end1)]
    jmp m(idct_8x8_internal_8bpc).pass1_end1

.pass1_end1:
    SAVE_8ROWS coeffq+64*0, 64

    add     coeffq, 16
    dec     r3d
    jg .pass1_loop

; ---- pass 2 setup: two iterations, dstq+8 saved for the second one ----------
    mov     coeffq, [rsp+gprsize*2+16*67]   ; back to the coefficient base
    mov     r3d, 2
    lea     r4, [dstq+8]
    mov     [rsp+gprsize*2+16*67], r4       ; slot now holds dstq+8
    lea     r4, [o(.end1)]                  ; r4 = address of .end1 (copied to r3 at .end)

.pass2_loop:
    mov     [rsp+gprsize*3+16*67], r3d      ; save the loop counter
    mov     eobd, [rsp+gprsize*1+16*67]     ; eobd = eob - 151

    mova    m0, [coeffq+16*4 ]              ;in1
    mova    m1, [coeffq+16*12]              ;in3
    mova    m2, [coeffq+16*20]              ;in5
    mova    m3, [coeffq+16*28]              ;in7
    mova    m4, [coeffq+16*5 ]              ;in9
    mova    m5, [coeffq+16*13]              ;in11
    mova    m6, [coeffq+16*21]              ;in13
    mova    m7, [coeffq+16*29]              ;in15
    mova    [rsp+gprsize+16*35], m0         ;in1
    mova    [rsp+gprsize+16*49], m1         ;in3
    mova    [rsp+gprsize+16*43], m2         ;in5
    mova    [rsp+gprsize+16*41], m3         ;in7
    mova    [rsp+gprsize+16*39], m4         ;in9
    mova    [rsp+gprsize+16*45], m5         ;in11
    mova    [rsp+gprsize+16*47], m6         ;in13
    mova    [rsp+gprsize+16*37], m7         ;in15

    pxor    m4, m4
    mova    m0, [coeffq+16*0]
    mova    m1, [coeffq+16*1]

    test    eobd, eobd
    jl .fast                                ; eob < 151: take the reduced path

; ---- full path: m0-m3 loaded, m4-m7 zero for the 8x8 / 16x8 stages ----------
.full:
    mova    m2, [coeffq+16*2]
    mova    m3, [coeffq+16*3]

    REPX    {mova x, m4}, m5, m6, m7        ; m5 = m6 = m7 = 0
    call m(idct_8x8_internal_8bpc).main
    SAVE_7ROWS rsp+gprsize+16*3, 16

    pxor    m4, m4
    mova    m0, [coeffq+16*16]
    mova    m1, [coeffq+16*17]
    mova    m2, [coeffq+16*18]
    mova    m3, [coeffq+16*19]

    REPX    {mova x, m4}, m5, m6, m7
    call m(idct_16x8_internal_8bpc).main
    mova    m7, [rsp+gprsize+16*0]
    SAVE_8ROWS rsp+gprsize+16*11, 16

    mova    m0, [coeffq+16*8 ]
    mova    m1, [coeffq+16*24]
    mova    m2, [coeffq+16*9 ]
    mova    m3, [coeffq+16*25]
    mova    m4, [coeffq+16*10]
    mova    m5, [coeffq+16*26]
    mova    m6, [coeffq+16*11]
    mova    m7, [coeffq+16*27]
    mova    [rsp+gprsize+16*19], m0
    mova    [rsp+gprsize+16*26], m1
    mova    [rsp+gprsize+16*23], m2
    mova    [rsp+gprsize+16*22], m3
    mova    [rsp+gprsize+16*21], m4
    mova    [rsp+gprsize+16*24], m5
    mova    [rsp+gprsize+16*25], m6
    mova    [rsp+gprsize+16*20], m7

    call m(idct_8x32_internal_8bpc).main_fast
    SAVE_8ROWS rsp+gprsize+16*3, 16

    mova    m0, [coeffq+16*6 ]              ;in17
    mova    m1, [coeffq+16*14]              ;in19
    mova    m2, [coeffq+16*22]              ;in21
    mova    m3, [coeffq+16*30]              ;in23
    mova    m4, [coeffq+16*7 ]              ;in25
    mova    m5, [coeffq+16*15]              ;in27
    mova    m6, [coeffq+16*23]              ;in29
    mova    m7, [coeffq+16*31]              ;in31
    mova    [rsp+gprsize+16*63], m0         ;in17
    mova    [rsp+gprsize+16*53], m1         ;in19
    mova    [rsp+gprsize+16*55], m2         ;in21
    mova    [rsp+gprsize+16*61], m3         ;in23
    mova    [rsp+gprsize+16*59], m4         ;in25
    mova    [rsp+gprsize+16*57], m5         ;in27
    mova    [rsp+gprsize+16*51], m6         ;in29
    mova    [rsp+gprsize+16*65], m7         ;in31

    call .main
    jmp .end

; ---- fast path: only m0/m1 loaded per stage, in17..in31 never loaded --------
.fast:
    REPX    {mova x, m4}, m2, m3, m5, m6, m7 ; m4 is zero here (pxor above)
    call m(idct_8x8_internal_8bpc).main
    SAVE_7ROWS rsp+gprsize+16*3, 16

    pxor    m4, m4
    mova    m0, [coeffq+16*16]
    mova    m1, [coeffq+16*17]

    REPX    {mova x, m4}, m2, m3, m5, m6, m7
    call m(idct_16x8_internal_8bpc).main
    mova    m7, [rsp+gprsize+16*0]
    SAVE_8ROWS rsp+gprsize+16*11, 16

    mova    m0, [coeffq+16*8 ]
    mova    m1, [coeffq+16*24]
    mova    m2, [coeffq+16*9 ]
    mova    m3, [coeffq+16*25]
    mova    [rsp+gprsize+16*19], m0         ;in1
    mova    [rsp+gprsize+16*26], m1         ;in3
    mova    [rsp+gprsize+16*23], m2         ;in5
    mova    [rsp+gprsize+16*22], m3         ;in7

    call m(idct_8x32_internal_8bpc).main_veryfast
    SAVE_8ROWS rsp+gprsize+16*3, 16

    call .main_fast

; ---- common tail: hand off to the 8x32 end code, which is given .end1 in r3 --
.end:
    LOAD_8ROWS rsp+gprsize+16*3, 16
    mova    [rsp+gprsize+16*0], m7
    mov     r3, r4                          ; r3 = address of .end1 (presumably the continuation for .end2; confirm)
    jmp m(idct_8x32_internal_8bpc).end2

.end1:
    LOAD_8ROWS rsp+gprsize+16*35, 16
    lea     dstq, [dstq+strideq*2]
    lea     r3, [rsp+16*32+gprsize]         ; base pointer handed to .write
    call .write                             ; also zeroes 16*32 coefficient bytes and advances coeffq
    mov     dstq, [rsp+gprsize*2+16*67]     ; dstq = saved dstq+8
    mov     r3d, [rsp+gprsize*3+16*67]      ; reload the pass-2 counter
    lea     r4, [dstq+8]
    mov     [rsp+gprsize*2+16*67], r4
    lea     r4, [o(.end1)]

    dec     r3d
    jg .pass2_loop
; NOTE: this span was recovered from a collapsed listing; blame-viewer residue
; fused into every source line has been removed and one statement per line
; restored. Instructions, operands and their order are unchanged.

    ret                                     ; return for the pass-2 code that precedes .write

; -----------------------------------------------------------------------------
; .write
; In:   m0-m7 = first group of eight rows, r3 = base of the row buffer,
;       coeffq = coefficients to clear, dstq/strideq = destination.
; Does: parks m7 at [r3+16*0], zeroes the 16*32 bytes at coeffq (coeffq ends up
;       advanced by 16*32), then emits four groups of eight rows: the one in
;       registers, then the ones at r3+16*11, r3+16*19 and r3+16*27. The last
;       group falls through into .write_main, whose `ret` returns to .write's
;       caller.
; Clobbers: r4, m0-m7, flags; dstq is advanced.
; -----------------------------------------------------------------------------
.write:
    mova    [r3+16*0], m7                   ; free m7; .write_main2 reads it back from here
    mov     r4, -16*32
    pxor    m7, m7
    sub     coeffq, r4                      ; coeffq += 16*32, so [coeffq+r4] is the old base
.zero_loop:
    mova    [coeffq+r4+16*0], m7
    mova    [coeffq+r4+16*1], m7
    add     r4, 16*2
    jl .zero_loop                           ; runs until r4 reaches 0
    call .write_main2                       ; m7 is already parked, so skip the store
    LOAD_8ROWS r3+16*11, 16
    call .write_main
    LOAD_8ROWS r3+16*19, 16
    call .write_main
    LOAD_8ROWS r3+16*27, 16
; .write_main: same as .write_main2 but parks m7 at [r3+16*0] first.
.write_main:
    mova    [r3+16*0], m7
; .write_main2: scale m0-m6 and the parked row by pw_2048 with pmulhrsw, then
; write them out with two WRITE_8X4 invocations (macro defined elsewhere).
.write_main2:
    mova    m7, [o(pw_2048)]
    REPX    {pmulhrsw x, m7}, m0, m1, m2, m3, m4, m5, m6
    pmulhrsw m7, [r3+16*0]                  ; scale the parked eighth row
    mova    [r3+16*2], m5                   ; m5-m7 go to memory: WRITE_8X4 is given 5, 6, 7 as trailing register args
    mova    [r3+16*1], m6
    mova    [r3+16*0], m7
    WRITE_8X4 0, 1, 2, 3, 5, 6, 7
    lea     dstq, [dstq+strideq*2]
    WRITE_8X4 4, [r3+16*2], [r3+16*1], [r3+16*0], 5, 6, 7
    lea     dstq, [dstq+strideq*2]
; NOTE: this span was recovered from a collapsed listing; blame-viewer residue
; fused into every source line has been removed and one statement per line
; restored. Instructions, operands and their order are unchanged.

    ret                                     ; return for .write_main2, which precedes this span


; -----------------------------------------------------------------------------
; .main_fast
; Reduced entry to the shared .main2 code. It reads only the eight inputs
; tagged in1, in15, in9, in7, in5, in11, in13, in3 from the stack; the other
; inputs read by .main are not touched here. Each stanza:
;   1. loads one input row,
;   2. forms two pmulhrsw products from it,
;   3. stores those two, runs ITX_MULSUB_2W on them (macro defined elsewhere;
;      m7 = pd_2048 is passed as its fifth argument), and stores that pair too.
; Stack offsets use rsp+gprsize*2, one gprsize more than the rsp+gprsize stores
; made by the code that calls this label (presumably for the extra return
; address; confirm).
; On exit to .main2: m0, m3, m4, m5, m6 are live (tags t46/t47, t48/t49, copy of
; m3, copy of m0, t44/t45) and m7 = pd_2048.
; -----------------------------------------------------------------------------
ALIGN function_align
cglobal_label .main_fast
    mova    m0, [rsp+gprsize*2+16*35]       ;in1
    pmulhrsw m3, m0, [o(pw_4095x8)]         ;t62,t63
    pmulhrsw m0, [o(pw_101x8)]              ;t32,t33
    mova    m7, [o(pd_2048)]
    mova    [rsp+gprsize*2+16*35], m0       ;t32
    mova    [rsp+gprsize*2+16*66], m3       ;t63
    ITX_MULSUB_2W 3, 0, 1, 2, 7, 401, 4076  ;t33a, t62a
    mova    [rsp+gprsize*2+16*36], m3       ;t33a
    mova    [rsp+gprsize*2+16*65], m0       ;t62a

    mova    m1, [rsp+gprsize*2+16*37]       ;in15
    pmulhrsw m2, m1, [o(pw_3822x8)]         ;t60,t61
    pmulhrsw m1, [o(pw_m1474x8)]            ;t34,t35
    mova    [rsp+gprsize*2+16*38], m1       ;t35
    mova    [rsp+gprsize*2+16*63], m2       ;t60
    ITX_MULSUB_2W 2, 1, 0, 3, 7, m4076, 401 ;t34a, t61a
    mova    [rsp+gprsize*2+16*37], m2       ;t34a
    mova    [rsp+gprsize*2+16*64], m1       ;t61a

    mova    m0, [rsp+gprsize*2+16*39]       ;in9
    pmulhrsw m3, m0, [o(pw_3996x8)]         ;t58,t59
    pmulhrsw m0, [o(pw_897x8)]              ;t36,t37
    mova    [rsp+gprsize*2+16*39], m0       ;t36
    mova    [rsp+gprsize*2+16*62], m3       ;t59
    ITX_MULSUB_2W 3, 0, 1, 2, 7, 3166, 2598 ;t37a, t58a
    mova    [rsp+gprsize*2+16*40], m3       ;t37a
    mova    [rsp+gprsize*2+16*61], m0       ;t58a

    mova    m1, [rsp+gprsize*2+16*41]       ;in7
    pmulhrsw m2, m1, [o(pw_4036x8)]         ;t56,t57
    pmulhrsw m1, [o(pw_m700x8)]             ;t38,t39
    mova    [rsp+gprsize*2+16*42], m1       ;t39
    mova    [rsp+gprsize*2+16*59], m2       ;t56
    ITX_MULSUB_2W 2, 1, 0, 3, 7, m2598, 3166 ;t38a, t57a
    mova    [rsp+gprsize*2+16*41], m2       ;t38a
    mova    [rsp+gprsize*2+16*60], m1       ;t57a

    mova    m0, [rsp+gprsize*2+16*43]       ;in5
    pmulhrsw m3, m0, [o(pw_4065x8)]         ;t54,t55
    pmulhrsw m0, [o(pw_501x8)]              ;t40,t41
    mova    [rsp+gprsize*2+16*43], m0       ;t40
    mova    [rsp+gprsize*2+16*58], m3       ;t55
    ITX_MULSUB_2W 3, 0, 1, 2, 7, 1931, 3612 ;t41a, t54a
    mova    [rsp+gprsize*2+16*44], m3       ;t41a
    mova    [rsp+gprsize*2+16*57], m0       ;t54a

    mova    m1, [rsp+gprsize*2+16*45]       ;in11
    pmulhrsw m2, m1, [o(pw_3948x8)]         ;t52,t53
    pmulhrsw m1, [o(pw_m1092x8)]            ;t42,t43
    mova    [rsp+gprsize*2+16*46], m1       ;t43
    mova    [rsp+gprsize*2+16*55], m2       ;t52
    ITX_MULSUB_2W 2, 1, 0, 3, 7, m3612, 1931 ;t42a, t53a
    mova    [rsp+gprsize*2+16*45], m2       ;t42a
    mova    [rsp+gprsize*2+16*56], m1       ;t53a

    mova    m0, [rsp+gprsize*2+16*47]       ;in13
    pmulhrsw m3, m0, [o(pw_3889x8)]         ;t50,t51
    pmulhrsw m0, [o(pw_1285x8)]             ;t44,t45
    mova    m6, m0                          ; copy kept in m6 for .main2 (m0 is overwritten below)
    mova    [rsp+gprsize*2+16*54], m3       ;t51
    ITX_MULSUB_2W 3, 0, 1, 2, 7, 3920, 1189 ;t45a, t50a
    mova    [rsp+gprsize*2+16*48], m3       ;t45a
    mova    [rsp+gprsize*2+16*53], m0       ;t50a

    mova    m0, [rsp+gprsize*2+16*49]       ;in3
    pmulhrsw m3, m0, [o(pw_4085x8)]         ;t48,t49
    pmulhrsw m0, [o(pw_m301x8)]             ;t46,t47
    mova    m4, m3                          ; .main2 takes its rotation inputs in m4/m5
    mova    m5, m0

    jmp .main2

ALIGN function_align
; NOTE: this span was recovered from a collapsed listing; blame-viewer residue
; fused into every source line has been removed and one statement per line
; restored. Instructions, operands and their order are unchanged.
;
; -----------------------------------------------------------------------------
; .main
; Full first stage ahead of the shared .main2 code. Each stanza pairs two input
; rows from the stack (tags in1/in31, in17/in15, in9/in23, in25/in7, in5/in27,
; in21/in11, in13/in19, in29/in3):
;   1. four pmulhrsw products (two per input),
;   2. a saturating add/sub butterfly (paddsw / psubsw),
;   3. ITX_MULSUB_2W on the difference pair (macro defined elsewhere; m7 =
;      pd_2048 is passed as its fifth argument),
;   4. the four results stored back to the stack.
; The last two stanzas keep values in registers for .main2 instead: m6 holds the
; t44 value, and the in29/in3 stanza leaves m0, m3, m4, m5 (t47, t48, t49, t46)
; live before falling through.
; -----------------------------------------------------------------------------
cglobal_label .main
    mova    m0, [rsp+gprsize*2+16*35]       ;in1
    mova    m1, [rsp+gprsize*2+16*65]       ;in31
    pmulhrsw m3, m0, [o(pw_4095x8)]         ;t63a
    pmulhrsw m0, [o(pw_101x8)]              ;t32a
    pmulhrsw m2, m1, [o(pw_2967x8)]         ;t62a
    pmulhrsw m1, [o(pw_m2824x8)]            ;t33a
    mova    m7, [o(pd_2048)]
    psubsw  m4, m0, m1                      ;t33
    paddsw  m0, m1                          ;t32
    psubsw  m5, m3, m2                      ;t62
    paddsw  m3, m2                          ;t63
    ITX_MULSUB_2W 5, 4, 1, 2, 7, 401, 4076  ;t33a, t62a
    mova    [rsp+gprsize*2+16*35], m0       ;t32
    mova    [rsp+gprsize*2+16*36], m5       ;t33a
    mova    [rsp+gprsize*2+16*65], m4       ;t62a
    mova    [rsp+gprsize*2+16*66], m3       ;t63

    mova    m0, [rsp+gprsize*2+16*63]       ;in17
    mova    m1, [rsp+gprsize*2+16*37]       ;in15
    pmulhrsw m3, m0, [o(pw_3745x8)]         ;t61a
    pmulhrsw m0, [o(pw_1660x8)]             ;t34a
    pmulhrsw m2, m1, [o(pw_3822x8)]         ;t60a
    pmulhrsw m1, [o(pw_m1474x8)]            ;t35a
    psubsw  m4, m1, m0                      ;t34
    paddsw  m0, m1                          ;t35
    psubsw  m5, m2, m3                      ;t61
    paddsw  m3, m2                          ;t60
    ITX_MULSUB_2W 5, 4, 1, 2, 7, m4076, 401 ;t34a, t61a
    mova    [rsp+gprsize*2+16*37], m5       ;t34a
    mova    [rsp+gprsize*2+16*38], m0       ;t35
    mova    [rsp+gprsize*2+16*63], m3       ;t60
    mova    [rsp+gprsize*2+16*64], m4       ;t61a

    mova    m0, [rsp+gprsize*2+16*39]       ;in9
    mova    m1, [rsp+gprsize*2+16*61]       ;in23
    pmulhrsw m3, m0, [o(pw_3996x8)]         ;t59a
    pmulhrsw m0, [o(pw_897x8)]              ;t36a
    pmulhrsw m2, m1, [o(pw_3461x8)]         ;t58a
    pmulhrsw m1, [o(pw_m2191x8)]            ;t37a
    psubsw  m4, m0, m1                      ;t37
    paddsw  m0, m1                          ;t36
    psubsw  m5, m3, m2                      ;t58
    paddsw  m3, m2                          ;t59
    ITX_MULSUB_2W 5, 4, 1, 2, 7, 3166, 2598 ;t37a, t58a
    mova    [rsp+gprsize*2+16*39], m0       ;t36
    mova    [rsp+gprsize*2+16*40], m5       ;t37a
    mova    [rsp+gprsize*2+16*61], m4       ;t58a
    mova    [rsp+gprsize*2+16*62], m3       ;t59

    mova    m0, [rsp+gprsize*2+16*59]       ;in25
    mova    m1, [rsp+gprsize*2+16*41]       ;in7
    pmulhrsw m3, m0, [o(pw_3349x8)]         ;t57a
    pmulhrsw m0, [o(pw_2359x8)]             ;t38a
    pmulhrsw m2, m1, [o(pw_4036x8)]         ;t56a
    pmulhrsw m1, [o(pw_m700x8)]             ;t39a
    psubsw  m4, m1, m0                      ;t38
    paddsw  m0, m1                          ;t39
    psubsw  m5, m2, m3                      ;t57
    paddsw  m3, m2                          ;t56
    ITX_MULSUB_2W 5, 4, 1, 2, 7, m2598, 3166 ;t38a, t57a
    mova    [rsp+gprsize*2+16*41], m5       ;t38a
    mova    [rsp+gprsize*2+16*42], m0       ;t39
    mova    [rsp+gprsize*2+16*59], m3       ;t56
    mova    [rsp+gprsize*2+16*60], m4       ;t57a

    mova    m0, [rsp+gprsize*2+16*43]       ;in5
    mova    m1, [rsp+gprsize*2+16*57]       ;in27
    pmulhrsw m3, m0, [o(pw_4065x8)]         ;t55a
    pmulhrsw m0, [o(pw_501x8)]              ;t40a
    pmulhrsw m2, m1, [o(pw_3229x8)]         ;t54a
    pmulhrsw m1, [o(pw_m2520x8)]            ;t41a
    psubsw  m4, m0, m1                      ;t41
    paddsw  m0, m1                          ;t40
    psubsw  m5, m3, m2                      ;t54
    paddsw  m3, m2                          ;t55
    ITX_MULSUB_2W 5, 4, 1, 2, 7, 1931, 3612 ;t41a, t54a
    mova    [rsp+gprsize*2+16*43], m0       ;t40
    mova    [rsp+gprsize*2+16*44], m5       ;t41a
    mova    [rsp+gprsize*2+16*57], m4       ;t54a
    mova    [rsp+gprsize*2+16*58], m3       ;t55

    mova    m0, [rsp+gprsize*2+16*55]       ;in21
    mova    m1, [rsp+gprsize*2+16*45]       ;in11
    pmulhrsw m3, m0, [o(pw_3564x8)]         ;t53a
    pmulhrsw m0, [o(pw_2019x8)]             ;t42a
    pmulhrsw m2, m1, [o(pw_3948x8)]         ;t52a
    pmulhrsw m1, [o(pw_m1092x8)]            ;t43a
    psubsw  m4, m1, m0                      ;t42
    paddsw  m0, m1                          ;t43
    psubsw  m5, m2, m3                      ;t53
    paddsw  m3, m2                          ;t52
    ITX_MULSUB_2W 5, 4, 1, 2, 7, m3612, 1931 ;t42a, t53a
    mova    [rsp+gprsize*2+16*45], m5       ;t42a
    mova    [rsp+gprsize*2+16*46], m0       ;t43
    mova    [rsp+gprsize*2+16*55], m3       ;t52
    mova    [rsp+gprsize*2+16*56], m4       ;t53a

    mova    m0, [rsp+gprsize*2+16*47]       ;in13
    mova    m1, [rsp+gprsize*2+16*53]       ;in19
    pmulhrsw m3, m0, [o(pw_3889x8)]         ;t51a
    pmulhrsw m0, [o(pw_1285x8)]             ;t44a
    pmulhrsw m2, m1, [o(pw_3659x8)]         ;t50a
    pmulhrsw m1, [o(pw_m1842x8)]            ;t45a
    psubsw  m4, m0, m1                      ;t45
    paddsw  m0, m1                          ;t44
    psubsw  m5, m3, m2                      ;t50
    paddsw  m3, m2                          ;t51
    ITX_MULSUB_2W 5, 4, 1, 2, 7, 3920, 1189 ;t45a, t50a
    mova    m6, m0                          ; t44 stays in m6 for .main2
    mova    [rsp+gprsize*2+16*48], m5       ;t45a
    mova    [rsp+gprsize*2+16*53], m4       ;t50a
    mova    [rsp+gprsize*2+16*54], m3       ;t51

    mova    m0, [rsp+gprsize*2+16*51]       ;in29
    mova    m1, [rsp+gprsize*2+16*49]       ;in3
    pmulhrsw m3, m0, [o(pw_3102x8)]         ;t49a
    pmulhrsw m0, [o(pw_2675x8)]             ;t46a
    pmulhrsw m2, m1, [o(pw_4085x8)]         ;t48a
    pmulhrsw m1, [o(pw_m301x8)]             ;t47a
    psubsw  m5, m1, m0                      ;t46
    paddsw  m0, m1                          ;t47
    psubsw  m4, m2, m3                      ;t49
    paddsw  m3, m2                          ;t48

; .main2: shared continuation, reached by fall-through from .main and by
; `jmp .main2` from .main_fast. Only its first three statements are on this
; span; the rest follows unchanged.
ALIGN function_align
.main2:
    ITX_MULSUB_2W 4, 5, 1, 2, 7, m1189, 3920 ;t46a, t49a
    mova    m1, [rsp+gprsize*2+16*54]       ;t51
    psubsw  m2, m0, m6                      ;t44a
paddsw m0, m6 ;t47a 5303*c0909341SAndroid Build Coastguard Worker psubsw m6, m3, m1 ;t51a 5304*c0909341SAndroid Build Coastguard Worker paddsw m3, m1 ;t48a 5305*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*50], m0 ;t47a 5306*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*51], m3 ;t48a 5307*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 6, 2, 0, 3, 7, m2276, 3406 ;t44, t51 5308*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*47], m6 ;t44 5309*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*54], m2 ;t51 5310*c0909341SAndroid Build Coastguard Worker 5311*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*48] ;t45a 5312*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*53] ;t50a 5313*c0909341SAndroid Build Coastguard Worker psubsw m2, m4, m0 ;t45 5314*c0909341SAndroid Build Coastguard Worker paddsw m4, m0 ;t46 5315*c0909341SAndroid Build Coastguard Worker psubsw m6, m5, m3 ;t50 5316*c0909341SAndroid Build Coastguard Worker paddsw m5, m3 ;t49 5317*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 6, 2, 0, 3, 7, m2276, 3406 ;t45a, t50a 5318*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*48], m6 ;t45a 5319*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*49], m4 ;t46 5320*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*52], m5 ;t49 5321*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*53], m2 ;t50a 5322*c0909341SAndroid Build Coastguard Worker 5323*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*43] ;t40 5324*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*46] ;t43 5325*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*55] ;t52 5326*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*58] ;t55 5327*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m2 ;t43a 5328*c0909341SAndroid Build Coastguard Worker paddsw m0, m2 
;t40a 5329*c0909341SAndroid Build Coastguard Worker psubsw m5, m1, m3 ;t52a 5330*c0909341SAndroid Build Coastguard Worker paddsw m1, m3 ;t55a 5331*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 2, 3, 7, 3406, 2276 ;t43, t52 5332*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*43], m0 ;t40a 5333*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*46], m5 ;t43 5334*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*55], m4 ;t52 5335*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*58], m1 ;t55a 5336*c0909341SAndroid Build Coastguard Worker 5337*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*44] ;t41a 5338*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*45] ;t42a 5339*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*56] ;t53a 5340*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*57] ;t54a 5341*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m2 ;t42 5342*c0909341SAndroid Build Coastguard Worker paddsw m0, m2 ;t41 5343*c0909341SAndroid Build Coastguard Worker psubsw m5, m1, m3 ;t53 5344*c0909341SAndroid Build Coastguard Worker paddsw m1, m3 ;t54 5345*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 2, 3, 7, 3406, 2276 ;t42a, t53a 5346*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*44], m0 ;t41 5347*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*45], m5 ;t42a 5348*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*56], m4 ;t53a 5349*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*57], m1 ;t54 5350*c0909341SAndroid Build Coastguard Worker 5351*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*41] ;t38a 5352*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*40] ;t37a 5353*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*61] ;t58a 5354*c0909341SAndroid Build Coastguard Worker mova m1, 
[rsp+gprsize*2+16*60] ;t57a 5355*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m2 ;t37 5356*c0909341SAndroid Build Coastguard Worker paddsw m0, m2 ;t38 5357*c0909341SAndroid Build Coastguard Worker psubsw m5, m1, m3 ;t58 5358*c0909341SAndroid Build Coastguard Worker paddsw m1, m3 ;t57 5359*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 2, 3, 7, m4017, 799 ;t37a, t58a 5360*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*41], m0 ;t38 5361*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*40], m5 ;t37a 5362*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*61], m4 ;t58a 5363*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*60], m1 ;t57 5364*c0909341SAndroid Build Coastguard Worker 5365*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*42] ;t39 5366*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*39] ;t36 5367*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*62] ;t59 5368*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*59] ;t56 5369*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m2 ;t36a 5370*c0909341SAndroid Build Coastguard Worker paddsw m0, m2 ;t39a 5371*c0909341SAndroid Build Coastguard Worker psubsw m5, m1, m3 ;t59a 5372*c0909341SAndroid Build Coastguard Worker paddsw m1, m3 ;t56a 5373*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 2, 3, 7, m4017, 799 ;t36, t59 5374*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*42], m0 ;t39a 5375*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*39], m5 ;t36 5376*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*62], m4 ;t59 5377*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*59], m1 ;t56a 5378*c0909341SAndroid Build Coastguard Worker 5379*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*35] ;t32 5380*c0909341SAndroid Build Coastguard Worker mova m2, 
[rsp+gprsize*2+16*38] ;t35 5381*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*63] ;t60 5382*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*66] ;t63 5383*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m2 ;t35a 5384*c0909341SAndroid Build Coastguard Worker paddsw m0, m2 ;t32a 5385*c0909341SAndroid Build Coastguard Worker psubsw m5, m1, m3 ;t60a 5386*c0909341SAndroid Build Coastguard Worker paddsw m1, m3 ;t63a 5387*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 2, 3, 7, 799, 4017 ;t35, t60 5388*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*35], m0 ;t32a 5389*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*38], m5 ;t35 5390*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*63], m4 ;t60 5391*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*66], m1 ;t63a 5392*c0909341SAndroid Build Coastguard Worker 5393*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*36] ;t33a 5394*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*37] ;t34a 5395*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*64] ;t61a 5396*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*65] ;t62a 5397*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m2 ;t34 5398*c0909341SAndroid Build Coastguard Worker paddsw m0, m2 ;t33 5399*c0909341SAndroid Build Coastguard Worker psubsw m5, m1, m3 ;t61 5400*c0909341SAndroid Build Coastguard Worker paddsw m1, m3 ;t62 5401*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 2, 3, 7, 799, 4017 ;t34a, t61a 5402*c0909341SAndroid Build Coastguard Worker 5403*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*41] ;t38 5404*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*60] ;t57 5405*c0909341SAndroid Build Coastguard Worker psubsw m6, m0, m2 ;t38a 5406*c0909341SAndroid Build Coastguard Worker paddsw m0, m2 ;t33a 5407*c0909341SAndroid 
Build Coastguard Worker psubsw m2, m1, m3 ;t57a 5408*c0909341SAndroid Build Coastguard Worker paddsw m1, m3 ;t62a 5409*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*36], m0 ;t33a 5410*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*65], m1 ;t62a 5411*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 2, 6, 0, 3, 7, 1567, 3784 ;t38, t57 5412*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*41], m2 ;t38 5413*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*60], m6 ;t57 5414*c0909341SAndroid Build Coastguard Worker 5415*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*40] ;t37 5416*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*61] ;t58 5417*c0909341SAndroid Build Coastguard Worker psubsw m0, m5, m2 ;t37 5418*c0909341SAndroid Build Coastguard Worker paddsw m5, m2 ;t34 5419*c0909341SAndroid Build Coastguard Worker psubsw m1, m4, m3 ;t58 5420*c0909341SAndroid Build Coastguard Worker paddsw m4, m3 ;t61 5421*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 1, 0, 2, 3, 7, 1567, 3784 ;t37a, t58a 5422*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*37], m5 ;t34 5423*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*64], m4 ;t61 5424*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*40], m1 ;t37a 5425*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*61], m0 ;t58a 5426*c0909341SAndroid Build Coastguard Worker 5427*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*38] ;t35 5428*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*39] ;t36 5429*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*62] ;t59 5430*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*63] ;t60 5431*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m2 ;t36a 5432*c0909341SAndroid Build Coastguard Worker paddsw m0, m2 ;t35a 5433*c0909341SAndroid Build Coastguard 
Worker psubsw m5, m1, m3 ;t59a 5434*c0909341SAndroid Build Coastguard Worker paddsw m1, m3 ;t60a 5435*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 2, 3, 7, 1567, 3784 ;t36, t59 5436*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*38], m0 ;t35a 5437*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*39], m5 ;t36 5438*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*62], m4 ;t59 5439*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*63], m1 ;t60a 5440*c0909341SAndroid Build Coastguard Worker 5441*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*35] ;t32a 5442*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*42] ;t39a 5443*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*59] ;t56a 5444*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*66] ;t63a 5445*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m2 ;t39 5446*c0909341SAndroid Build Coastguard Worker paddsw m0, m2 ;t32 5447*c0909341SAndroid Build Coastguard Worker psubsw m5, m1, m3 ;t56 5448*c0909341SAndroid Build Coastguard Worker paddsw m1, m3 ;t63 5449*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 2, 3, 7, 1567, 3784 ;t39a, t56a 5450*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*35], m0 ;t32 5451*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*42], m5 ;t39a 5452*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*59], m4 ;t56a 5453*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*66], m1 ;t63 5454*c0909341SAndroid Build Coastguard Worker 5455*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*50] ;t47a 5456*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*43] ;t40a 5457*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*58] ;t55a 5458*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*51] ;t48a 5459*c0909341SAndroid 
Build Coastguard Worker psubsw m4, m0, m2 ;t40 5460*c0909341SAndroid Build Coastguard Worker paddsw m0, m2 ;t47 5461*c0909341SAndroid Build Coastguard Worker psubsw m5, m1, m3 ;t55 5462*c0909341SAndroid Build Coastguard Worker paddsw m1, m3 ;t48 5463*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 2, 3, 7, m3784, 1567 ;t40a, t55a 5464*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*50], m0 ;t47 5465*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*43], m5 ;t40a 5466*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*58], m4 ;t55a 5467*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*51], m1 ;t48 5468*c0909341SAndroid Build Coastguard Worker 5469*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*49] ;t46 5470*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*44] ;t41 5471*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*57] ;t54 5472*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*52] ;t49 5473*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m2 ;t41a 5474*c0909341SAndroid Build Coastguard Worker paddsw m0, m2 ;t46a 5475*c0909341SAndroid Build Coastguard Worker psubsw m5, m1, m3 ;t54a 5476*c0909341SAndroid Build Coastguard Worker paddsw m1, m3 ;t49a 5477*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 2, 3, 7, m3784, 1567 ;t41, t54 5478*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*49], m0 ;t46a 5479*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*44], m5 ;t41 5480*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*57], m4 ;t54 5481*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*52], m1 ;t49a 5482*c0909341SAndroid Build Coastguard Worker 5483*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*48] ;t45a 5484*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*45] ;t42a 5485*c0909341SAndroid Build Coastguard 
Worker mova m3, [rsp+gprsize*2+16*56] ;t53a 5486*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*53] ;t50a 5487*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m2 ;t42 5488*c0909341SAndroid Build Coastguard Worker paddsw m0, m2 ;t45 5489*c0909341SAndroid Build Coastguard Worker psubsw m5, m1, m3 ;t53 5490*c0909341SAndroid Build Coastguard Worker paddsw m1, m3 ;t50 5491*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 2, 3, 7, m3784, 1567 ;t42a, t53a 5492*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*48], m0 ;t45 5493*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*45], m5 ;t42a 5494*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*56], m4 ;t53a 5495*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*53], m1 ;t50 5496*c0909341SAndroid Build Coastguard Worker 5497*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*47] ;t44 5498*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*46] ;t43 5499*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*55] ;t52 5500*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*54] ;t51 5501*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m2 ;t43a 5502*c0909341SAndroid Build Coastguard Worker paddsw m0, m2 ;t44a 5503*c0909341SAndroid Build Coastguard Worker psubsw m5, m1, m3 ;t52a 5504*c0909341SAndroid Build Coastguard Worker paddsw m1, m3 ;t51a 5505*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 2, 3, 7, m3784, 1567 ;t43, t52 5506*c0909341SAndroid Build Coastguard Worker 5507*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*38] ;t35a 5508*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*31] ;tmp[28] 5509*c0909341SAndroid Build Coastguard Worker psubsw m6, m2, m0 ;t44 5510*c0909341SAndroid Build Coastguard Worker paddsw m2, m0 ;t35 5511*c0909341SAndroid Build Coastguard Worker psubsw m0, m3, m2 ;out35 
5512*c0909341SAndroid Build Coastguard Worker paddsw m2, m3 ;out28 5513*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*63] ;t60a 5514*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*38], m0 ;out35 5515*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*31], m2 ;out28 5516*c0909341SAndroid Build Coastguard Worker psubsw m0, m3, m1 ;t51 5517*c0909341SAndroid Build Coastguard Worker paddsw m3, m1 ;t60 5518*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 0, 6, 1, 2, 7, 2896, 2896 ;t44a, t51a 5519*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*6 ] ;tmp[3] 5520*c0909341SAndroid Build Coastguard Worker psubsw m1, m2, m3 ;out60 5521*c0909341SAndroid Build Coastguard Worker paddsw m2, m3 ;out3 5522*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*22] ;tmp[19] 5523*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*63], m1 ;out60 5524*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*6 ], m2 ;out3 5525*c0909341SAndroid Build Coastguard Worker psubsw m1, m3, m0 ;out44 5526*c0909341SAndroid Build Coastguard Worker paddsw m3, m0 ;out19 5527*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*15] ;tmp[12] 5528*c0909341SAndroid Build Coastguard Worker 5529*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*39] ;t36 5530*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*47], m1 ;out44 5531*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*22], m3 ;out19 5532*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*62] ;t59 5533*c0909341SAndroid Build Coastguard Worker psubsw m3, m2, m6 ;out51 5534*c0909341SAndroid Build Coastguard Worker paddsw m2, m6 ;out12 5535*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*54], m3 ;out51 5536*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*15], m2 ;out12 5537*c0909341SAndroid Build Coastguard Worker psubsw m2, m0, m5 
;t43a 5538*c0909341SAndroid Build Coastguard Worker paddsw m0, m5 ;t36a 5539*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize*2+16*30] ;tmp[27] 5540*c0909341SAndroid Build Coastguard Worker psubsw m3, m1, m4 ;t52a 5541*c0909341SAndroid Build Coastguard Worker paddsw m1, m4 ;t59a 5542*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 3, 2, 4, 6, 7, 2896, 2896 ;t43, t52 5543*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+gprsize*2+16*7 ] ;tmp[4 ] 5544*c0909341SAndroid Build Coastguard Worker psubsw m6, m5, m0 ;out36 5545*c0909341SAndroid Build Coastguard Worker paddsw m5, m0 ;out27 5546*c0909341SAndroid Build Coastguard Worker psubsw m0, m4, m1 ;out59 5547*c0909341SAndroid Build Coastguard Worker paddsw m4, m1 ;out4 5548*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*39], m6 ;out36 5549*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*30], m5 ;out27 5550*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*62], m0 ;out59 5551*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*7 ], m4 ;out4 5552*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*23] ;tmp[20] 5553*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize*2+16*14] ;tmp[11] 5554*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m3 ;out43 5555*c0909341SAndroid Build Coastguard Worker paddsw m0, m3 ;out20 5556*c0909341SAndroid Build Coastguard Worker psubsw m6, m5, m2 ;out52 5557*c0909341SAndroid Build Coastguard Worker paddsw m5, m2 ;out11 5558*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*46], m4 ;out43 5559*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*23], m0 ;out20 5560*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*55], m6 ;out52 5561*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*14], m5 ;out11 5562*c0909341SAndroid Build Coastguard Worker 5563*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*40] ;t37a 
5564*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize*2+16*45] ;t42a 5565*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*56] ;t53a 5566*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*61] ;t58a 5567*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*29] ;tmp[26] 5568*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m5 ;t42 5569*c0909341SAndroid Build Coastguard Worker paddsw m0, m5 ;t37 5570*c0909341SAndroid Build Coastguard Worker psubsw m5, m1, m3 ;t53 5571*c0909341SAndroid Build Coastguard Worker paddsw m1, m3 ;t58 5572*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 3, 6, 7, 2896, 2896 ;t43, t52 5573*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*8 ] ;tmp[5 ] 5574*c0909341SAndroid Build Coastguard Worker psubsw m6, m2, m0 ;out37 5575*c0909341SAndroid Build Coastguard Worker paddsw m2, m0 ;out26 5576*c0909341SAndroid Build Coastguard Worker psubsw m0, m3, m1 ;out58 5577*c0909341SAndroid Build Coastguard Worker paddsw m3, m1 ;out5 5578*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*40], m6 ;out37 5579*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*29], m2 ;out26 5580*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*61], m0 ;out58 5581*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*8 ], m3 ;out5 5582*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*24] ;tmp[21] 5583*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*13] ;tmp[10] 5584*c0909341SAndroid Build Coastguard Worker psubsw m2, m0, m5 ;out42 5585*c0909341SAndroid Build Coastguard Worker paddsw m0, m5 ;out21 5586*c0909341SAndroid Build Coastguard Worker psubsw m3, m1, m4 ;out53 5587*c0909341SAndroid Build Coastguard Worker paddsw m1, m4 ;out10 5588*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*45], m2 ;out42 5589*c0909341SAndroid Build Coastguard Worker mova 
[rsp+gprsize*2+16*24], m0 ;out21 5590*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*56], m3 ;out53 5591*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*13], m1 ;out10 5592*c0909341SAndroid Build Coastguard Worker 5593*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*41] ;t38 5594*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize*2+16*44] ;t41 5595*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*57] ;t54 5596*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*60] ;t57 5597*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*28] ;tmp[25] 5598*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m5 ;t41a 5599*c0909341SAndroid Build Coastguard Worker paddsw m0, m5 ;t38a 5600*c0909341SAndroid Build Coastguard Worker psubsw m5, m1, m3 ;t54a 5601*c0909341SAndroid Build Coastguard Worker paddsw m1, m3 ;t57a 5602*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 3, 6, 7, 2896, 2896 ;t41a, t54a 5603*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*9 ] ;tmp[6 ] 5604*c0909341SAndroid Build Coastguard Worker psubsw m6, m2, m0 ;out38 5605*c0909341SAndroid Build Coastguard Worker paddsw m2, m0 ;out25 5606*c0909341SAndroid Build Coastguard Worker psubsw m0, m3, m1 ;out57 5607*c0909341SAndroid Build Coastguard Worker paddsw m3, m1 ;out6 5608*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*41], m6 ;out38 5609*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*28], m2 ;out25 5610*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*60], m0 ;out57 5611*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*9 ], m3 ;out6 5612*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*25] ;tmp[22] 5613*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*12] ;tmp[9 ] 5614*c0909341SAndroid Build Coastguard Worker psubsw m2, m0, m5 ;out41 5615*c0909341SAndroid Build 
Coastguard Worker paddsw m0, m5 ;out22 5616*c0909341SAndroid Build Coastguard Worker psubsw m3, m1, m4 ;out54 5617*c0909341SAndroid Build Coastguard Worker paddsw m1, m4 ;out9 5618*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*44], m2 ;out41 5619*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*25], m0 ;out22 5620*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*57], m3 ;out54 5621*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*12], m1 ;out9 5622*c0909341SAndroid Build Coastguard Worker 5623*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*42] ;t39a 5624*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize*2+16*43] ;t40a 5625*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*58] ;t55a 5626*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*59] ;t56a 5627*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*27] ;tmp[24] 5628*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m5 ;t40 5629*c0909341SAndroid Build Coastguard Worker paddsw m0, m5 ;t39 5630*c0909341SAndroid Build Coastguard Worker psubsw m5, m1, m3 ;t55 5631*c0909341SAndroid Build Coastguard Worker paddsw m1, m3 ;t56 5632*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 3, 6, 7, 2896, 2896 ;t40a, t55a 5633*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*10] ;tmp[7 ] 5634*c0909341SAndroid Build Coastguard Worker psubsw m6, m2, m0 ;out39 5635*c0909341SAndroid Build Coastguard Worker paddsw m2, m0 ;out24 5636*c0909341SAndroid Build Coastguard Worker psubsw m0, m3, m1 ;out56 5637*c0909341SAndroid Build Coastguard Worker paddsw m3, m1 ;out7 5638*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*42], m6 ;out39 5639*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*27], m2 ;out24 5640*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*59], m0 ;out56 5641*c0909341SAndroid Build Coastguard Worker 
mova [rsp+gprsize*2+16*10], m3 ;out7 5642*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*26] ;tmp[23] 5643*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*11] ;tmp[8 ] 5644*c0909341SAndroid Build Coastguard Worker psubsw m2, m0, m5 ;out40 5645*c0909341SAndroid Build Coastguard Worker paddsw m0, m5 ;out23 5646*c0909341SAndroid Build Coastguard Worker psubsw m3, m1, m4 ;out55 5647*c0909341SAndroid Build Coastguard Worker paddsw m1, m4 ;out8 5648*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*43], m2 ;out40 5649*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*26], m0 ;out23 5650*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*58], m3 ;out55 5651*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*11], m1 ;out8 5652*c0909341SAndroid Build Coastguard Worker 5653*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*37] ;t34 5654*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize*2+16*48] ;t45 5655*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*53] ;t50 5656*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*64] ;t61 5657*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*32] ;tmp[29] 5658*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m5 ;t45a 5659*c0909341SAndroid Build Coastguard Worker paddsw m0, m5 ;t34a 5660*c0909341SAndroid Build Coastguard Worker psubsw m5, m1, m3 ;t50a 5661*c0909341SAndroid Build Coastguard Worker paddsw m1, m3 ;t61a 5662*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 3, 6, 7, 2896, 2896 ;t45, t50 5663*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*5 ] ;tmp[2 ] 5664*c0909341SAndroid Build Coastguard Worker psubsw m6, m2, m0 ;out34 5665*c0909341SAndroid Build Coastguard Worker paddsw m2, m0 ;out29 5666*c0909341SAndroid Build Coastguard Worker psubsw m0, m3, m1 ;out61 5667*c0909341SAndroid Build Coastguard Worker paddsw m3, 
m1 ;out2 5668*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*37], m6 ;out34 5669*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*32], m2 ;out29 5670*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*64], m0 ;out61 5671*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*5 ], m3 ;out2 5672*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*21] ;tmp[18] 5673*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*16] ;tmp[13] 5674*c0909341SAndroid Build Coastguard Worker psubsw m2, m0, m5 ;out45 5675*c0909341SAndroid Build Coastguard Worker paddsw m0, m5 ;out18 5676*c0909341SAndroid Build Coastguard Worker psubsw m3, m1, m4 ;out50 5677*c0909341SAndroid Build Coastguard Worker paddsw m1, m4 ;out13 5678*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*48], m2 ;out45 5679*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*21], m0 ;out18 5680*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*53], m3 ;out50 5681*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*16], m1 ;out13 5682*c0909341SAndroid Build Coastguard Worker 5683*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*36] ;t33a 5684*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize*2+16*49] ;t46a 5685*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*52] ;t49a 5686*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*65] ;t62a 5687*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*33] ;tmp[30] 5688*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m5 ;t46 5689*c0909341SAndroid Build Coastguard Worker paddsw m0, m5 ;t33 5690*c0909341SAndroid Build Coastguard Worker psubsw m5, m1, m3 ;t49 5691*c0909341SAndroid Build Coastguard Worker paddsw m1, m3 ;t62 5692*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2W 5, 4, 3, 6, 7, 2896, 2896 ;t45, t50 5693*c0909341SAndroid Build Coastguard 
Worker mova m3, [rsp+gprsize*2+16*4 ] ;tmp[1 ] 5694*c0909341SAndroid Build Coastguard Worker psubsw m6, m2, m0 ;out33 5695*c0909341SAndroid Build Coastguard Worker paddsw m2, m0 ;out30 5696*c0909341SAndroid Build Coastguard Worker psubsw m0, m3, m1 ;out62 5697*c0909341SAndroid Build Coastguard Worker paddsw m3, m1 ;out1 5698*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*36], m6 ;out33 5699*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*33], m2 ;out30 5700*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*65], m0 ;out62 5701*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*4 ], m3 ;out1 5702*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*20] ;tmp[17] 5703*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*17] ;tmp[14] 5704*c0909341SAndroid Build Coastguard Worker psubsw m2, m0, m5 ;out46 5705*c0909341SAndroid Build Coastguard Worker paddsw m0, m5 ;out17 5706*c0909341SAndroid Build Coastguard Worker psubsw m3, m1, m4 ;out49 5707*c0909341SAndroid Build Coastguard Worker paddsw m1, m4 ;out14 5708*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*49], m2 ;out46 5709*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*20], m0 ;out17 5710*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*52], m3 ;out49 5711*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize*2+16*17], m1 ;out14 5712*c0909341SAndroid Build Coastguard Worker 5713*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+gprsize*2+16*35] ;t32 5714*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+gprsize*2+16*50] ;t47 5715*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize*2+16*51] ;t48 5716*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize*2+16*66] ;t63 5717*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize*2+16*34] ;tmp[31] 5718*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m5 ;t47a 5719*c0909341SAndroid Build 
; (end of the routine that starts above this span; it finishes at the ret below)
    paddsw                  m0, m5                        ;t32a
    psubsw                  m5, m1, m3                    ;t48a
    paddsw                  m1, m3                        ;t63a
    ITX_MULSUB_2W            5, 4, 3, 6, 7, 2896, 2896    ;t47, t48
    mova                    m3, [rsp+gprsize*2+16*3 ]     ;tmp[0 ]
    psubsw                  m6, m2, m0                    ;out32
    paddsw                  m2, m0                        ;out31
    psubsw                  m0, m3, m1                    ;out63
    paddsw                  m3, m1                        ;out0
    mova [rsp+gprsize*2+16*35], m6                        ;out32
    mova [rsp+gprsize*2+16*34], m2                        ;out31
    mova [rsp+gprsize*2+16*66], m0                        ;out63
    mova [rsp+gprsize*2+16*3 ], m3                        ;out0
    mova                    m0, [rsp+gprsize*2+16*19]     ;tmp[16]
    mova                    m1, [rsp+gprsize*2+16*18]     ;tmp[15]
    psubsw                  m2, m0, m5                    ;out47
    paddsw                  m0, m5                        ;out16
    psubsw                  m3, m1, m4                    ;out48
    paddsw                  m1, m4                        ;out15
    mova [rsp+gprsize*2+16*50], m2                        ;out47
    mova [rsp+gprsize*2+16*19], m0                        ;out16
    mova [rsp+gprsize*2+16*51], m3                        ;out48
    mova [rsp+gprsize*2+16*18], m1                        ;out15
    ret


;-----------------------------------------------------------------------
; inv_txfm_add_dct_dct_64x16_8bpc
;
; x86inc cglobal: 4 arguments (dst, stride, coeff, eob), 6 GPRs,
; 8 XMM registers, 16*132 bytes of stack. tx2 names the fifth GPR; it is
; not an incoming argument and is used below to hold a jump target.
;
; eob != 0: calls idct_64x16_internal_8bpc and returns.
; eob == 0: .dconly path. A single word derived from the first
;           coefficient is broadcast and added to 16 rows of 64 bytes
;           at dst (unsigned-saturated back to bytes); the first dword
;           of coeff is zeroed.
;
; NOTE(review): .body receives its inputs in registers (m0 = scaled
; value, m1/m2 = multipliers, r3d = row count, tx2q = continuation) and
; leaves through "jmp tx2q". It looks designed to be entered from other
; routines with different row counts/continuations - those callers are
; not visible here, so confirm before changing register usage.
;-----------------------------------------------------------------------
cglobal inv_txfm_add_dct_dct_64x16_8bpc, 4, 6, 8, 16*132, dst, stride, coeff, eob, tx2
%if ARCH_X86_32
    ; r5 = start of the current section ($$). The o() wrapper used on the
    ; constant loads below presumably addresses relative to this base on
    ; x86-32 (o() is defined outside this span - confirm).
    LEA                     r5, $$
%endif
    test                  eobd, eobd
    jz .dconly                          ; eob == 0: take the DC-only path

    call m(idct_64x16_internal_8bpc)
    RET

.dconly:
    ; movd loads only 32 bits (two words); that is sufficient because
    ; only word 0 of m0 is broadcast further down.
    movd                    m1, [o(pw_2896x8)]
    pmulhrsw                m0, m1, [coeffq]    ; m0 = coeff * m1, rounded, >> 15 (per word)
    movd                    m2, [o(pw_8192)]
    mov               [coeffq], eobd            ; eobd is 0 on this path: clears the first dword of coeff
    mov                    r3d, 16              ; r3d = number of rows for .loop
    lea                   tx2q, [o(.end)]       ; continuation taken by "jmp tx2q" after the loop

.body:
    ; Three more rounding multiplies (by m2, by m1, by pw_2048), then the
    ; resulting word 0 is replicated into all eight word lanes of m0.
    pmulhrsw                m0, m2
    movd                    m2, [o(pw_2048)]  ;intentionally rip-relative
    pmulhrsw                m0, m1
    pmulhrsw                m0, m2
    pshuflw                 m0, m0, q0000       ; word 0 -> low four words
    punpcklwd               m0, m0              ; low four words -> all eight words
    pxor                    m7, m7              ; m7 = 0, used to widen bytes to words

.loop:
    ; One row per iteration: 4 x 16 bytes at dstq are widened to words
    ; (unpack against zero), offset by m0, packed back to bytes with
    ; unsigned saturation and stored in place. mova is used for all
    ; accesses, i.e. dstq is treated as 16-byte aligned.
    mova                    m1, [dstq+16*0]
    mova                    m3, [dstq+16*1]
    mova                    m5, [dstq+16*2]
    mova                    m6, [dstq+16*3]
    punpckhbw               m2, m1, m7
    punpcklbw               m1, m7
    punpckhbw               m4, m3, m7
    punpcklbw               m3, m7
    paddw                   m2, m0
    paddw                   m1, m0
    paddw                   m4, m0
    paddw                   m3, m0
    packuswb                m1, m2
    packuswb                m3, m4
    ; m2/m4 are free again after the packs and are reused for the
    ; high halves of the third and fourth 16-byte groups.
    punpckhbw               m2, m5, m7
    punpcklbw               m5, m7
    punpckhbw               m4, m6, m7
    punpcklbw               m6, m7
    paddw                   m2, m0
    paddw                   m5, m0
    paddw                   m4, m0
    paddw                   m6, m0
    packuswb                m5, m2
    packuswb                m6, m4
    mova           [dstq+16*0], m1
    mova           [dstq+16*1], m3
    mova           [dstq+16*2], m5
    mova           [dstq+16*3], m6
    add                   dstq, strideq         ; next row
    dec                    r3d
    jg .loop                                    ; repeat while rows remain (r3d > 0)
    jmp                   tx2q                  ; continue at the address set up before .body

.end:
    RET


%macro LOAD_4ROWS 2-3 0 ;src, stride, is_rect2
5811*c0909341SAndroid Build Coastguard Worker 5812*c0909341SAndroid Build Coastguard Worker%if %3 5813*c0909341SAndroid Build Coastguard Worker mova m3, [o(pw_2896x8)] 5814*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m3, [%1+%2*0] 5815*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m3, [%1+%2*1] 5816*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m3, [%1+%2*2] 5817*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, [%1+%2*3] 5818*c0909341SAndroid Build Coastguard Worker%else 5819*c0909341SAndroid Build Coastguard Worker mova m0, [%1+%2*0] 5820*c0909341SAndroid Build Coastguard Worker mova m1, [%1+%2*1] 5821*c0909341SAndroid Build Coastguard Worker mova m2, [%1+%2*2] 5822*c0909341SAndroid Build Coastguard Worker mova m3, [%1+%2*3] 5823*c0909341SAndroid Build Coastguard Worker%endif 5824*c0909341SAndroid Build Coastguard Worker%endmacro 5825*c0909341SAndroid Build Coastguard Worker 5826*c0909341SAndroid Build Coastguard Worker%macro LOAD_4ROWS_H 2 ;src, stride 5827*c0909341SAndroid Build Coastguard Worker mova m4, [%1+%2*0] 5828*c0909341SAndroid Build Coastguard Worker mova m5, [%1+%2*1] 5829*c0909341SAndroid Build Coastguard Worker mova m6, [%1+%2*2] 5830*c0909341SAndroid Build Coastguard Worker mova m7, [%1+%2*3] 5831*c0909341SAndroid Build Coastguard Worker%endmacro 5832*c0909341SAndroid Build Coastguard Worker 5833*c0909341SAndroid Build Coastguard Workercglobal idct_64x16_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2 5834*c0909341SAndroid Build Coastguard Worker mov r3d, 2 5835*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*2+16*67], dstq 5836*c0909341SAndroid Build Coastguard Worker lea dstq, [rsp+gprsize+16*68] 5837*c0909341SAndroid Build Coastguard Worker 5838*c0909341SAndroid Build Coastguard Worker.pass1_loop: 5839*c0909341SAndroid Build Coastguard Worker LOAD_4ROWS coeffq+32*0, 32*8 5840*c0909341SAndroid Build Coastguard Worker pxor m4, m4 5841*c0909341SAndroid Build Coastguard Worker REPX {mova x, m4}, m5, m6, 
m7 5842*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).main 5843*c0909341SAndroid Build Coastguard Worker SAVE_7ROWS rsp+gprsize+16*3, 16 5844*c0909341SAndroid Build Coastguard Worker 5845*c0909341SAndroid Build Coastguard Worker pxor m4, m4 5846*c0909341SAndroid Build Coastguard Worker LOAD_4ROWS coeffq+32*4, 32*8 5847*c0909341SAndroid Build Coastguard Worker 5848*c0909341SAndroid Build Coastguard Worker REPX {mova x, m4}, m5, m6, m7 5849*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_8bpc).main 5850*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize+16*0] 5851*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS rsp+gprsize+16*11, 16 5852*c0909341SAndroid Build Coastguard Worker 5853*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+32*2, 32*4 5854*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*19], m0 5855*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*26], m1 5856*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*23], m2 5857*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*22], m3 5858*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*21], m4 5859*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*24], m5 5860*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*25], m6 5861*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*20], m7 5862*c0909341SAndroid Build Coastguard Worker 5863*c0909341SAndroid Build Coastguard Worker call m(idct_8x32_internal_8bpc).main_fast 5864*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS rsp+gprsize+16*3, 16 5865*c0909341SAndroid Build Coastguard Worker 5866*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+32*1, 32*2 5867*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*35], m0 ;in1 5868*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*49], m1 ;in3 5869*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*43], m2 ;in5 
5870*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*41], m3 ;in7 5871*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*39], m4 ;in9 5872*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*45], m5 ;in11 5873*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*47], m6 ;in13 5874*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*37], m7 ;in15 5875*c0909341SAndroid Build Coastguard Worker 5876*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+32*17, 32*2 5877*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*63], m0 ;in17 5878*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*53], m1 ;in19 5879*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*55], m2 ;in21 5880*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*61], m3 ;in23 5881*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*59], m4 ;in25 5882*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*57], m5 ;in27 5883*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*51], m6 ;in29 5884*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*65], m7 ;in31 5885*c0909341SAndroid Build Coastguard Worker 5886*c0909341SAndroid Build Coastguard Worker call m(idct_16x64_internal_8bpc).main 5887*c0909341SAndroid Build Coastguard Worker 5888*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*3, 16 5889*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 5890*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_8192)] 5891*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end)] 5892*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end1 5893*c0909341SAndroid Build Coastguard Worker 5894*c0909341SAndroid Build Coastguard Worker.pass1_end: 5895*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+32*0, 32 5896*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*11, 16 
5897*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 5898*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_8192)] 5899*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end1)] 5900*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end1 5901*c0909341SAndroid Build Coastguard Worker 5902*c0909341SAndroid Build Coastguard Worker.pass1_end1: 5903*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+32*8, 32 5904*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*19, 16 5905*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 5906*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_8192)] 5907*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end2)] 5908*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end1 5909*c0909341SAndroid Build Coastguard Worker 5910*c0909341SAndroid Build Coastguard Worker.pass1_end2: 5911*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+32*16, 32 5912*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*27, 16 5913*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 5914*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_8192)] 5915*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end3)] 5916*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end1 5917*c0909341SAndroid Build Coastguard Worker 5918*c0909341SAndroid Build Coastguard Worker.pass1_end3: 5919*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+32*24, 32 5920*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*35, 16 5921*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 5922*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_8192)] 5923*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end4)] 5924*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end1 5925*c0909341SAndroid 
Build Coastguard Worker 5926*c0909341SAndroid Build Coastguard Worker.pass1_end4: 5927*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS dstq+32*0, 32 5928*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*43, 16 5929*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 5930*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_8192)] 5931*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end5)] 5932*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end1 5933*c0909341SAndroid Build Coastguard Worker 5934*c0909341SAndroid Build Coastguard Worker.pass1_end5: 5935*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS dstq+32*8, 32 5936*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*51, 16 5937*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 5938*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_8192)] 5939*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end6)] 5940*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end1 5941*c0909341SAndroid Build Coastguard Worker 5942*c0909341SAndroid Build Coastguard Worker.pass1_end6: 5943*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS dstq+32*16, 32 5944*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*59, 16 5945*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 5946*c0909341SAndroid Build Coastguard Worker mova m7, [o(pw_8192)] 5947*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end7)] 5948*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end1 5949*c0909341SAndroid Build Coastguard Worker 5950*c0909341SAndroid Build Coastguard Worker.pass1_end7: 5951*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS dstq+32*24, 32 5952*c0909341SAndroid Build Coastguard Worker 5953*c0909341SAndroid Build Coastguard Worker add coeffq, 16 5954*c0909341SAndroid Build Coastguard Worker add dstq, 16 
5955*c0909341SAndroid Build Coastguard Worker dec r3d 5956*c0909341SAndroid Build Coastguard Worker jg .pass1_loop 5957*c0909341SAndroid Build Coastguard Worker 5958*c0909341SAndroid Build Coastguard Worker.pass2: 5959*c0909341SAndroid Build Coastguard Worker mov dstq, [rsp+gprsize*2+16*67] 5960*c0909341SAndroid Build Coastguard Worker sub coeffq, 32 5961*c0909341SAndroid Build Coastguard Worker mov r3d, 4 5962*c0909341SAndroid Build Coastguard Worker 5963*c0909341SAndroid Build Coastguard Worker.pass2_loop: 5964*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*1+16*67], r3d 5965*c0909341SAndroid Build Coastguard Worker 5966*c0909341SAndroid Build Coastguard Worker LOAD_4ROWS coeffq+16*0, 32*2 5967*c0909341SAndroid Build Coastguard Worker LOAD_4ROWS_H coeffq+16*1, 32*2 5968*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).main 5969*c0909341SAndroid Build Coastguard Worker SAVE_7ROWS rsp+gprsize+16*3, 16 5970*c0909341SAndroid Build Coastguard Worker LOAD_4ROWS coeffq+16*2, 32*2 5971*c0909341SAndroid Build Coastguard Worker LOAD_4ROWS_H coeffq+16*3, 32*2 5972*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_8bpc).main 5973*c0909341SAndroid Build Coastguard Worker 5974*c0909341SAndroid Build Coastguard Worker mov r3, dstq 5975*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.end)] 5976*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*8] 5977*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).end 5978*c0909341SAndroid Build Coastguard Worker 5979*c0909341SAndroid Build Coastguard Worker.end: 5980*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*3, 16 5981*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 5982*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.end1)] 5983*c0909341SAndroid Build Coastguard Worker mov dstq, r3 5984*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).end 5985*c0909341SAndroid Build 
Coastguard Worker 5986*c0909341SAndroid Build Coastguard Worker.end1: 5987*c0909341SAndroid Build Coastguard Worker pxor m7, m7 5988*c0909341SAndroid Build Coastguard Worker REPX {mova [coeffq+16*x], m7}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 5989*c0909341SAndroid Build Coastguard Worker 5990*c0909341SAndroid Build Coastguard Worker add coeffq, 16*16 5991*c0909341SAndroid Build Coastguard Worker mov r3d, [rsp+gprsize*1+16*67] 5992*c0909341SAndroid Build Coastguard Worker mov dstq, [rsp+gprsize*2+16*67] 5993*c0909341SAndroid Build Coastguard Worker add dstq, 8 5994*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*2+16*67], dstq 5995*c0909341SAndroid Build Coastguard Worker dec r3d 5996*c0909341SAndroid Build Coastguard Worker jg .pass2_loop 5997*c0909341SAndroid Build Coastguard Worker 5998*c0909341SAndroid Build Coastguard Worker mov r3d, 4 5999*c0909341SAndroid Build Coastguard Worker lea coeffq, [rsp+gprsize+16*68] 6000*c0909341SAndroid Build Coastguard Worker.pass2_loop2: 6001*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*1+16*67], r3d 6002*c0909341SAndroid Build Coastguard Worker 6003*c0909341SAndroid Build Coastguard Worker LOAD_4ROWS coeffq+16*0, 32*2 6004*c0909341SAndroid Build Coastguard Worker LOAD_4ROWS_H coeffq+16*1, 32*2 6005*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).main 6006*c0909341SAndroid Build Coastguard Worker SAVE_7ROWS rsp+gprsize+16*3, 16 6007*c0909341SAndroid Build Coastguard Worker LOAD_4ROWS coeffq+16*2, 32*2 6008*c0909341SAndroid Build Coastguard Worker LOAD_4ROWS_H coeffq+16*3, 32*2 6009*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_8bpc).main 6010*c0909341SAndroid Build Coastguard Worker 6011*c0909341SAndroid Build Coastguard Worker mov r3, dstq 6012*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.end2)] 6013*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*8] 6014*c0909341SAndroid Build Coastguard Worker jmp 
m(idct_8x8_internal_8bpc).end 6015*c0909341SAndroid Build Coastguard Worker 6016*c0909341SAndroid Build Coastguard Worker.end2: 6017*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*3, 16 6018*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 6019*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.end3)] 6020*c0909341SAndroid Build Coastguard Worker mov dstq, r3 6021*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).end 6022*c0909341SAndroid Build Coastguard Worker 6023*c0909341SAndroid Build Coastguard Worker.end3: 6024*c0909341SAndroid Build Coastguard Worker 6025*c0909341SAndroid Build Coastguard Worker add coeffq, 16*16 6026*c0909341SAndroid Build Coastguard Worker mov r3d, [rsp+gprsize*1+16*67] 6027*c0909341SAndroid Build Coastguard Worker mov dstq, [rsp+gprsize*2+16*67] 6028*c0909341SAndroid Build Coastguard Worker add dstq, 8 6029*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*2+16*67], dstq 6030*c0909341SAndroid Build Coastguard Worker dec r3d 6031*c0909341SAndroid Build Coastguard Worker jg .pass2_loop2 6032*c0909341SAndroid Build Coastguard Worker ret 6033*c0909341SAndroid Build Coastguard Worker 6034*c0909341SAndroid Build Coastguard Worker 6035*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_32x64_8bpc, 4, 6, 8, 16*68, dst, stride, coeff, eob, tx2 6036*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 6037*c0909341SAndroid Build Coastguard Worker LEA r5, $$ 6038*c0909341SAndroid Build Coastguard Worker%endif 6039*c0909341SAndroid Build Coastguard Worker test eobd, eobd 6040*c0909341SAndroid Build Coastguard Worker jz .dconly 6041*c0909341SAndroid Build Coastguard Worker call m(idct_32x64_internal_8bpc) 6042*c0909341SAndroid Build Coastguard Worker.end: 6043*c0909341SAndroid Build Coastguard Worker RET 6044*c0909341SAndroid Build Coastguard Worker 6045*c0909341SAndroid Build Coastguard Worker.dconly: 6046*c0909341SAndroid Build Coastguard Worker movd m1, 
[o(pw_2896x8)] 6047*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m1, [coeffq] 6048*c0909341SAndroid Build Coastguard Worker movd m2, [o(pw_16384)] 6049*c0909341SAndroid Build Coastguard Worker mov [coeffq], eobd 6050*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m1 6051*c0909341SAndroid Build Coastguard Worker mov r3d, 64 6052*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.end)] 6053*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_32x8_8bpc).body 6054*c0909341SAndroid Build Coastguard Worker 6055*c0909341SAndroid Build Coastguard Worker 6056*c0909341SAndroid Build Coastguard Workercglobal idct_32x64_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2 6057*c0909341SAndroid Build Coastguard Worker mov r4d, 2 6058*c0909341SAndroid Build Coastguard Worker sub eobd, 136 6059*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*1+16*67], eobd 6060*c0909341SAndroid Build Coastguard Worker mov r3d, 4 6061*c0909341SAndroid Build Coastguard Worker cmovs r3d, r4d 6062*c0909341SAndroid Build Coastguard Worker 6063*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 6064*c0909341SAndroid Build Coastguard Worker LEA r5, $$ 6065*c0909341SAndroid Build Coastguard Worker%endif 6066*c0909341SAndroid Build Coastguard Worker 6067*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*2+16*67], coeffq 6068*c0909341SAndroid Build Coastguard Worker 6069*c0909341SAndroid Build Coastguard Worker.pass1_loop: 6070*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+64*1, 64*2, 1 6071*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*19], m0 ;in1 6072*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*26], m1 ;in3 6073*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*23], m2 ;in5 6074*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*22], m3 ;in7 6075*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*21], m4 ;in9 6076*c0909341SAndroid Build Coastguard Worker mova 
[rsp+gprsize+16*24], m5 ;in11 6077*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*25], m6 ;in13 6078*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*20], m7 ;in15 6079*c0909341SAndroid Build Coastguard Worker 6080*c0909341SAndroid Build Coastguard Worker mov tx2d, [rsp+gprsize*1+16*67] 6081*c0909341SAndroid Build Coastguard Worker test tx2d, tx2d 6082*c0909341SAndroid Build Coastguard Worker jl .fast 6083*c0909341SAndroid Build Coastguard Worker 6084*c0909341SAndroid Build Coastguard Worker.full: 6085*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+64*0, 64*4, 1 6086*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).main 6087*c0909341SAndroid Build Coastguard Worker SAVE_7ROWS rsp+gprsize+16*3, 16 6088*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+64*2, 64*4, 1 6089*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_8bpc).main 6090*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize+16*0] 6091*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS rsp+gprsize+16*11, 16 6092*c0909341SAndroid Build Coastguard Worker 6093*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+64*17, 64*2, 1 6094*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*33], m0 ;in17 6095*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*28], m1 ;in19 6096*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*29], m2 ;in21 6097*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*32], m3 ;in23 6098*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*31], m4 ;in25 6099*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*30], m5 ;in27 6100*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*27], m6 ;in29 6101*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*34], m7 ;in31 6102*c0909341SAndroid Build Coastguard Worker 6103*c0909341SAndroid Build Coastguard Worker call m(idct_8x32_internal_8bpc).main 
6104*c0909341SAndroid Build Coastguard Worker jmp .pass1_end 6105*c0909341SAndroid Build Coastguard Worker 6106*c0909341SAndroid Build Coastguard Worker.fast: 6107*c0909341SAndroid Build Coastguard Worker LOAD_4ROWS coeffq, 256, 1 6108*c0909341SAndroid Build Coastguard Worker pxor m4, m4 6109*c0909341SAndroid Build Coastguard Worker REPX {mova x, m4}, m5, m6, m7 6110*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).main 6111*c0909341SAndroid Build Coastguard Worker 6112*c0909341SAndroid Build Coastguard Worker SAVE_7ROWS rsp+gprsize+16*3, 16 6113*c0909341SAndroid Build Coastguard Worker LOAD_4ROWS coeffq+128*1, 256, 1 6114*c0909341SAndroid Build Coastguard Worker pxor m4, m4 6115*c0909341SAndroid Build Coastguard Worker REPX {mova x, m4}, m5, m6, m7 6116*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_8bpc).main 6117*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize+16*0] 6118*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS rsp+gprsize+16*11, 16 6119*c0909341SAndroid Build Coastguard Worker 6120*c0909341SAndroid Build Coastguard Worker call m(idct_8x32_internal_8bpc).main_fast 6121*c0909341SAndroid Build Coastguard Worker 6122*c0909341SAndroid Build Coastguard Worker.pass1_end: 6123*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 6124*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end1)] 6125*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end 6126*c0909341SAndroid Build Coastguard Worker 6127*c0909341SAndroid Build Coastguard Worker.pass1_end1: 6128*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+64*0, 64 6129*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*11, 16 6130*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 6131*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end2)] 6132*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end 6133*c0909341SAndroid 
Build Coastguard Worker 6134*c0909341SAndroid Build Coastguard Worker.pass1_end2: 6135*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+64*8, 64 6136*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*19, 16 6137*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 6138*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end3)] 6139*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end 6140*c0909341SAndroid Build Coastguard Worker 6141*c0909341SAndroid Build Coastguard Worker.pass1_end3: 6142*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+64*16, 64 6143*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*27, 16 6144*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 6145*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end4)] 6146*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end 6147*c0909341SAndroid Build Coastguard Worker 6148*c0909341SAndroid Build Coastguard Worker.pass1_end4: 6149*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+64*24, 64 6150*c0909341SAndroid Build Coastguard Worker 6151*c0909341SAndroid Build Coastguard Worker add coeffq, 16 6152*c0909341SAndroid Build Coastguard Worker dec r3d 6153*c0909341SAndroid Build Coastguard Worker jg .pass1_loop 6154*c0909341SAndroid Build Coastguard Worker 6155*c0909341SAndroid Build Coastguard Worker.pass2: 6156*c0909341SAndroid Build Coastguard Worker mov coeffq, [rsp+gprsize*2+16*67] 6157*c0909341SAndroid Build Coastguard Worker mov r3d, 4 6158*c0909341SAndroid Build Coastguard Worker lea r4, [dstq+8] 6159*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*2+16*67], r4 6160*c0909341SAndroid Build Coastguard Worker lea r4, [o(m(idct_16x64_internal_8bpc).end1)] 6161*c0909341SAndroid Build Coastguard Worker jmp m(idct_16x64_internal_8bpc).pass2_loop 6162*c0909341SAndroid Build Coastguard Worker 6163*c0909341SAndroid Build Coastguard 
Worker 6164*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_64x32_8bpc, 4, 6, 8, 16*197, dst, stride, coeff, eob, tx2 6165*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 6166*c0909341SAndroid Build Coastguard Worker LEA r5, $$ 6167*c0909341SAndroid Build Coastguard Worker%endif 6168*c0909341SAndroid Build Coastguard Worker test eobd, eobd 6169*c0909341SAndroid Build Coastguard Worker jz .dconly 6170*c0909341SAndroid Build Coastguard Worker call m(idct_64x32_internal_8bpc) 6171*c0909341SAndroid Build Coastguard Worker.end: 6172*c0909341SAndroid Build Coastguard Worker RET 6173*c0909341SAndroid Build Coastguard Worker 6174*c0909341SAndroid Build Coastguard Worker.dconly: 6175*c0909341SAndroid Build Coastguard Worker movd m1, [o(pw_2896x8)] 6176*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m1, [coeffq] 6177*c0909341SAndroid Build Coastguard Worker movd m2, [o(pw_16384)] 6178*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m1 6179*c0909341SAndroid Build Coastguard Worker mov [coeffq], eobd 6180*c0909341SAndroid Build Coastguard Worker mov r3d, 32 6181*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.end)] 6182*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_64x16_8bpc).body 6183*c0909341SAndroid Build Coastguard Worker 6184*c0909341SAndroid Build Coastguard Worker 6185*c0909341SAndroid Build Coastguard Workercglobal idct_64x32_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2 6186*c0909341SAndroid Build Coastguard Worker mov r4d, 2 6187*c0909341SAndroid Build Coastguard Worker sub eobd, 136 6188*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*1+16*67], eobd 6189*c0909341SAndroid Build Coastguard Worker mov r3d, 4 6190*c0909341SAndroid Build Coastguard Worker cmovs r3d, r4d 6191*c0909341SAndroid Build Coastguard Worker 6192*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 6193*c0909341SAndroid Build Coastguard Worker LEA r5, $$ 6194*c0909341SAndroid Build Coastguard Worker%endif 
6195*c0909341SAndroid Build Coastguard Worker 6196*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*2+16*67], coeffq 6197*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*3+16*67], dstq 6198*c0909341SAndroid Build Coastguard Worker lea dstq, [rsp+gprsize+16*69] 6199*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*4+16*67], dstq 6200*c0909341SAndroid Build Coastguard Worker 6201*c0909341SAndroid Build Coastguard Worker.pass1_loop: 6202*c0909341SAndroid Build Coastguard Worker LOAD_4ROWS coeffq+64*0, 64*8, 1 6203*c0909341SAndroid Build Coastguard Worker pxor m4, m4 6204*c0909341SAndroid Build Coastguard Worker REPX {mova x, m4}, m5, m6, m7 6205*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).main 6206*c0909341SAndroid Build Coastguard Worker SAVE_7ROWS rsp+gprsize+16*3, 16 6207*c0909341SAndroid Build Coastguard Worker 6208*c0909341SAndroid Build Coastguard Worker pxor m4, m4 6209*c0909341SAndroid Build Coastguard Worker LOAD_4ROWS coeffq+64*4, 64*8, 1 6210*c0909341SAndroid Build Coastguard Worker 6211*c0909341SAndroid Build Coastguard Worker REPX {mova x, m4}, m5, m6, m7 6212*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_8bpc).main 6213*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize+16*0] 6214*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS rsp+gprsize+16*11, 16 6215*c0909341SAndroid Build Coastguard Worker 6216*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+64*2, 64*4, 1 6217*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*19], m0 6218*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*26], m1 6219*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*23], m2 6220*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*22], m3 6221*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*21], m4 6222*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*24], m5 6223*c0909341SAndroid Build Coastguard Worker mova 
[rsp+gprsize+16*25], m6 6224*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*20], m7 6225*c0909341SAndroid Build Coastguard Worker 6226*c0909341SAndroid Build Coastguard Worker call m(idct_8x32_internal_8bpc).main_fast 6227*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS rsp+gprsize+16*3, 16 6228*c0909341SAndroid Build Coastguard Worker 6229*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+64*1, 64*2, 1 6230*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*35], m0 ;in1 6231*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*49], m1 ;in3 6232*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*43], m2 ;in5 6233*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*41], m3 ;in7 6234*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*39], m4 ;in9 6235*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*45], m5 ;in11 6236*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*47], m6 ;in13 6237*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*37], m7 ;in15 6238*c0909341SAndroid Build Coastguard Worker 6239*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS coeffq+64*17, 64*2, 1 6240*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*63], m0 ;in17 6241*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*53], m1 ;in19 6242*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*55], m2 ;in21 6243*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*61], m3 ;in23 6244*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*59], m4 ;in25 6245*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*57], m5 ;in27 6246*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*51], m6 ;in29 6247*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*65], m7 ;in31 6248*c0909341SAndroid Build Coastguard Worker 6249*c0909341SAndroid Build Coastguard Worker call m(idct_16x64_internal_8bpc).main 
6250*c0909341SAndroid Build Coastguard Worker 6251*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*3, 16 6252*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 6253*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end)] 6254*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end 6255*c0909341SAndroid Build Coastguard Worker 6256*c0909341SAndroid Build Coastguard Worker.pass1_end: 6257*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+64*0, 64 6258*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*11, 16 6259*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 6260*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end1)] 6261*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end 6262*c0909341SAndroid Build Coastguard Worker 6263*c0909341SAndroid Build Coastguard Worker.pass1_end1: 6264*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+64*8, 64 6265*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*19, 16 6266*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 6267*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end2)] 6268*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end 6269*c0909341SAndroid Build Coastguard Worker 6270*c0909341SAndroid Build Coastguard Worker.pass1_end2: 6271*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+64*16, 64 6272*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*27, 16 6273*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 6274*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end3)] 6275*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end 6276*c0909341SAndroid Build Coastguard Worker 6277*c0909341SAndroid Build Coastguard Worker.pass1_end3: 6278*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS coeffq+64*24, 
64 6279*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*35, 16 6280*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 6281*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end4)] 6282*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end 6283*c0909341SAndroid Build Coastguard Worker 6284*c0909341SAndroid Build Coastguard Worker.pass1_end4: 6285*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS dstq+64*0, 64 6286*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*43, 16 6287*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 6288*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end5)] 6289*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end 6290*c0909341SAndroid Build Coastguard Worker 6291*c0909341SAndroid Build Coastguard Worker.pass1_end5: 6292*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS dstq+64*8, 64 6293*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*51, 16 6294*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 6295*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end6)] 6296*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end 6297*c0909341SAndroid Build Coastguard Worker 6298*c0909341SAndroid Build Coastguard Worker.pass1_end6: 6299*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS dstq+64*16, 64 6300*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS rsp+gprsize+16*59, 16 6301*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 6302*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass1_end7)] 6303*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x8_internal_8bpc).pass1_end 6304*c0909341SAndroid Build Coastguard Worker 6305*c0909341SAndroid Build Coastguard Worker.pass1_end7: 6306*c0909341SAndroid Build Coastguard Worker SAVE_8ROWS dstq+64*24, 64 6307*c0909341SAndroid Build Coastguard Worker 
6308*c0909341SAndroid Build Coastguard Worker add coeffq, 16 6309*c0909341SAndroid Build Coastguard Worker add dstq, 16 6310*c0909341SAndroid Build Coastguard Worker dec r3d 6311*c0909341SAndroid Build Coastguard Worker jg .pass1_loop 6312*c0909341SAndroid Build Coastguard Worker 6313*c0909341SAndroid Build Coastguard Worker.pass2: 6314*c0909341SAndroid Build Coastguard Worker mov coeffq, [rsp+gprsize*4+16*67] 6315*c0909341SAndroid Build Coastguard Worker mov dstq, [rsp+gprsize*3+16*67] 6316*c0909341SAndroid Build Coastguard Worker mov eobd, [rsp+gprsize*1+16*67] 6317*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+32] 6318*c0909341SAndroid Build Coastguard Worker mov [rsp+gprsize*1+16*35], eobd 6319*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass2_end)] 6320*c0909341SAndroid Build Coastguard Worker mov r3d, 4 6321*c0909341SAndroid Build Coastguard Worker jmp m(idct_32x32_internal_8bpc).pass2_loop 6322*c0909341SAndroid Build Coastguard Worker 6323*c0909341SAndroid Build Coastguard Worker.pass2_end: 6324*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*0], m7 6325*c0909341SAndroid Build Coastguard Worker lea r3, [o(.pass2_end1)] 6326*c0909341SAndroid Build Coastguard Worker jmp m(idct_8x32_internal_8bpc).end2 6327*c0909341SAndroid Build Coastguard Worker 6328*c0909341SAndroid Build Coastguard Worker.pass2_end1: 6329*c0909341SAndroid Build Coastguard Worker lea tx2q, [o(.pass2_end)] 6330*c0909341SAndroid Build Coastguard Worker add coeffq, 16*32 6331*c0909341SAndroid Build Coastguard Worker mov dstq, [rsp+gprsize*2+16*35] 6332*c0909341SAndroid Build Coastguard Worker mov r3d, [rsp+gprsize*3+16*35] 6333*c0909341SAndroid Build Coastguard Worker dec r3d 6334*c0909341SAndroid Build Coastguard Worker jg m(idct_32x32_internal_8bpc).pass2_loop 6335*c0909341SAndroid Build Coastguard Worker 6336*c0909341SAndroid Build Coastguard Worker.pass2_end2: 6337*c0909341SAndroid Build Coastguard Worker mov dstq, [rsp+gprsize*3+16*67] 
; Remainder of the preceding function's .pass2_end2 block (its label and first
; instruction sit on the source line just above). Reloads coeffq from the
; scratch slot at gprsize*2+16*67, sets r3d = 4, points tx2q at the 32x32
; .pass2_end label and tail-jumps into the 32x32 .pass2_loop.
    mov         coeffq, [rsp+gprsize*2+16*67]
    lea         tx2q, [o(m(idct_32x32_internal_8bpc).pass2_end)]
    mov         r3d, 4
    jmp         m(idct_32x32_internal_8bpc).pass2_loop


;-----------------------------------------------------------------------------
; inv_txfm_add_dct_dct_64x64_8bpc  (named args: dst, stride, coeff, eob, tx2)
;
; Entry point for the 64x64 DCT/DCT inverse transform + add.
; cglobal parameters (per x86inc's convention): 4 arguments, 6 GPRs,
; 8 vector registers, 16*197 bytes of stack.
;
;   eob == 0 : .dconly - scales the value at [coeffq], writes zero back to
;              [coeffq] and jumps into the shared 64x16 .body with r3d = 64
;              and tx2q = address of the 64x32 function's .end label.
;   eob != 0 : calls idct_64x64_internal_8bpc, then returns.
;
; On x86-32, r5 is loaded with $$ before any o() operand is used
; (presumably o() addresses constants relative to r5 there - confirm).
;-----------------------------------------------------------------------------
cglobal inv_txfm_add_dct_dct_64x64_8bpc, 4, 6, 8, 16*197, dst, stride, coeff, eob, tx2
%if ARCH_X86_32
    LEA         r5, $$
%endif
    test        eobd, eobd
    jz .dconly

    call m(idct_64x64_internal_8bpc)
    RET

.dconly:
    movd        m1, [o(pw_2896x8)]
    pmulhrsw    m0, m1, [coeffq]            ; m0 = pmulhrsw(m1, [coeffq])
    movd        m2, [o(pw_8192)]
    mov         [coeffq], eobd              ; eobd is zero on this path (jz taken): stores a zero dword
    mov         r3d, 64
    lea         tx2q, [o(m(inv_txfm_add_dct_dct_64x32_8bpc).end)]
    jmp m(inv_txfm_add_dct_dct_64x16_8bpc).body

;-----------------------------------------------------------------------------
; idct_64x64_internal_8bpc
;
; Declared with 0 args / 0 GPRs / 0 vector regs: it works on the registers of
; its caller and addresses the caller's stack allocation through rsp+gprsize
; (the extra gprsize presumably skips the return address pushed by `call` -
; confirm).
;
; Scratch slots used for bookkeeping:
;   [rsp+gprsize*1+16*67]  eob - 136 (eobd is stored after the subtraction)
;   [rsp+gprsize*2+16*67]  pass 1: base of the stack buffer rsp+gprsize+16*69
;                          pass 2: dst pointer for the next step (dstq+8)
;   [rsp+gprsize*3+16*67]  dstq as it was on entry
;   [rsp+gprsize*4+16*67]  coeffq as it was on entry
;
; Pass 1 runs r3d times: 2 iterations, or 4 when eob - 136 is non-negative.
; Each iteration advances coeffq and dstq by 16 bytes; during pass 1 dstq
; points into the stack buffer, not at the destination picture.
;-----------------------------------------------------------------------------
cglobal idct_64x64_internal_8bpc, 0, 0, 0, dst, stride, coeff, eob, tx2
    mov         r5d, 4
    mov         r4d, 2
    sub         eobd, 136
    cmovns      r4d, r5d                    ; r4d = (eob - 136 >= 0) ? 4 : 2

%if ARCH_X86_32
    LEA         r5, $$                      ; r5 was just used as scratch for the constant 4
%endif

    mov         [rsp+gprsize*1+16*67], eobd
    mov         r3d, r4d                    ; pass-1 iteration count
    mov         [rsp+gprsize*4+16*67], coeffq
    mov         [rsp+gprsize*3+16*67], dstq
    lea         dstq, [rsp+gprsize+16*69]
    mov         [rsp+gprsize*2+16*67], dstq

.pass1_loop:
    ; 4 rows starting at coeffq+64*0, stride 64*8; m4-m7 are zeroed for .main
    LOAD_4ROWS  coeffq+64*0, 64*8
    pxor        m4, m4
    REPX        {mova x, m4}, m5, m6, m7
    call m(idct_8x8_internal_8bpc).main
    SAVE_7ROWS  rsp+gprsize+16*3, 16

    ; 4 rows starting at coeffq+64*4, stride 64*8; m4-m7 zeroed again
    pxor        m4, m4
    LOAD_4ROWS  coeffq+64*4, 64*8

    REPX        {mova x, m4}, m5, m6, m7
    call m(idct_16x8_internal_8bpc).main
    mova        m7, [rsp+gprsize+16*0]      ; reload m7 from scratch slot 0
    SAVE_8ROWS  rsp+gprsize+16*11, 16

    ; 8 rows starting at coeffq+64*2, stride 64*4, stored to scratch slots
    ; 19..26 in a permuted order (presumably the layout .main_fast expects -
    ; confirm)
    LOAD_8ROWS  coeffq+64*2, 64*4
    mova        [rsp+gprsize+16*19], m0
    mova        [rsp+gprsize+16*26], m1
    mova        [rsp+gprsize+16*23], m2
    mova        [rsp+gprsize+16*22], m3
    mova        [rsp+gprsize+16*21], m4
    mova        [rsp+gprsize+16*24], m5
    mova        [rsp+gprsize+16*25], m6
    mova        [rsp+gprsize+16*20], m7

    call m(idct_8x32_internal_8bpc).main_fast
    SAVE_8ROWS  rsp+gprsize+16*3, 16

    ; 8 rows starting at coeffq+64*1, stride 64*2
    LOAD_8ROWS  coeffq+64*1, 64*2
    mova        [rsp+gprsize+16*35], m0     ;in1
    mova        [rsp+gprsize+16*49], m1     ;in3
    mova        [rsp+gprsize+16*43], m2     ;in5
    mova        [rsp+gprsize+16*41], m3     ;in7
    mova        [rsp+gprsize+16*39], m4     ;in9
    mova        [rsp+gprsize+16*45], m5     ;in11
    mova        [rsp+gprsize+16*47], m6     ;in13
    mova        [rsp+gprsize+16*37], m7     ;in15

    ; 8 rows starting at coeffq+64*17, stride 64*2
    LOAD_8ROWS  coeffq+64*17, 64*2
    mova        [rsp+gprsize+16*63], m0     ;in17
    mova        [rsp+gprsize+16*53], m1     ;in19
    mova        [rsp+gprsize+16*55], m2     ;in21
    mova        [rsp+gprsize+16*61], m3     ;in23
    mova        [rsp+gprsize+16*59], m4     ;in25
    mova        [rsp+gprsize+16*57], m5     ;in27
    mova        [rsp+gprsize+16*51], m6     ;in29
    mova        [rsp+gprsize+16*65], m7     ;in31

    call m(idct_16x64_internal_8bpc).main

    ; Eight identical finishing steps follow. Each one loads 8 rows from the
    ; scratch area, parks m7 in scratch slot 0, loads pw_8192 into m7, sets
    ; tx2q to the next local label and jumps to the 8x8 .pass1_end1 routine
    ; (which presumably continues at tx2q when done - confirm). The label it
    ; lands on stores the 8 result rows before starting the next step.
    LOAD_8ROWS  rsp+gprsize+16*3, 16
    mova        [rsp+gprsize+16*0], m7
    mova        m7, [o(pw_8192)]
    lea         tx2q, [o(.pass1_end)]
    jmp m(idct_8x8_internal_8bpc).pass1_end1

.pass1_end:
    SAVE_8ROWS  coeffq+64*0, 64             ; results go back into the coefficient buffer
    LOAD_8ROWS  rsp+gprsize+16*11, 16
    mova        [rsp+gprsize+16*0], m7
    mova        m7, [o(pw_8192)]
    lea         tx2q, [o(.pass1_end1)]
    jmp m(idct_8x8_internal_8bpc).pass1_end1

.pass1_end1:
    SAVE_8ROWS  coeffq+64*8, 64
    LOAD_8ROWS  rsp+gprsize+16*19, 16
    mova        [rsp+gprsize+16*0], m7
    mova        m7, [o(pw_8192)]
    lea         tx2q, [o(.pass1_end2)]
    jmp m(idct_8x8_internal_8bpc).pass1_end1

.pass1_end2:
    SAVE_8ROWS  coeffq+64*16, 64
    LOAD_8ROWS  rsp+gprsize+16*27, 16
    mova        [rsp+gprsize+16*0], m7
    mova        m7, [o(pw_8192)]
    lea         tx2q, [o(.pass1_end3)]
    jmp m(idct_8x8_internal_8bpc).pass1_end1

.pass1_end3:
    SAVE_8ROWS  coeffq+64*24, 64
    LOAD_8ROWS  rsp+gprsize+16*35, 16
    mova        [rsp+gprsize+16*0], m7
    mova        m7, [o(pw_8192)]
    lea         tx2q, [o(.pass1_end4)]
    jmp m(idct_8x8_internal_8bpc).pass1_end1

.pass1_end4:
    SAVE_8ROWS  dstq+64*0, 64               ; dstq points into the stack buffer during pass 1
    LOAD_8ROWS  rsp+gprsize+16*43, 16
    mova        [rsp+gprsize+16*0], m7
    mova        m7, [o(pw_8192)]
    lea         tx2q, [o(.pass1_end5)]
    jmp m(idct_8x8_internal_8bpc).pass1_end1

.pass1_end5:
    SAVE_8ROWS  dstq+64*8, 64
    LOAD_8ROWS  rsp+gprsize+16*51, 16
    mova        [rsp+gprsize+16*0], m7
    mova        m7, [o(pw_8192)]
    lea         tx2q, [o(.pass1_end6)]
    jmp m(idct_8x8_internal_8bpc).pass1_end1

.pass1_end6:
    SAVE_8ROWS  dstq+64*16, 64
    LOAD_8ROWS  rsp+gprsize+16*59, 16
    mova        [rsp+gprsize+16*0], m7
    mova        m7, [o(pw_8192)]
    lea         tx2q, [o(.pass1_end7)]
    jmp m(idct_8x8_internal_8bpc).pass1_end1

.pass1_end7:
    SAVE_8ROWS  dstq+64*24, 64

    add         coeffq, 16
    add         dstq, 16
    dec         r3d
    jg .pass1_loop

.pass2:
    ; First pass-2 sweep: coefficients come from the stack buffer whose base
    ; was saved in slot gprsize*2, output starts at the entry dst + 32.
    mov         dstq, [rsp+gprsize*3+16*67]
    mov         coeffq, [rsp+gprsize*2+16*67]
    lea         dstq, [dstq+32]
    mov         r3d, 4
    lea         r4, [dstq+8]
    mov         [rsp+gprsize*2+16*67], r4   ; slot now holds the dst pointer for the next step
    lea         r4, [o(.pass2_end)]
    jmp m(idct_16x64_internal_8bpc).pass2_loop

.pass2_end:
    LOAD_8ROWS  rsp+gprsize+16*35, 16
    lea         dstq, [dstq+strideq*2]
    lea         r3, [rsp+16*32+gprsize]
    mova        [rsp+gprsize+16*0], m7
    call m(idct_16x64_internal_8bpc).write
    mov         dstq, [rsp+gprsize*2+16*67]
    ; NOTE(review): slot gprsize*3 is read back as the step counter here, so
    ; the 16x64 .pass2_loop presumably stores r3d into it - confirm.
    mov         r3d, [rsp+gprsize*3+16*67]
    lea         r4, [dstq+8]
    mov         [rsp+gprsize*2+16*67], r4
    lea         r4, [o(.pass2_end)]

    dec         r3d
    jg  m(idct_16x64_internal_8bpc).pass2_loop

.pass2_end2:
    ; Second pass-2 sweep: coefficients come from the original coeff pointer
    ; saved at entry, dst is rewound by 72 bytes from the saved next-step
    ; pointer, and the 16x64 .end1 label is passed in r4 instead of .pass2_end.
    mov         coeffq, [rsp+gprsize*4+16*67]
    mov         dstq, [rsp+gprsize*2+16*67]
    mov         r3d, 4
    sub         dstq, 72
    lea         r4, [dstq+8]
    mov         [rsp+gprsize*2+16*67], r4
    lea         r4, [o(m(idct_16x64_internal_8bpc).end1)]
    jmp m(idct_16x64_internal_8bpc).pass2_loop