; Copyright © 2018-2021, VideoLAN and dav1d authors
; Copyright © 2018, Two Orioles, LLC
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions are met:
;
; 1. Redistributions of source code must retain the above copyright notice, this
;    list of conditions and the following disclaimer.
;
; 2. Redistributions in binary form must reproduce the above copyright notice,
;    this list of conditions and the following disclaimer in the documentation
;    and/or other materials provided with the distribution.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
; ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

%include "config.asm"
%include "ext/x86/x86inc.asm"

; Everything below is only assembled for 64-bit x86 targets.
%if ARCH_X86_64

SECTION_RODATA 16

; Note: The order of (at least some of) those constants matters!
; Read-only coefficient tables. Do not reorder or insert data here: the code
; addresses these tables relative to a base pointer derived from deint_shuf,
; so the layout is part of the contract.

; 16-byte pshufb control mask: gathers the even-indexed 16-bit words into the
; low 8 bytes and the odd-indexed words into the high 8 bytes.
const deint_shuf, db  0,  1,  4,  5,  8,  9, 12, 13,  2,  3,  6,  7, 10, 11, 14, 15

; COEF_PAIR a, b
; Emits two labelled word pairs:
;   pw_<a>_<b>  = {  a, b }
;   pw_m<b>_<a> = { -b, a }
%macro COEF_PAIR 2
pw_%1_%2:  dw  %1, %2
pw_m%2_%1: dw -%2, %1
%endmacro

; ADST-only
pw_3803_1321:   dw  3803,  1321
pw_m1321_2482:  dw -1321,  2482
pw_2482_3344:   dw  2482,  3344
pw_m3344_3344:  dw -3344,  3344
pw_m3803_3344:  dw -3803,  3344
pw_m3803_m6688: dw -3803, -6688
pw_2896_m2896:  dw  2896, -2896

; Word constants stored twice (one dword each) plus the dword rounding
; constant pd_2048.
const pw_5,       times 2 dw 5
const pw_2048,    times 2 dw 2048
const pw_4096,    times 2 dw 4096
const pw_8192,    times 2 dw 8192
const pw_16384,   times 2 dw 16384
const pw_1697x16, times 2 dw 1697*16
const pw_1697x8,  times 2 dw 1697*8
const pw_2896x8,  times 2 dw 2896*8
const pd_2048,    dd 2048

; Coefficient pairs. Entries declared with "const" go through the x86inc
; const macro; the remaining ones are plain file-local labels.
const pw_2896_2896,  dw  2896, 2896
const pw_m2896_2896, dw -2896, 2896
const pw_1567_3784,  dw  1567, 3784
const pw_m3784_1567, dw -3784, 1567
COEF_PAIR 3784, 1567
COEF_PAIR  201, 4091
COEF_PAIR  995, 3973
COEF_PAIR 1751, 3703
COEF_PAIR 2440, 3290
COEF_PAIR 3035, 2751
COEF_PAIR 3513, 2106
COEF_PAIR 3857, 1380
COEF_PAIR 4052,  601
COEF_PAIR  401, 4076
COEF_PAIR 1931, 3612
COEF_PAIR 3166, 2598
COEF_PAIR 3920, 1189
COEF_PAIR  799, 4017
COEF_PAIR 3406, 2276
pw_m799_m4017:  dw  -799, -4017
const pw_m1567_m3784, dw -1567, -3784
pw_m3406_m2276: dw -3406, -2276
pw_m401_m4076:  dw  -401, -4076
pw_m3166_m2598: dw -3166, -2598
pw_m1931_m3612: dw -1931, -3612
pw_m3920_m1189: dw -3920, -1189
COEF_PAIR 2276, 3406
COEF_PAIR 4017,  799

; COEF_X8 c0 [, c1, ...]
; For every argument, emits the value multiplied by 8 as two identical words
; (one dword per argument).
%macro COEF_X8 1-*
%rep %0
    dw %1*8, %1*8
    %rotate 1
%endrep
%endmacro

pw_3703x8:  COEF_X8  3703
pw_1751x8:  COEF_X8  1751
pw_m1380x8: COEF_X8 -1380
pw_3857x8:  COEF_X8  3857
pw_3973x8:  COEF_X8  3973
pw_995x8:   COEF_X8   995
pw_m2106x8: COEF_X8 -2106
pw_3513x8:  COEF_X8  3513
pw_3290x8:  COEF_X8  3290
pw_2440x8:  COEF_X8  2440
pw_m601x8:  COEF_X8  -601
pw_4052x8:  COEF_X8  4052

; 32 x8-scaled multipliers, one dword each (128 bytes in total).
const idct64_mul
COEF_X8  4095,   101,  4065,   501,  2967, -2824,  3229, -2520
COEF_X8  3745,  1660,  3564,  2019,  3822, -1474,  3948, -1092
COEF_X8  3996,   897,  3889,  1285,  3461, -2191,  3659, -1842
COEF_X8  3349,  2359,  3102,  2675,  4036,  -700,  4085,  -301

; Mixed pairs of x8-scaled coefficients.
pw_201_4091x8:   dw   201*8, 4091*8
pw_m601_4052x8:  dw  -601*8, 4052*8
pw_995_3973x8:   dw   995*8, 3973*8
pw_m1380_3857x8: dw -1380*8, 3857*8
pw_1751_3703x8:  dw  1751*8, 3703*8
pw_m2106_3513x8: dw -2106*8, 3513*8
pw_2440_3290x8:  dw  2440*8, 3290*8
pw_m2751_3035x8: dw -2751*8, 3035*8

; Displacement of idct64_mul relative to o_base, biased by -8. o_base is
; defined further down; that is fine because a %define body is only expanded
; at the point of use.
%define o_idct64_offset idct64_mul - (o_base) - 8

SECTION .text

; Code size reduction trickery: Instead of using rip-relative loads with
; mandatory 4-byte offsets everywhere, we can set up a base pointer with a
; single rip-relative lea and then address things relative from that with
; 1-byte offsets as long as data is within +-128 bytes of the base pointer.
%define o_base deint_shuf + 128
; o(x): address of constant x expressed relative to r6, which must hold o_base.
%define o(x) (r6 - (o_base) + (x))
; m(x): fully mangled symbol name of function x for the current instruction set.
%define m(x) mangle(private_prefix %+ _ %+ x %+ SUFFIX)

; Multiplies the interleaved word pairs of dst/src by two coefficient pairs
; (pmaddwd), adds rnd to both dword results and shifts them right by 12.
;   flags & 4: coef[1-2] are register numbers already holding the coefficient
;              pairs; tmp2 is not used in that case.
;   otherwise: coef[1-2] are numbers naming the pw_<c1>_<c2> / pw_m<c2>_<c1>
;              constants, which are broadcast into tmp1/tmp2;
;              flags & 1 swaps which constant goes to which register.
;   flags & 2: the two results are interleaved word-wise inside each dword
;              (shift + pblendw, no saturating pack);
;              otherwise they are packed with signed saturation (packssdw).
; Result in dst; tmp1 is clobbered.
; flags: 1 = swap, 2 = interleave, 4 = coef_regs
%macro ITX_MUL2X_PACK 6-7 0 ; dst/src, tmp[1-2], rnd, coef[1-2], flags
%if %7 & 4
    pmaddwd             m%2, m%5, m%1
    pmaddwd             m%1, m%6
%else
%if %7 & 1
    vpbroadcastd        m%2, [o(pw_%5_%6)]
    vpbroadcastd        m%3, [o(pw_m%6_%5)]
%else
    vpbroadcastd        m%2, [o(pw_m%6_%5)]
    vpbroadcastd        m%3, [o(pw_%5_%6)]
%endif
    pmaddwd             m%2, m%1
    pmaddwd             m%1, m%3
%endif
    paddd               m%2, m%4
    paddd               m%1, m%4
%if %7 & 2
    pslld               m%2, 4
    psrld               m%1, 12
    pblendw             m%1, m%2, 0xaa
%else
    psrad               m%2, 12
    psrad               m%1, 12
    packssdw            m%1, m%2
%endif
%endmacro

; Same operation as ITX_MUL2X_PACK, but with two different coefficient sets:
; coef[1-2] are placed in the low 128-bit lane and coef[3-4] in the high lane
; (vpblendd 0xf0) before delegating to ITX_MUL2X_PACK in coef_regs mode.
; tmp1-tmp3 are clobbered.
; flags: 1 = swap, 2 = interleave, 4 = coef_regs
%macro ITX_MUL4X_PACK 9-10 0 ; dst/src, tmp[1-3], rnd, coef[1-4], flags
%if %10 & 1
    vpbroadcastd        m%3, [o(pw_%8_%9)]
    vpbroadcastd        m%4, [o(pw_m%9_%8)]
    vpbroadcastd       xm%2, [o(pw_%6_%7)]
    vpblendd            m%2, m%3, 0xf0
    vpbroadcastd       xm%3, [o(pw_m%7_%6)]
%else
    vpbroadcastd        m%3, [o(pw_m%9_%8)]
    vpbroadcastd        m%4, [o(pw_%8_%9)]
    vpbroadcastd       xm%2, [o(pw_m%7_%6)]
    vpblendd            m%2, m%3, 0xf0
    vpbroadcastd       xm%3, [o(pw_%6_%7)]
%endif
    vpblendd            m%3, m%4, 0xf0
    ITX_MUL2X_PACK       %1, %4, _, %5, %2, %3, (4|%10)
%endmacro

; dst1 = (src1 * coef1 - src2 * coef2 + rnd) >> 12
; dst2 = (src1 * coef2 + src2 * coef1 + rnd) >> 12
; If coef2 is a number below 32, coef[1-2] are treated as register numbers
; that already hold the coefficient pairs; otherwise they name the
; pw_m<coef2>_<coef1> / pw_<coef1>_<coef2> constants, which are loaded here.
; dst1 replaces src1. dst2 replaces src2 unless the optional 8th argument
; names a different destination register. tmp1 and tmp2 are clobbered.
%macro ITX_MULSUB_2W 7-8 ; dst/src[1-2], tmp[1-2], rnd, coef[1-2], dst2
    punpckhwd           m%3, m%2, m%1
    punpcklwd           m%2, m%1
%if %7 < 32
    pmaddwd             m%1, m%7, m%2
    pmaddwd             m%4, m%7, m%3
%else
    vpbroadcastd        m%1, [o(pw_m%7_%6)]
    pmaddwd             m%4, m%3, m%1
    pmaddwd             m%1, m%2
%endif
    paddd               m%4, m%5
    paddd               m%1, m%5
    psrad               m%4, 12
    psrad               m%1, 12
    packssdw            m%1, m%4
%if %7 < 32
    pmaddwd             m%3, m%6
    pmaddwd             m%2, m%6
%else
    vpbroadcastd        m%4, [o(pw_%6_%7)]
    pmaddwd             m%3, m%4
    pmaddwd             m%2, m%4
%endif
    paddd               m%3, m%5
    paddd               m%2, m%5
    psrad               m%3, 12
    psrad               m%2, 12
%if %0 == 8
    packssdw            m%8, m%2, m%3
%else
    packssdw            m%2, m%3
%endif
%endmacro

; 4-point inverse DCT butterfly on four word registers, in place.
; The last argument is the register holding the dword rounding constant.
%macro IDCT4_1D 7 ; src[1-4], tmp[1-2], pd_2048
    ITX_MULSUB_2W        %2, %4, %5, %6, %7, 1567, 3784, %5 ; t2, t3
    ITX_MULSUB_2W        %1, %3, %4, %6, %7, 2896, 2896, %4 ; t1, t0
    psubsw              m%3, m%1, m%2
    paddsw              m%2, m%1
    paddsw              m%1, m%4, m%5
    psubsw              m%4, m%5
%endmacro

; 8-point inverse DCT on eight word registers, in place (out0..out7 end up in
; src1..src8, see the per-line comments). Both temporaries are clobbered.
%macro IDCT8_1D 11 ; src[1-8], tmp[1-2], pd_2048
    ITX_MULSUB_2W        %6, %4, %9, %10, %11, 3406, 2276 ; t5a, t6a
    ITX_MULSUB_2W        %2, %8, %9, %10, %11,  799, 4017 ; t4a, t7a
    ITX_MULSUB_2W        %3, %7, %9, %10, %11, 1567, 3784 ; t2, t3
    paddsw              m%9, m%2, m%6  ; t4
    psubsw              m%2, m%6       ; t5a
    paddsw             m%10, m%8, m%4  ; t7
    psubsw              m%8, m%4       ; t6a
    ITX_MULSUB_2W        %1, %5, %4, %6, %11, 2896, 2896 ; t1, t0
    ITX_MULSUB_2W        %8, %2, %4, %6, %11, 2896, 2896 ; t5, t6
    psubsw              m%6, m%1, m%3  ; dct4 out2
    paddsw              m%3, m%1       ; dct4 out1
    paddsw              m%1, m%5, m%7  ; dct4 out0
    psubsw              m%5, m%7       ; dct4 out3
    psubsw              m%7, m%3, m%2  ; out6
    paddsw              m%2, m%3       ; out1
    paddsw              m%3, m%6, m%8  ; out2
    psubsw              m%6, m%8       ; out5
    psubsw              m%8, m%1, m%10 ; out7
    paddsw              m%1, m%10      ; out0
    paddsw              m%4, m%5, m%9  ; out3
    psubsw              m%5, m%9       ; out4
%endmacro

; Odd half of the 16-point inverse DCT; operates on the odd-indexed inputs.
; in1 = %1, in3  = %2, in5  = %3, in7  = %4
; in9 = %5, in11 = %6, in13 = %7, in15 = %8
%macro IDCT16_1D_ODDHALF 11 ; src[1-8], tmp[1-2], pd_2048
    ITX_MULSUB_2W        %1, %8, %9, %10, %11,  401, 4076 ; t8a,  t15a
    ITX_MULSUB_2W        %5, %4, %9, %10, %11, 3166, 2598 ; t9a,  t14a
    ITX_MULSUB_2W        %3, %6, %9, %10, %11, 1931, 3612 ; t10a, t13a
    ITX_MULSUB_2W        %7, %2, %9, %10, %11, 3920, 1189 ; t11a, t12a
    psubsw              m%9, m%2, m%6 ; t13
    paddsw              m%6, m%2      ; t12
    psubsw              m%2, m%8, m%4 ; t14
    paddsw              m%8, m%4      ; t15
    psubsw              m%4, m%7, m%3 ; t10
    paddsw              m%3, m%7      ; t11
    psubsw              m%7, m%1, m%5 ; t9
    paddsw              m%1, m%5      ; t8
    ITX_MULSUB_2W        %2, %7, %5, %10, %11,  1567, 3784 ; t9a,  t14a
    ITX_MULSUB_2W        %9, %4, %5, %10, %11, m3784, 1567 ; t10a, t13a
    psubsw              m%5, m%1, m%3 ; t11a
    paddsw              m%1, m%3      ; t8a
    psubsw              m%3, m%7, m%4 ; t13
    paddsw              m%7, m%4      ; t14
    psubsw              m%4, m%8, m%6 ; t12a
    paddsw              m%8, m%6      ; t15a
    psubsw              m%6, m%2, m%9 ; t10
    paddsw              m%2, m%9      ; t9
    ITX_MULSUB_2W        %3, %6, %9, %10, %11, 2896, 2896 ; t10a, t13a
    ITX_MULSUB_2W        %4, %5, %9, %10, %11, 2896, 2896 ; t11,  t12
%endmacro

; Expands its argument with the register set switched to XMM, then switches
; back to YMM.
%macro WRAP_XMM 1+
    INIT_XMM cpuname
    %1
    INIT_YMM cpuname
%endmacro

; Common 4x4 epilogue: adds the residual held in m0/m1 (16 words, four per
; row) to the destination pixels and returns.
;   row[1-4]: destination row index (0-3) for each group of four words, in
;             storage order. Bit 1 of the index selects dst + 2*stride (r2),
;             bit 0 adds another stride.
;   rnd:      if non-zero, m0/m1 are first scaled with pmulhrsw by the
;             constant pw_<rnd>.
; Pixels are loaded as 4 bytes per row, widened to words, summed with the
; residual and written back with unsigned saturation.
; Clobbers r2, m2 and m3; ends with ret.
%macro ITX4_END 4-5 2048 ; row[1-4], rnd
%if %5
    vpbroadcastd         m2, [o(pw_%5)]
    pmulhrsw             m0, m2
    pmulhrsw             m1, m2
%endif
    lea                  r2, [dstq+strideq*2]
%assign %%i 1
%rep 4
    %if %1 & 2
        CAT_XDEFINE %%row_adr, %%i, r2 + strideq*(%1&1)
    %else
        CAT_XDEFINE %%row_adr, %%i, dstq + strideq*(%1&1)
    %endif
    %assign %%i %%i + 1
    %rotate 1
%endrep
    movd                 m2, [%%row_adr1]
    pinsrd               m2, [%%row_adr2], 1
    movd                 m3, [%%row_adr3]
    pinsrd               m3, [%%row_adr4], 1
    pmovzxbw             m2, m2
    pmovzxbw             m3, m3
    paddw                m0, m2
    paddw                m1, m3
    packuswb             m0, m1
    movd       [%%row_adr1], m0
    pextrd     [%%row_adr2], m0, 1
    pextrd     [%%row_adr3], m0, 2
    pextrd     [%%row_adr4], m0, 3
    ret
%endmacro

; One 4-point inverse Walsh-Hadamard pass on packed data in m0/m1.
; The results are spread over m0, m1 and m2 as noted in the per-line comments;
; m3 is clobbered.
%macro IWHT4_1D_PACKED 0
    punpckhqdq           m3, m0, m1 ; in1 in3
    punpcklqdq           m0, m1     ; in0 in2
    psubw                m2, m0, m3
    paddw                m0, m3
    punpckhqdq           m2, m2     ; t2 t2
    punpcklqdq           m0, m0     ; t0 t0
    psubw                m1, m0, m2
    psraw                m1, 1
    psubw                m1, m3     ; t1 t3
    psubw                m0, m1     ; ____ out0
    paddw                m2, m1     ; out3 ____
%endmacro

; 4x4 WHT+WHT inverse transform and add, 8 bits per component.
; Args: dst, stride, c (coefficient buffer).
; Loads the 32 bytes of coefficients, zeroes the buffer, arithmetic-shifts the
; coefficients right by 2, runs two packed WHT passes with a word/dword
; re-shuffle in between, and adds the result to dst without extra rounding
; (ITX4_END with rnd = 0).
INIT_XMM avx2
cglobal inv_txfm_add_wht_wht_4x4_8bpc, 3, 3, 4, dst, stride, c
    mova                 m0, [cq+16*0]
    mova                 m1, [cq+16*1]
    pxor                 m2, m2
    mova          [cq+16*0], m2
    mova          [cq+16*1], m2
    psraw                m0, 2
    psraw                m1, 2
    IWHT4_1D_PACKED
    punpckhwd            m0, m1
    punpcklwd            m3, m1, m2
    punpckhdq            m1, m0, m3
    punpckldq            m0, m3
    IWHT4_1D_PACKED
    vpblendd             m0, m2, 0x03
    ITX4_END              3, 0, 2, 1, 0

; Emits the public entry point inv_txfm_add_<type1>_<type2>_<size>_8bpc.
; The entry point loads the constant base pointer into r6 (required by o()),
; stores the address of the second transform's .pass2 label in tx2q, and then
; transfers control to the first transform's internal function.
; For dct_dct the jump is only taken when eob is non-zero; the code emitted
; by the caller right after this macro handles the eob == 0 case.
%macro INV_TXFM_FN 3 ; type1, type2, size
cglobal inv_txfm_add_%1_%2_%3_8bpc, 4, 5, 0, dst, stride, c, eob, tx2
    %define %%p1 m(i%1_%3_internal_8bpc)
    lea                  r6, [o_base]
    ; Jump to the 1st txfm function if we're not taking the fast path, which
    ; in turn performs an indirect jump to the 2nd txfm function.
    lea tx2q, [m(i%2_%3_internal_8bpc).pass2]
%ifidn %1_%2, dct_dct
    test               eobd, eobd
    jnz %%p1
%else
    ; jump to the 1st txfm function unless it's located directly after this
    times ((%%end - %%p1) >> 31) & 1 jmp %%p1
ALIGN function_align
%%end:
%endif
%endmacro

; 4x4 entry point generator. For dct_dct it also emits the eob == 0 path:
; the first coefficient is broadcast, multiplied twice by 2896*8 with
; pmulhrsw, cleared in memory (eobd is zero on this path) and the result is
; handed to the shared add-to-destination tail of the adst function.
%macro INV_TXFM_4X4_FN 2 ; type1, type2
    INV_TXFM_FN          %1, %2, 4x4
%ifidn %1_%2, dct_dct
    vpbroadcastw         m0, [cq]
    vpbroadcastd         m1, [o(pw_2896x8)]
    pmulhrsw             m0, m1
    mov                [cq], eobd ; 0
    pmulhrsw             m0, m1
    mova                 m1, m0
    jmp m(iadst_4x4_internal_8bpc).end2
%endif
%endmacro

; 4-point inverse DCT on packed data in m0/m1.
; Results: m0 = out0 out1, m1 = out3 out2. Clobbers m2, m3 and m4.
%macro IDCT4_1D_PACKED 0
    vpbroadcastd         m4, [o(pd_2048)]
    punpckhwd            m2, m1, m0
    punpcklwd            m1, m0
    ITX_MUL2X_PACK        2, 0, 3, 4, 1567, 3784
    ITX_MUL2X_PACK        1, 0, 3, 4, 2896, 2896
    paddsw               m0, m1, m2 ; out0 out1
    psubsw               m1, m2     ; out3 out2
%endmacro

; 4-point inverse ADST on packed data in m0/m1.
; Results: m0 = out0 out1, m1 = out2 out3. Clobbers m2-m5.
; All sums are rounded with pd_2048 and shifted right by 12 before packing.
%macro IADST4_1D_PACKED 0
    punpcklwd            m2, m1, m0
    punpckhwd            m3, m1, m0
    vpbroadcastd         m5, [o(pw_m3344_3344)]
    vpbroadcastd         m0, [o(pw_3803_1321)]
    vpbroadcastd         m4, [o(pw_m1321_2482)]
    pmaddwd              m1, m5, m2 ; 3344*in3 - 3344*in2
    psrld                m5, 16
    pmaddwd              m0, m2
    pmaddwd              m2, m4
    pmaddwd              m5, m3 ; 3344*in0
    paddd                m1, m5 ; 3344*in0 - 3344*in2 + 3344*in3
    vpbroadcastd         m4, [o(pw_2482_3344)]
    vpbroadcastd         m5, [o(pw_m3803_3344)]
    pmaddwd              m4, m3
    pmaddwd              m5, m3
    paddd                m4, m0 ; 1321*in0 + 3344*in1 + 3803*in2 + 2482*in3
    vpbroadcastd         m0, [o(pw_m3803_m6688)]
    pmaddwd              m3, m0
    vpbroadcastd         m0, [o(pd_2048)]
    paddd                m2, m0
    paddd                m1, m0
    paddd                m0, m4
    paddd                m5, m2 ; 2482*in0 + 3344*in1 - 1321*in2 - 3803*in3
    paddd                m2, m4
    paddd                m2, m3
    REPX      {psrad x, 12}, m1, m2, m0, m5
    packssdw             m0, m5 ; out0 out1
    packssdw             m1, m2 ; out2 out3
%endmacro

INV_TXFM_4X4_FN dct, dct
INV_TXFM_4X4_FN dct, adst
INV_TXFM_4X4_FN dct, flipadst
INV_TXFM_4X4_FN dct, identity

; DCT 4x4. First pass: load the coefficients, transform, re-arrange the packed
; outputs for the second pass and jump to the second transform via tx2q.
; .pass2: transform again, clear the coefficient buffer and add to dst.
; Rows are stored in the order 0, 1, 3, 2 because m1 holds out3 before out2.
cglobal idct_4x4_internal_8bpc, 0, 5, 6, dst, stride, c, eob, tx2
    mova                 m0, [cq+16*0]
    mova                 m1, [cq+16*1]
    IDCT4_1D_PACKED
    mova                 m2, [o(deint_shuf)]
    shufps               m3, m0, m1, q1331
    shufps               m0, m1, q0220
    pshufb               m0, m2
    pshufb               m1, m3, m2
    jmp                tx2q
.pass2:
    IDCT4_1D_PACKED
    pxor                 m2, m2
    mova          [cq+16*0], m2
    mova          [cq+16*1], m2
    ITX4_END              0, 1, 3, 2

INV_TXFM_4X4_FN adst, dct
INV_TXFM_4X4_FN adst, adst
INV_TXFM_4X4_FN adst, flipadst
INV_TXFM_4X4_FN adst, identity

; ADST 4x4.
;   .pass2: second-pass entry, reached through tx2q.
;   .end:   clears the coefficient buffer, then falls into .end2.
;   .end2:  adds m0/m1 to dst in row order 0, 1, 2, 3 (also the target of the
;           dct_dct eob == 0 path).
;   .main:  the 1-D transform itself, exported as a label so that the flipadst
;           function can call it.
cglobal iadst_4x4_internal_8bpc, 0, 5, 6, dst, stride, c, eob, tx2
    mova                 m0, [cq+16*0]
    mova                 m1, [cq+16*1]
    call .main
    punpckhwd            m3, m0, m1
    punpcklwd            m0, m1
    punpckhwd            m1, m0, m3
    punpcklwd            m0, m3
    jmp                tx2q
.pass2:
    call .main
.end:
    pxor                 m2, m2
    mova          [cq+16*0], m2
    mova          [cq+16*1], m2
.end2:
    ITX4_END              0, 1, 2, 3
ALIGN function_align
cglobal_label .main
    IADST4_1D_PACKED
    ret

INV_TXFM_4X4_FN flipadst, dct
INV_TXFM_4X4_FN flipadst, adst
INV_TXFM_4X4_FN flipadst, flipadst
INV_TXFM_4X4_FN flipadst, identity

; Flipped ADST 4x4: reuses the ADST .main routine. The first pass interleaves
; the outputs with the operands swapped relative to the plain ADST, and the
; second pass stores the rows in reverse order (3, 2, 1, 0).
cglobal iflipadst_4x4_internal_8bpc, 0, 5, 6, dst, stride, c, eob, tx2
    mova                 m0, [cq+16*0]
    mova                 m1, [cq+16*1]
    call m(iadst_4x4_internal_8bpc).main
    punpcklwd            m2, m1, m0
    punpckhwd            m1, m0
    punpcklwd            m0, m1, m2
    punpckhwd            m1, m2
    jmp                tx2q
.pass2:
    call m(iadst_4x4_internal_8bpc).main
.end:
    pxor                 m2, m2
    mova          [cq+16*0], m2
    mova          [cq+16*1], m2
.end2:
    ITX4_END              3, 2, 1, 0

INV_TXFM_4X4_FN identity, dct
INV_TXFM_4X4_FN identity, adst
INV_TXFM_4X4_FN identity, flipadst
INV_TXFM_4X4_FN identity, identity

; iidentity_4x4_internal_8bpc
; Both passes compute x + pmulhrsw(x, pw_1697x8) with a saturating add.
; NOTE(review): if pw_1697x8 holds 1697*8 as its name suggests, this is a scale by
; about (1 + 1697/4096) - confirm against the constant table.
; The first pass additionally interleaves words twice before jumping to tx2q; the
; second pass finishes through iadst_4x4_internal_8bpc.end.
cglobal iidentity_4x4_internal_8bpc, 0, 5, 6, dst, stride, c, eob, tx2
    mova            m0, [cq+16*0]
    mova            m1, [cq+16*1]
    vpbroadcastd    m3, [o(pw_1697x8)]
    pmulhrsw        m2, m3, m0
    pmulhrsw        m3, m1
    paddsw          m0, m2
    paddsw          m1, m3
    punpckhwd       m2, m0, m1
    punpcklwd       m0, m1
    punpckhwd       m1, m0, m2
    punpcklwd       m0, m2
    jmp             tx2q
.pass2:
    vpbroadcastd    m3, [o(pw_1697x8)]
    pmulhrsw        m2, m3, m0
    pmulhrsw        m3, m1
    paddsw          m0, m2
    paddsw          m1, m3
    jmp             m(iadst_4x4_internal_8bpc).end

; WRITE_4X8 a, b
; Adds the word values in m<a> and m<b> to eight 4-pixel rows and stores them back.
; Rows are addressed as dstq + {0,1,2}*strideq, dstq + r3 and the same four offsets
; from r2, so the caller must have set r2 and r3 (the callers in this file use
; r3 = strideq*3). Clobbers m4 and m5.
%macro WRITE_4X8 2 ; coefs[1-2]
    ; gather: xm4 = dst rows 0,1 + r2 rows 0,1; xm5 = dst rows 2,3 + r2 rows 2,3
    movd            xm4, [dstq+strideq*0]
    pinsrd          xm4, [dstq+strideq*1], 1
    movd            xm5, [dstq+strideq*2]
    pinsrd          xm5, [dstq+r3], 1
    pinsrd          xm4, [r2+strideq*0], 2
    pinsrd          xm4, [r2+strideq*1], 3
    pinsrd          xm5, [r2+strideq*2], 2
    pinsrd          xm5, [r2+r3], 3
    pmovzxbw        m4, xm4 ; bytes -> words
    pmovzxbw        m5, xm5
    paddw           m4, m%1
    paddw           m5, m%2
    packuswb        m4, m5 ; words -> bytes with unsigned saturation (per 128-bit lane)
    vextracti128    xm5, m4, 1
    ; scatter: xm4 now holds the four dstq rows, xm5 the four r2 rows
    movd            [dstq+strideq*0], xm4
    pextrd          [dstq+strideq*1], xm4, 1
    pextrd          [dstq+strideq*2], xm4, 2
    pextrd          [dstq+r3], xm4, 3
    movd            [r2+strideq*0], xm5
    pextrd          [r2+strideq*1], xm5, 1
    pextrd          [r2+strideq*2], xm5, 2
    pextrd          [r2+r3], xm5, 3
%endmacro

; INV_TXFM_4X8_FN type1, type2
; Expands INV_TXFM_FN for the 4x8 size. For the dct_dct pair it also emits a short
; path: the value at [cq] is multiplied (pmulhrsw) by pw_2896x8 three times and by
; pw_2048 once, the low word is broadcast into m0/m1, and control jumps to
; iadst_4x8_internal_8bpc.end3 to add it to the destination.
%macro INV_TXFM_4X8_FN 2 ; type1, type2
    INV_TXFM_FN     %1, %2, 4x8
%ifidn %1_%2, dct_dct
    movd            xm1, [o(pw_2896x8)]
    pmulhrsw        xm0, xm1, [cq]
    movd            xm2, [o(pw_2048)]
    mov             [cq], eobd ; NOTE(review): presumably eob == 0 here, so this clears the coefficient - confirm in INV_TXFM_FN
    pmulhrsw        xm0, xm1
    pmulhrsw        xm0, xm1
    pmulhrsw        xm0, xm2
    vpbroadcastw    m0, xm0
    mova            m1, m0
    jmp             m(iadst_4x8_internal_8bpc).end3
%endif
%endmacro

; IDCT8_1D_PACKED
; Packed 8-point inverse DCT. Reads m0-m3, leaves out0..out7 packed in m0-m3 as
; noted on the last four instructions, and uses m4-m6 as scratch (m6 = pd_2048,
; passed to ITX_MUL2X_PACK, which is defined elsewhere in the file).
%macro IDCT8_1D_PACKED 0
    vpbroadcastd    m6, [o(pd_2048)]
    punpckhwd       m5, m3, m0 ; in7 in1
    punpckhwd       m4, m1, m2 ; in3 in5
    punpcklwd       m3, m1     ; in6 in2
    punpcklwd       m2, m0     ; in4 in0
    ITX_MUL2X_PACK  5, 0, 1, 6,  799, 4017, 3 ; t4a t7a
    ITX_MUL2X_PACK  4, 0, 1, 6, 3406, 2276, 3 ; t5a t6a
    ITX_MUL2X_PACK  3, 0, 1, 6, 1567, 3784    ; t3 t2
    psubsw          m0, m5, m4 ; t5a t6a (interleaved)
    paddsw          m4, m5     ; t4 t7 (interleaved)
    ITX_MUL2X_PACK  2, 1, 5, 6, 2896, 2896    ; t0 t1
    vpbroadcastd    m1, [o(pw_m2896_2896)]
    ITX_MUL2X_PACK  0, 1, _, 6, 1, 5, 4 ; t6 t5
%if mmsize > 16
    vbroadcasti128  m1, [o(deint_shuf)]
    pshufb          m4, m1
%else
    pshufb          m4, [o(deint_shuf)]
%endif
    psubsw          m1, m2, m3 ; tmp3 tmp2
    paddsw          m3, m2     ; tmp0 tmp1
    shufps          m2, m4, m0, q1032 ; t7 t6
    vpblendd        m4, m0, 0xcc      ; t4 t5
    paddsw          m0, m3, m2 ; out0 out1
    psubsw          m3, m2     ; out7 out6
    psubsw          m2, m1, m4 ; out4 out5
    paddsw          m1, m4     ; out3 out2
%endmacro

; IADST8_1D_PACKED pass
; Packed 8-point inverse ADST. Reads m2-m5, writes m0-m3 (several outputs are
; negated, see the per-line notes), and uses m4-m6 as scratch.
; pass == 1 finishes with pmaddwd by +/-2896 plus explicit rounding (+pd_2048, >>12);
; any other value finishes with pmulhrsw by pw_2896x8.
%macro IADST8_1D_PACKED 1 ; pass
    vpbroadcastd    m6, [o(pd_2048)]
    punpckhwd       m0, m4, m3 ; 0 7
    punpckhwd       m1, m5, m2 ; 2 5
    punpcklwd       m2, m5     ; 4 3
    punpcklwd       m3, m4     ; 6 1
%if %1 == 1
    ITX_MUL2X_PACK  0, 4, 5, 6,  401, 4076, 3 ; t1a t0a
    ITX_MUL2X_PACK  1, 4, 5, 6, 1931, 3612, 2 ; t2a t3a
    ITX_MUL2X_PACK  2, 4, 5, 6, 3166, 2598, 3 ; t5a t4a
    ITX_MUL2X_PACK  3, 4, 5, 6, 3920, 1189, 2 ; t6a t7a
    psubsw          m4, m0, m2 ; t5 t4
    paddsw          m0, m2     ; t1 t0
    psubsw          m5, m1, m3 ; t6 t7
    paddsw          m1, m3     ; t2 t3
    ITX_MUL2X_PACK  4, 2, 3, 6, 1567, 3784, 3 ; t5a t4a
    ITX_MUL2X_PACK  5, 2, 3, 6, 3784, 1567, 2 ; t7a t6a
%if mmsize > 16
    vbroadcasti128  m2, [o(deint_shuf)]
%else
    mova            m2, [o(deint_shuf)]
%endif
    pshuflw         m1, m1, q2301
    pshufhw         m1, m1, q2301
    psubsw          m3, m0, m1 ; t3 t2
    paddsw          m0, m1     ; -out7 out0
    psubsw          m1, m4, m5 ; t7 t6
    paddsw          m4, m5     ; out6 -out1
    pshufb          m0, m2
    pshufb          m4, m2
    vpbroadcastd    m5, [o(pw_m2896_2896)]
    pmaddwd         m2, m5, m3
    pmaddwd         m5, m1
    paddd           m2, m6
    paddd           m5, m6
    psrad           m2, 12
    psrad           m5, 12
    packssdw        m2, m5 ; out4 -out5
    vpbroadcastd    m5, [o(pw_2896_2896)]
    pmaddwd         m3, m5
    pmaddwd         m1, m5
    paddd           m3, m6
    paddd           m1, m6
    psrad           m3, 12
    psrad           m1, 12
    packssdw        m1, m3 ; out2 -out3
    punpcklqdq      m3, m4, m0 ; out6 -out7
    punpckhqdq      m0, m4     ; out0 -out1
%else
    ITX_MUL2X_PACK  0, 4, 5, 6,  401, 4076 ; t0a t1a
    ITX_MUL2X_PACK  1, 4, 5, 6, 1931, 3612 ; t2a t3a
    ITX_MUL2X_PACK  2, 4, 5, 6, 3166, 2598 ; t4a t5a
    ITX_MUL2X_PACK  3, 4, 5, 6, 3920, 1189 ; t6a t7a
    psubsw          m4, m0, m2 ; t4 t5
    paddsw          m0, m2     ; t0 t1
    psubsw          m5, m1, m3 ; t6 t7
    paddsw          m1, m3     ; t2 t3
    shufps          m2, m5, m4, q1032
    punpckhwd       m4, m2
    punpcklwd       m5, m2
    ITX_MUL2X_PACK  4, 2, 3, 6, 1567, 3784, 1 ; t5a t4a
    ITX_MUL2X_PACK  5, 2, 3, 6, 3784, 1567    ; t7a t6a
    psubsw          m2, m0, m1 ; t2 t3
    paddsw          m0, m1     ; out0 -out7
    psubsw          m1, m4, m5 ; t7 t6
    paddsw          m4, m5     ; out6 -out1
    vpbroadcastd    m5, [o(pw_2896x8)]
    vpblendd        m3, m0, m4, 0x33  ; out6 -out7
    vpblendd        m0, m4, 0xcc      ; out0 -out1
    shufps          m4, m2, m1, q1032 ; t3 t7
    vpblendd        m1, m2, 0x33      ; t2 t6
    psubsw          m2, m1, m4 ; t2-t3 t6-t7
    paddsw          m1, m4     ; t2+t3 t6+t7
    pmulhrsw        m2, m5     ; out4 -out5
    pshufd          m1, m1, q1032
    pmulhrsw        m1, m5     ; out2 -out3
%endif
%endmacro

INIT_YMM avx2
INV_TXFM_4X8_FN dct, dct
INV_TXFM_4X8_FN dct, adst
INV_TXFM_4X8_FN dct, flipadst
INV_TXFM_4X8_FN dct, identity

; idct_4x8_internal_8bpc
; Entry (first pass): loads 2x32 bytes from cq with a qword permute (q3120),
; multiplies by pw_2896x8 (pmulhrsw), runs IDCT4_1D_PACKED, reorders the result and
; jumps to tx2q.
; .pass2: moves the upper 128-bit halves of m0/m1 into xm2/xm3, calls .main
; (IDCT8_1D_PACKED on xmm registers), re-joins the halves, loads pw_2048 into m4
; and finishes through iadst_4x8_internal_8bpc.end2.
cglobal idct_4x8_internal_8bpc, 0, 5, 7, dst, stride, c, eob, tx2
    vpermq          m0, [cq+32*0], q3120
    vpermq          m1, [cq+32*1], q3120
    vpbroadcastd    m2, [o(pw_2896x8)]
    pmulhrsw        m0, m2
    pmulhrsw        m1, m2
    IDCT4_1D_PACKED
    vbroadcasti128  m2, [o(deint_shuf)]
    shufps          m3, m0, m1, q1331
    shufps          m0, m1, q0220
    pshufb          m0, m2
    pshufb          m1, m3, m2
    jmp             tx2q
.pass2:
    vextracti128    xm2, m0, 1
    vextracti128    xm3, m1, 1
    call            .main
    vpbroadcastd    m4, [o(pw_2048)]
    vinserti128     m0, xm2, 1
    vinserti128     m1, xm3, 1
    pshufd          m1, m1, q1032
    jmp             m(iadst_4x8_internal_8bpc).end2
ALIGN function_align
cglobal_label .main
    WRAP_XMM        IDCT8_1D_PACKED
    ret

INV_TXFM_4X8_FN adst, dct
INV_TXFM_4X8_FN adst, adst
INV_TXFM_4X8_FN adst, flipadst
INV_TXFM_4X8_FN adst, identity

; iadst_4x8_internal_8bpc
; Entry (first pass): loads and pre-scales like idct_4x8, calls
; iadst_8x4_internal_8bpc.main (defined elsewhere), interleaves words twice and
; jumps to tx2q.
; .pass2: splits the inputs into xm2-xm5, calls .main_pass2, re-joins the halves and
;         builds m4 = pw_2048, m5 = 0 - m4.
; .end:   m4 takes dwords 2-3 and 6-7 from m5 (mask 0xcc), giving a mixed-sign
;         multiplier (presumably to undo the negated outputs of the ADST - confirm).
; .end2:  multiplies m0/m1 by m4 (pmulhrsw) and zeroes 64 bytes at cq.
; .end3:  sets r2 = dstq + 4*strideq, r3 = 3*strideq and writes the 4x8 block.
; .end, .end2 and .end3 are jump targets for the other 4x8 transforms.
cglobal iadst_4x8_internal_8bpc, 0, 5, 7, dst, stride, c, eob, tx2
    vpermq          m0, [cq+32*0], q3120
    vpermq          m1, [cq+32*1], q3120
    vpbroadcastd    m2, [o(pw_2896x8)]
    pmulhrsw        m0, m2
    pmulhrsw        m1, m2
    call            m(iadst_8x4_internal_8bpc).main
    punpckhwd       m3, m0, m1
    punpcklwd       m0, m1
    punpckhwd       m1, m0, m3
    punpcklwd       m0, m3
    jmp             tx2q
.pass2:
    vextracti128    xm2, m0, 1
    vextracti128    xm3, m1, 1
    pshufd          xm4, xm0, q1032
    pshufd          xm5, xm1, q1032
    call            .main_pass2
    vpbroadcastd    m4, [o(pw_2048)]
    vinserti128     m0, xm2, 1
    vinserti128     m1, xm3, 1
    pxor            m5, m5
    psubw           m5, m4 ; m5 = -m4
.end:
    vpblendd        m4, m5, 0xcc
.end2:
    pmulhrsw        m0, m4
    pmulhrsw        m1, m4
    WIN64_RESTORE_XMM
    pxor            m2, m2
    mova            [cq+32*0], m2 ; clear the coefficient buffer
    mova            [cq+32*1], m2
.end3:
    lea             r2, [dstq+strideq*4]
    lea             r3, [strideq*3]
    WRITE_4X8       0, 1
    RET
ALIGN function_align
.main_pass1:
    WRAP_XMM        IADST8_1D_PACKED 1
    ret
ALIGN function_align
cglobal_label .main_pass2
    WRAP_XMM        IADST8_1D_PACKED 2
    ret

INV_TXFM_4X8_FN flipadst, dct
INV_TXFM_4X8_FN flipadst, adst
INV_TXFM_4X8_FN flipadst, flipadst
INV_TXFM_4X8_FN flipadst, identity
; iflipadst_4x8_internal_8bpc
; Entry (first pass): loads 2x32 bytes from cq with a qword permute (q3120),
; multiplies by pw_2896x8 (pmulhrsw), calls iadst_8x4_internal_8bpc.main and
; interleaves words with the source operands swapped (m1 before m0) before
; jumping to tx2q.
; .pass2: splits the inputs into xm2-xm5, calls iadst_4x8_internal_8bpc.main_pass2,
; then re-joins the halves in swapped order (m3 <- xm1, m2 <- xm0) and swaps the
; qword pairs with pshufd. Here m5 = pw_2048 and m4 = 0 - m5, i.e. the opposite
; sign assignment to the one iadst_4x8_internal_8bpc sets up before its .end
; label, which is where this routine finishes.
cglobal iflipadst_4x8_internal_8bpc, 0, 5, 7, dst, stride, c, eob, tx2
    vpermq          m0, [cq+32*0], q3120
    vpermq          m1, [cq+32*1], q3120
    vpbroadcastd    m2, [o(pw_2896x8)]
    pmulhrsw        m0, m2
    pmulhrsw        m1, m2
    call            m(iadst_8x4_internal_8bpc).main
    punpcklwd       m3, m1, m0
    punpckhwd       m1, m0
    punpcklwd       m0, m1, m3
    punpckhwd       m1, m3
    jmp             tx2q
.pass2:
    vextracti128    xm2, m0, 1
    vextracti128    xm3, m1, 1
    pshufd          xm4, xm0, q1032
    pshufd          xm5, xm1, q1032
    call            m(iadst_4x8_internal_8bpc).main_pass2
    vpbroadcastd    m5, [o(pw_2048)]
    vinserti128     m3, xm1, 1
    vinserti128     m2, xm0, 1
    pxor            m4, m4
    psubw           m4, m5 ; m4 = -m5
    pshufd          m0, m3, q1032
    pshufd          m1, m2, q1032
    jmp             m(iadst_4x8_internal_8bpc).end

INV_TXFM_4X8_FN identity, dct
INV_TXFM_4X8_FN identity, adst
INV_TXFM_4X8_FN identity, flipadst
INV_TXFM_4X8_FN identity, identity

; iidentity_4x8_internal_8bpc
; Entry (first pass): loads 2x32 bytes from cq (qword permute q3120), interleaves
; words, multiplies by pw_2896x8, interleaves again, then computes
; x + pmulhrsw(x, pw_1697x8) with a saturating add and jumps to tx2q.
; .pass2: only loads pw_4096 into m4 and finishes through
; iadst_4x8_internal_8bpc.end2, which multiplies m0/m1 by m4.
cglobal iidentity_4x8_internal_8bpc, 0, 5, 7, dst, stride, c, eob, tx2
    vpermq          m2, [cq+32*0], q3120
    vpermq          m0, [cq+32*1], q3120
    vpbroadcastd    m3, [o(pw_2896x8)]
    vpbroadcastd    m4, [o(pw_1697x8)]
    punpcklwd       m1, m2, m0
    punpckhwd       m2, m0
    pmulhrsw        m1, m3
    pmulhrsw        m2, m3
    punpcklwd       m0, m1, m2
    punpckhwd       m1, m2
    pmulhrsw        m2, m4, m0
    pmulhrsw        m4, m1
    paddsw          m0, m2
    paddsw          m1, m4
    jmp             tx2q
.pass2:
    vpbroadcastd    m4, [o(pw_4096)]
    jmp             m(iadst_4x8_internal_8bpc).end2

; INV_TXFM_4X16_FN type1, type2
; Expands INV_TXFM_FN for the 4x16 size. For the dct_dct pair it also emits a short
; path: the value at [cq] is multiplied (pmulhrsw) by pw_2896x8, pw_16384, pw_2896x8
; and pw_2048 in that order, the low word is broadcast into m0-m3, and control
; jumps to iadst_4x16_internal_8bpc.end3 to add it to the destination.
%macro INV_TXFM_4X16_FN 2 ; type1, type2
    INV_TXFM_FN     %1, %2, 4x16
%ifidn %1_%2, dct_dct
    movd            xm1, [o(pw_2896x8)]
    pmulhrsw        xm0, xm1, [cq]
    movd            xm2, [o(pw_16384)]
    movd            xm3, [o(pw_2048)]
    mov             [cq], eobd ; NOTE(review): presumably eob == 0 here, so this clears the coefficient - confirm in INV_TXFM_FN
    pmulhrsw        xm0, xm2
    pmulhrsw        xm0, xm1
    pmulhrsw        xm0, xm3
    vpbroadcastw    m0, xm0
    mova            m1, m0
    mova            m2, m0
    mova            m3, m0
    jmp             m(iadst_4x16_internal_8bpc).end3
%endif
%endmacro

; IDCT16_1D_PACKED
; Packed 16-point inverse DCT. Reads m0-m7, leaves out0..out15 packed in m0-m7 as
; noted on the final instructions, and uses m8-m10 as scratch (m10 = pd_2048,
; passed to ITX_MUL2X_PACK, which is defined elsewhere in the file).
; The .main2 label sits after the m10 load, so code entering there must already
; have m10 set up.
%macro IDCT16_1D_PACKED 0
    vpbroadcastd    m10, [o(pd_2048)]
.main2:
    punpckhwd       m8, m7, m0 ; dct16 in15 in1
    punpcklwd       m9, m4, m0 ; dct4 in2 in0
    punpckhwd       m0, m3, m4 ; dct16 in7 in9
    punpcklwd       m7, m1     ; dct8 in7 in1
    punpckhwd       m1, m6     ; dct16 in3 in13
    punpcklwd       m3, m5     ; dct8 in3 in5
    punpckhwd       m5, m2     ; dct16 in11 in5
    punpcklwd       m6, m2     ; dct4 in3 in1
    ITX_MUL2X_PACK  8, 2, 4, 10,  401, 4076, 3 ; t8a t15a
    ITX_MUL2X_PACK  0, 2, 4, 10, 3166, 2598, 3 ; t9a t14a
    ITX_MUL2X_PACK  1, 2, 4, 10, 3920, 1189, 3 ; t11a t12a
    ITX_MUL2X_PACK  5, 2, 4, 10, 1931, 3612, 3 ; t10a t13a
    ITX_MUL2X_PACK  7, 2, 4, 10,  799, 4017, 3 ; t4a t7a
    ITX_MUL2X_PACK  3, 2, 4, 10, 3406, 2276, 3 ; t5a t6a
    ITX_MUL2X_PACK  6, 2, 4, 10, 1567, 3784    ; t3 t2
    psubsw          m2, m8, m0 ; t9 t14
    paddsw          m8, m0     ; t8 t15
    psubsw          m0, m1, m5 ; t10 t13
    paddsw          m1, m5     ; t11 t12
    vpbroadcastd    m5, [o(pw_m3784_1567)] ; reuse pw_1567_3784
    ITX_MUL2X_PACK  2, 4, _, 10, 4, 5, 6 ; t9a t14a
    vpbroadcastd    m4, [o(pw_m1567_m3784)] ; reuse pw_m3784_1567
    ITX_MUL2X_PACK  0, 5, _, 10, 5, 4, 6 ; t10a t13a
    psubsw          m4, m8, m1 ; t11a t12a
    paddsw          m8, m1     ; t8a t15a
    psubsw          m1, m7, m3 ; t5a t6a
    paddsw          m7, m3     ; t4 t7
    paddsw          m3, m2, m0 ; t9 t14
    psubsw          m2, m0     ; t10 t13
%if mmsize > 16
    vbroadcasti128  m0, [o(deint_shuf)]
%else
    mova            m0, [o(deint_shuf)]
%endif
    pshufb          m8, m0
    pshufb          m7, m0
    pshufb          m3, m0
    ITX_MUL2X_PACK  9, 0, 5, 10, 2896, 2896 ; t0 t1
    vpbroadcastd    m0, [o(pw_m2896_2896)]
    ITX_MUL2X_PACK  4, 5, _, 10, 5, 0, 4 ; t11 t12
    vpbroadcastd    m5, [o(pw_2896_2896)]
    ITX_MUL2X_PACK  1, 0, _, 10, 0, 5, 4 ; t6 t5
    vpbroadcastd    m0, [o(pw_m2896_2896)]
    ITX_MUL2X_PACK  2, 0, _, 10, 0, 5, 4 ; t13a t10a
    punpckhqdq      m0, m8, m3 ; t15a t14
    punpcklqdq      m8, m3     ; t8a t9
    shufps          m5, m4, m2, q1032 ; t12 t13a
    vpblendd        m4, m2, 0xcc      ; t11 t10a
    shufps          m2, m7, m1, q1032 ; t7 t6
    vpblendd        m7, m1, 0xcc      ; t4 t5
    psubsw          m1, m9, m6 ; dct4 out3 out2
    paddsw          m9, m6     ; dct4 out0 out1
    psubsw          m3, m9, m2 ; dct8 out7 out6
    paddsw          m9, m2     ; dct8 out0 out1
    psubsw          m2, m1, m7 ; dct8 out4 out5
    paddsw          m1, m7     ; dct8 out3 out2
    psubsw          m7, m9, m0 ; out15 out14
    paddsw          m0, m9     ; out0 out1
    psubsw          m6, m1, m5 ; out12 out13
    paddsw          m1, m5     ; out3 out2
    psubsw          m5, m2, m4 ; out11 out10
    paddsw          m2, m4     ; out4 out5
    psubsw          m4, m3, m8 ; out8 out9
    paddsw          m3, m8     ; out7 out6
%endmacro

INV_TXFM_4X16_FN dct, dct
INV_TXFM_4X16_FN dct, adst
INV_TXFM_4X16_FN dct, flipadst
INV_TXFM_4X16_FN dct, identity

; idct_4x16_internal_8bpc
; Entry (first pass): loads 4x32 bytes from cq, calls idct_16x4_internal_8bpc.main
; (defined elsewhere), interleaves words, multiplies by pw_16384 (pmulhrsw),
; interleaves dwords and jumps to tx2q.
; .pass2: moves the upper 128-bit halves of m0-m3 into xm4-xm7, calls .main
; (IDCT16_1D_PACKED on xmm registers), re-joins the halves, loads pw_2048 into m5
; and finishes through iadst_4x16_internal_8bpc.end2.
cglobal idct_4x16_internal_8bpc, 0, 5, 11, dst, stride, c, eob, tx2
    mova            m0, [cq+32*0]
    mova            m1, [cq+32*1]
    mova            m2, [cq+32*2]
    mova            m3, [cq+32*3]
    call            m(idct_16x4_internal_8bpc).main
    vpbroadcastd    m5, [o(pw_16384)]
    punpckhwd       m4, m2, m3
    punpcklwd       m2, m3
    punpckhwd       m3, m0, m1
    punpcklwd       m0, m1
    REPX            {pmulhrsw x, m5}, m0, m4, m2, m3
    punpckhdq       m1, m0, m2
    punpckldq       m0, m2
    punpckldq       m2, m3, m4
    punpckhdq       m3, m4
    jmp             tx2q
.pass2:
    vextracti128    xm4, m0, 1
    vextracti128    xm5, m1, 1
    vextracti128    xm6, m2, 1
    vextracti128    xm7, m3, 1
    call            .main
    vinserti128     m0, xm4, 1
    vinserti128     m1, xm5, 1
    vpbroadcastd    m5, [o(pw_2048)]
    vinserti128     m2, xm6, 1
    vinserti128     m3, xm7, 1
    pshufd          m1, m1, q1032
    pshufd          m3, m3, q1032
    jmp             m(iadst_4x16_internal_8bpc).end2
ALIGN function_align
cglobal_label .main
    WRAP_XMM        IDCT16_1D_PACKED
    ret

INV_TXFM_4X16_FN adst, dct
INV_TXFM_4X16_FN adst, adst
INV_TXFM_4X16_FN adst, flipadst
INV_TXFM_4X16_FN adst, identity

; iadst_4x16_internal_8bpc
; Entry (first pass): loads 4x32 bytes from cq, calls iadst_16x4_internal_8bpc.main
; (defined elsewhere), interleaves words, multiplies by pw_16384 (pmulhrsw),
; interleaves dwords and jumps to tx2q.
; .pass2: calls .main, scales two sum/difference terms by pw_2896x8, reorders the
;         outputs with blends and qword permutes, and forms m6 = m7 - m5 with
;         m5 = pw_2048 (NOTE(review): m7 is zeroed at .main2; assuming it is still
;         zero on return, m6 = -pw_2048 - confirm against the rest of .main).
; .end:   m5 takes dwords 2-3 and 6-7 from m6 (mask 0xcc).
; .end2:  multiplies m0-m3 by m5 (pmulhrsw) and zeroes 128 bytes at cq.
; .end3:  writes the 4x16 block as two WRITE_4X8 calls: rows 0-3 and 8-11 first,
;         then, after advancing dstq and r2 by 4*strideq, rows 4-7 and 12-15.
cglobal iadst_4x16_internal_8bpc, 0, 5, 11, dst, stride, c, eob, tx2
    mova            m0, [cq+32*0]
    mova            m1, [cq+32*1]
    mova            m2, [cq+32*2]
    mova            m3, [cq+32*3]
    call            m(iadst_16x4_internal_8bpc).main
    vpbroadcastd    m5, [o(pw_16384)]
    punpckhwd       m4, m2, m3
    punpcklwd       m2, m3
    punpckhwd       m3, m0, m1
    punpcklwd       m0, m1
    REPX            {pmulhrsw x, m5}, m4, m2, m3, m0
    punpckhdq       m1, m0, m2
    punpckldq       m0, m2
    punpckldq       m2, m3, m4
    punpckhdq       m3, m4
    jmp             tx2q
.pass2:
    call            .main
    vpbroadcastd    m5, [o(pw_2896x8)]
    paddsw          m1, m2, m4
    psubsw          m2, m4
    pmulhrsw        m1, m5 ; -out7 out4 out6 -out5
    pmulhrsw        m2, m5 ; out8 -out11 -out9 out10
    vpbroadcastd    m5, [o(pw_2048)]
    pshufd          m1, m1, q1032
    vpblendd        m4, m1, m0, 0x33
    vpblendd        m0, m2, 0x33
    vpblendd        m2, m3, 0x33
    vpblendd        m3, m1, 0x33
    vpermq          m0, m0, q2031
    vpermq          m1, m2, q1302
    vpermq          m2, m3, q3120
    vpermq          m3, m4, q0213
    psubw           m6, m7, m5
.end:
    vpblendd        m5, m6, 0xcc
.end2:
    REPX            {pmulhrsw x, m5}, m0, m1, m2, m3
    WIN64_RESTORE_XMM
    pxor            m4, m4
    mova            [cq+32*0], m4 ; clear the coefficient buffer
    mova            [cq+32*1], m4
    mova            [cq+32*2], m4
    mova            [cq+32*3], m4
.end3:
    lea             r2, [dstq+strideq*8]
    lea             r3, [strideq*3]
    WRITE_4X8       0, 1
    lea             dstq, [dstq+strideq*4]
    lea             r2, [r2+strideq*4]
    WRITE_4X8       2, 3
    RET
ALIGN function_align
.main:
    vpblendd        m4, m1, m0, 0xcc
    vpblendd        m1, m0, 0x33
    vpblendd        m5, m2, m3, 0xcc
    vpblendd        m2, m3, 0x33
    vperm2i128      m3, m5, m2, 0x31
    vinserti128     m0, m1, xm4, 1 ; in0 in3 in2 in1
    vperm2i128      m4, m1, m4, 0x31
    vinserti128     m1, m5, xm2, 1 ; in4 in7 in6 in5
    pshufd          m3, m3, q1032  ; in15 in12 in13 in14
    pshufd          m2, m4, q1032  ; in11 in8 in9 in10
cglobal_label .main2
    vpbroadcastd    m8, [o(pd_2048)]
    pxor            m7, m7
    punpckhwd       m4, m3, m0 ; in12 in3 in14 in1
Coastguard Worker punpcklwd m0, m3 ; in0 in15 in2 in13 1020*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m2, m1 ; in8 in7 in10 in5 1021*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m2 ; in4 in11 in6 in9 1022*c0909341SAndroid Build Coastguard Worker ITX_MUL4X_PACK 0, 2, 5, 6, 8, 201, 4091, 995, 3973, 3 1023*c0909341SAndroid Build Coastguard Worker ITX_MUL4X_PACK 1, 2, 5, 6, 8, 1751, 3703, 2440, 3290, 3 1024*c0909341SAndroid Build Coastguard Worker ITX_MUL4X_PACK 3, 2, 5, 6, 8, 3035, 2751, 3513, 2106, 3 1025*c0909341SAndroid Build Coastguard Worker ITX_MUL4X_PACK 4, 2, 5, 6, 8, 3857, 1380, 4052, 601, 3 1026*c0909341SAndroid Build Coastguard Worker psubsw m2, m0, m3 ; t9a t8a t11a t10a 1027*c0909341SAndroid Build Coastguard Worker paddsw m0, m3 ; t1a t0a t3a t2a 1028*c0909341SAndroid Build Coastguard Worker psubsw m3, m1, m4 ; t13a t12a t15a t14a 1029*c0909341SAndroid Build Coastguard Worker paddsw m1, m4 ; t5a t4a t7a t6a 1030*c0909341SAndroid Build Coastguard Worker ITX_MUL4X_PACK 2, 4, 5, 6, 8, 799, 4017, 3406, 2276, 3 1031*c0909341SAndroid Build Coastguard Worker psubw m6, m7, m5 1032*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 3, 5, _, 8, 6, 4, 6 1033*c0909341SAndroid Build Coastguard Worker vpbroadcastd m6, [o(pw_m3784_1567)] 1034*c0909341SAndroid Build Coastguard Worker vpbroadcastd m5, [o(pw_1567_3784)] 1035*c0909341SAndroid Build Coastguard Worker psubsw m4, m0, m1 ; t5 t4 t7 t6 1036*c0909341SAndroid Build Coastguard Worker paddsw m0, m1 ; t1 t0 t3 t2 1037*c0909341SAndroid Build Coastguard Worker psubsw m1, m2, m3 ; t13a t12a t15a t14a 1038*c0909341SAndroid Build Coastguard Worker paddsw m2, m3 ; t9a t8a t11a t10a 1039*c0909341SAndroid Build Coastguard Worker psubw m3, m7, m6 ; pw_3784_m1567 1040*c0909341SAndroid Build Coastguard Worker vpblendd m6, m3, 0xf0 1041*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 4, 3, _, 8, 6, 5, 4 ; t4a t5a t7a t6a 1042*c0909341SAndroid Build Coastguard Worker ITX_MUL2X_PACK 1, 3, _, 8, 
6, 5, 4 ; t12 t13 t15 t14 1043*c0909341SAndroid Build Coastguard Worker vbroadcasti128 m5, [o(deint_shuf)] 1044*c0909341SAndroid Build Coastguard Worker pshufb m0, m5 1045*c0909341SAndroid Build Coastguard Worker pshufb m2, m5 1046*c0909341SAndroid Build Coastguard Worker vperm2i128 m3, m0, m2, 0x31 ; t3 t2 t11a t10a 1047*c0909341SAndroid Build Coastguard Worker vinserti128 m0, xm2, 1 ; t1 t0 t9a t8a 1048*c0909341SAndroid Build Coastguard Worker vperm2i128 m2, m4, m1, 0x31 ; t7a t6a t15 t14 1049*c0909341SAndroid Build Coastguard Worker vinserti128 m4, xm1, 1 ; t4a t5a t12 t13 1050*c0909341SAndroid Build Coastguard Worker pshufd m2, m2, q1032 ; t6a t7a t14 t15 1051*c0909341SAndroid Build Coastguard Worker psubsw m1, m0, m3 ; t3a t2a t11 t10 1052*c0909341SAndroid Build Coastguard Worker paddsw m0, m3 ; -out15 out0 out14 -out1 1053*c0909341SAndroid Build Coastguard Worker paddsw m3, m4, m2 ; -out3 out12 out2 -out13 1054*c0909341SAndroid Build Coastguard Worker psubsw m4, m2 ; t6 t7 t14a t15a 1055*c0909341SAndroid Build Coastguard Worker shufps m2, m1, m4, q1032 ; t2a t6 t10 t14a 1056*c0909341SAndroid Build Coastguard Worker vpblendd m4, m1, 0x33 ; t3a t7 t11 t15a 1057*c0909341SAndroid Build Coastguard Worker ret 1058*c0909341SAndroid Build Coastguard WorkerALIGN function_align 1059*c0909341SAndroid Build Coastguard Worker.main_pass1_end: 1060*c0909341SAndroid Build Coastguard Worker vpbroadcastd m5, [o(pw_m2896_2896)] 1061*c0909341SAndroid Build Coastguard Worker vpbroadcastd m6, [o(pw_2896_2896)] 1062*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m4, m2 1063*c0909341SAndroid Build Coastguard Worker punpckhwd m4, m2 1064*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m5, m4 1065*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m6 1066*c0909341SAndroid Build Coastguard Worker pmaddwd m5, m1 1067*c0909341SAndroid Build Coastguard Worker pmaddwd m1, m6 1068*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m8}, m5, m1, m2, m4 
1069*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12}, m5, m2, m1, m4 1070*c0909341SAndroid Build Coastguard Worker packssdw m2, m5 ; -out11 out8 out10 -out9 1071*c0909341SAndroid Build Coastguard Worker packssdw m1, m4 ; -out7 out4 out6 -out5 1072*c0909341SAndroid Build Coastguard Worker ret 1073*c0909341SAndroid Build Coastguard Worker 1074*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, dct 1075*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, adst 1076*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, flipadst 1077*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, identity 1078*c0909341SAndroid Build Coastguard Worker 1079*c0909341SAndroid Build Coastguard Workercglobal iflipadst_4x16_internal_8bpc, 0, 5, 11, dst, stride, c, eob, tx2 1080*c0909341SAndroid Build Coastguard Worker mova m0, [cq+32*0] 1081*c0909341SAndroid Build Coastguard Worker mova m1, [cq+32*1] 1082*c0909341SAndroid Build Coastguard Worker mova m2, [cq+32*2] 1083*c0909341SAndroid Build Coastguard Worker mova m3, [cq+32*3] 1084*c0909341SAndroid Build Coastguard Worker call m(iadst_16x4_internal_8bpc).main 1085*c0909341SAndroid Build Coastguard Worker vpbroadcastd m5, [o(pw_16384)] 1086*c0909341SAndroid Build Coastguard Worker punpcklwd m4, m1, m0 1087*c0909341SAndroid Build Coastguard Worker punpckhwd m1, m0 1088*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m3, m2 1089*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m2 1090*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m5}, m4, m1, m0, m3 1091*c0909341SAndroid Build Coastguard Worker punpckldq m2, m3, m1 1092*c0909341SAndroid Build Coastguard Worker punpckhdq m3, m1 1093*c0909341SAndroid Build Coastguard Worker punpckhdq m1, m0, m4 1094*c0909341SAndroid Build Coastguard Worker punpckldq m0, m4 1095*c0909341SAndroid Build Coastguard Worker jmp tx2q 1096*c0909341SAndroid Build Coastguard Worker.pass2: 1097*c0909341SAndroid Build 
Coastguard Worker call m(iadst_4x16_internal_8bpc).main 1098*c0909341SAndroid Build Coastguard Worker vpbroadcastd m5, [o(pw_2896x8)] 1099*c0909341SAndroid Build Coastguard Worker paddsw m1, m2, m4 1100*c0909341SAndroid Build Coastguard Worker psubsw m2, m4 1101*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m5 ; -out7 out4 out6 -out5 1102*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m5 ; out8 -out11 -out9 out10 1103*c0909341SAndroid Build Coastguard Worker vpbroadcastd m6, [o(pw_2048)] 1104*c0909341SAndroid Build Coastguard Worker pshufd m1, m1, q1032 1105*c0909341SAndroid Build Coastguard Worker vpblendd m4, m0, m2, 0x33 1106*c0909341SAndroid Build Coastguard Worker vpblendd m0, m1, 0xcc 1107*c0909341SAndroid Build Coastguard Worker vpblendd m1, m3, 0xcc 1108*c0909341SAndroid Build Coastguard Worker vpblendd m2, m3, 0x33 1109*c0909341SAndroid Build Coastguard Worker vpermq m0, m0, q3120 1110*c0909341SAndroid Build Coastguard Worker vpermq m1, m1, q0213 1111*c0909341SAndroid Build Coastguard Worker vpermq m2, m2, q2031 1112*c0909341SAndroid Build Coastguard Worker vpermq m3, m4, q1302 1113*c0909341SAndroid Build Coastguard Worker psubw m5, m7, m6 1114*c0909341SAndroid Build Coastguard Worker jmp m(iadst_4x16_internal_8bpc).end 1115*c0909341SAndroid Build Coastguard Worker 1116*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN identity, dct 1117*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN identity, adst 1118*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN identity, flipadst 1119*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN identity, identity 1120*c0909341SAndroid Build Coastguard Worker 1121*c0909341SAndroid Build Coastguard Workercglobal iidentity_4x16_internal_8bpc, 0, 5, 11, dst, stride, c, eob, tx2 1122*c0909341SAndroid Build Coastguard Worker mova m3, [cq+32*0] 1123*c0909341SAndroid Build Coastguard Worker mova m2, [cq+32*1] 1124*c0909341SAndroid Build Coastguard Worker mova m4, [cq+32*2] 
1125*c0909341SAndroid Build Coastguard Worker mova m5, [cq+32*3] 1126*c0909341SAndroid Build Coastguard Worker vpbroadcastd m8, [o(pw_1697x8)] 1127*c0909341SAndroid Build Coastguard Worker pcmpeqw m0, m0 ; -1 1128*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m3, m2 1129*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m2 1130*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m4, m5 1131*c0909341SAndroid Build Coastguard Worker punpckhwd m4, m5 1132*c0909341SAndroid Build Coastguard Worker pmulhrsw m5, m8, m1 1133*c0909341SAndroid Build Coastguard Worker pmulhrsw m6, m8, m2 1134*c0909341SAndroid Build Coastguard Worker pmulhrsw m7, m8, m3 1135*c0909341SAndroid Build Coastguard Worker pmulhrsw m8, m4 1136*c0909341SAndroid Build Coastguard Worker pcmpeqw m9, m0, m1 ; we want to do a signed avg, but pavgw is 1137*c0909341SAndroid Build Coastguard Worker pxor m1, m9 ; unsigned. as long as both signs are equal 1138*c0909341SAndroid Build Coastguard Worker pcmpeqw m9, m0, m2 ; it still works, but if the input is -1 the 1139*c0909341SAndroid Build Coastguard Worker pxor m2, m9 ; pmulhrsw result will become 0 which causes 1140*c0909341SAndroid Build Coastguard Worker pcmpeqw m9, m0, m3 ; pavgw to output -32768 instead of 0 unless 1141*c0909341SAndroid Build Coastguard Worker pxor m3, m9 ; we explicitly deal with that case here. 
1142*c0909341SAndroid Build Coastguard Worker pcmpeqw m0, m4 1143*c0909341SAndroid Build Coastguard Worker pxor m4, m0 1144*c0909341SAndroid Build Coastguard Worker pavgw m1, m5 1145*c0909341SAndroid Build Coastguard Worker pavgw m2, m6 1146*c0909341SAndroid Build Coastguard Worker pavgw m3, m7 1147*c0909341SAndroid Build Coastguard Worker pavgw m4, m8 1148*c0909341SAndroid Build Coastguard Worker punpckldq m0, m1, m2 1149*c0909341SAndroid Build Coastguard Worker punpckhdq m1, m2 1150*c0909341SAndroid Build Coastguard Worker punpckldq m2, m3, m4 1151*c0909341SAndroid Build Coastguard Worker punpckhdq m3, m4 1152*c0909341SAndroid Build Coastguard Worker jmp tx2q 1153*c0909341SAndroid Build Coastguard Worker.pass2: 1154*c0909341SAndroid Build Coastguard Worker vpbroadcastd m8, [o(pw_1697x16)] 1155*c0909341SAndroid Build Coastguard Worker vpbroadcastd m5, [o(pw_2048)] 1156*c0909341SAndroid Build Coastguard Worker pmulhrsw m4, m8, m0 1157*c0909341SAndroid Build Coastguard Worker pmulhrsw m6, m8, m1 1158*c0909341SAndroid Build Coastguard Worker pmulhrsw m7, m8, m2 1159*c0909341SAndroid Build Coastguard Worker pmulhrsw m8, m3 1160*c0909341SAndroid Build Coastguard Worker REPX {paddsw x, x}, m0, m1, m2, m3 1161*c0909341SAndroid Build Coastguard Worker paddsw m0, m4 1162*c0909341SAndroid Build Coastguard Worker paddsw m1, m6 1163*c0909341SAndroid Build Coastguard Worker paddsw m2, m7 1164*c0909341SAndroid Build Coastguard Worker paddsw m3, m8 1165*c0909341SAndroid Build Coastguard Worker jmp m(iadst_4x16_internal_8bpc).end2 1166*c0909341SAndroid Build Coastguard Worker 1167*c0909341SAndroid Build Coastguard Worker%macro WRITE_8X4 4-7 strideq*1, strideq*2, r3 ; coefs[1-2], tmp[1-2], off[1-3] 1168*c0909341SAndroid Build Coastguard Worker movq xm%3, [dstq ] 1169*c0909341SAndroid Build Coastguard Worker movhps xm%3, [dstq+%5] 1170*c0909341SAndroid Build Coastguard Worker movq xm%4, [dstq+%6] 1171*c0909341SAndroid Build Coastguard Worker movhps xm%4, [dstq+%7] 
1172*c0909341SAndroid Build Coastguard Worker pmovzxbw m%3, xm%3 1173*c0909341SAndroid Build Coastguard Worker pmovzxbw m%4, xm%4 1174*c0909341SAndroid Build Coastguard Worker%ifnum %1 1175*c0909341SAndroid Build Coastguard Worker paddw m%3, m%1 1176*c0909341SAndroid Build Coastguard Worker%else 1177*c0909341SAndroid Build Coastguard Worker paddw m%3, %1 1178*c0909341SAndroid Build Coastguard Worker%endif 1179*c0909341SAndroid Build Coastguard Worker%ifnum %2 1180*c0909341SAndroid Build Coastguard Worker paddw m%4, m%2 1181*c0909341SAndroid Build Coastguard Worker%else 1182*c0909341SAndroid Build Coastguard Worker paddw m%4, %2 1183*c0909341SAndroid Build Coastguard Worker%endif 1184*c0909341SAndroid Build Coastguard Worker packuswb m%3, m%4 1185*c0909341SAndroid Build Coastguard Worker vextracti128 xm%4, m%3, 1 1186*c0909341SAndroid Build Coastguard Worker movq [dstq ], xm%3 1187*c0909341SAndroid Build Coastguard Worker movhps [dstq+%6], xm%3 1188*c0909341SAndroid Build Coastguard Worker movq [dstq+%5], xm%4 1189*c0909341SAndroid Build Coastguard Worker movhps [dstq+%7], xm%4 1190*c0909341SAndroid Build Coastguard Worker%endmacro 1191*c0909341SAndroid Build Coastguard Worker 1192*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_8X4_FN 2 ; type1, type2 1193*c0909341SAndroid Build Coastguard Worker INV_TXFM_FN %1, %2, 8x4 1194*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct 1195*c0909341SAndroid Build Coastguard Worker movd xm1, [o(pw_2896x8)] 1196*c0909341SAndroid Build Coastguard Worker pmulhrsw xm0, xm1, [cq] 1197*c0909341SAndroid Build Coastguard Worker mov [cq], eobd 1198*c0909341SAndroid Build Coastguard Worker pmulhrsw xm0, xm1 1199*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_8x8_8bpc).dconly2 1200*c0909341SAndroid Build Coastguard Worker%endif 1201*c0909341SAndroid Build Coastguard Worker%endmacro 1202*c0909341SAndroid Build Coastguard Worker 1203*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN dct, dct 
1204*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN dct, adst 1205*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN dct, flipadst 1206*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN dct, identity 1207*c0909341SAndroid Build Coastguard Worker 1208*c0909341SAndroid Build Coastguard Workercglobal idct_8x4_internal_8bpc, 0, 5, 7, dst, stride, c, eob, tx2 1209*c0909341SAndroid Build Coastguard Worker vpbroadcastd xm3, [o(pw_2896x8)] 1210*c0909341SAndroid Build Coastguard Worker pmulhrsw xm0, xm3, [cq+16*0] 1211*c0909341SAndroid Build Coastguard Worker pmulhrsw xm1, xm3, [cq+16*1] 1212*c0909341SAndroid Build Coastguard Worker pmulhrsw xm2, xm3, [cq+16*2] 1213*c0909341SAndroid Build Coastguard Worker pmulhrsw xm3, [cq+16*3] 1214*c0909341SAndroid Build Coastguard Worker call m(idct_4x8_internal_8bpc).main 1215*c0909341SAndroid Build Coastguard Worker vbroadcasti128 m4, [o(deint_shuf)] 1216*c0909341SAndroid Build Coastguard Worker vinserti128 m3, m1, xm3, 1 1217*c0909341SAndroid Build Coastguard Worker vinserti128 m1, m0, xm2, 1 1218*c0909341SAndroid Build Coastguard Worker shufps m0, m1, m3, q0220 1219*c0909341SAndroid Build Coastguard Worker shufps m1, m3, q1331 1220*c0909341SAndroid Build Coastguard Worker pshufb m0, m4 1221*c0909341SAndroid Build Coastguard Worker pshufb m1, m4 1222*c0909341SAndroid Build Coastguard Worker jmp tx2q 1223*c0909341SAndroid Build Coastguard Worker.pass2: 1224*c0909341SAndroid Build Coastguard Worker IDCT4_1D_PACKED 1225*c0909341SAndroid Build Coastguard Worker vpermq m0, m0, q3120 1226*c0909341SAndroid Build Coastguard Worker vpermq m1, m1, q2031 1227*c0909341SAndroid Build Coastguard Worker jmp m(iadst_8x4_internal_8bpc).end2 1228*c0909341SAndroid Build Coastguard Worker 1229*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN adst, dct 1230*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN adst, adst 1231*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN adst, flipadst 1232*c0909341SAndroid Build 
Coastguard WorkerINV_TXFM_8X4_FN adst, identity 1233*c0909341SAndroid Build Coastguard Worker 1234*c0909341SAndroid Build Coastguard Workercglobal iadst_8x4_internal_8bpc, 0, 5, 7, dst, stride, c, eob, tx2 1235*c0909341SAndroid Build Coastguard Worker vpbroadcastd xm0, [o(pw_2896x8)] 1236*c0909341SAndroid Build Coastguard Worker pshufd xm4, [cq+16*0], q1032 1237*c0909341SAndroid Build Coastguard Worker pmulhrsw xm3, xm0, [cq+16*3] 1238*c0909341SAndroid Build Coastguard Worker pshufd xm5, [cq+16*1], q1032 1239*c0909341SAndroid Build Coastguard Worker pmulhrsw xm2, xm0, [cq+16*2] 1240*c0909341SAndroid Build Coastguard Worker pmulhrsw xm4, xm0 1241*c0909341SAndroid Build Coastguard Worker pmulhrsw xm5, xm0 1242*c0909341SAndroid Build Coastguard Worker call m(iadst_4x8_internal_8bpc).main_pass1 1243*c0909341SAndroid Build Coastguard Worker vinserti128 m0, xm2, 1 1244*c0909341SAndroid Build Coastguard Worker vinserti128 m1, xm3, 1 1245*c0909341SAndroid Build Coastguard Worker punpckhwd m2, m0, m1 1246*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m1 1247*c0909341SAndroid Build Coastguard Worker pxor m3, m3 1248*c0909341SAndroid Build Coastguard Worker psubsw m3, m2 1249*c0909341SAndroid Build Coastguard Worker punpckhwd m1, m0, m3 1250*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m3 1251*c0909341SAndroid Build Coastguard Worker jmp tx2q 1252*c0909341SAndroid Build Coastguard Worker.pass2: 1253*c0909341SAndroid Build Coastguard Worker call .main 1254*c0909341SAndroid Build Coastguard Worker.end: 1255*c0909341SAndroid Build Coastguard Worker vpermq m0, m0, q3120 1256*c0909341SAndroid Build Coastguard Worker vpermq m1, m1, q3120 1257*c0909341SAndroid Build Coastguard Worker.end2: 1258*c0909341SAndroid Build Coastguard Worker vpbroadcastd m2, [o(pw_2048)] 1259*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m2 1260*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m2 1261*c0909341SAndroid Build Coastguard Worker WIN64_RESTORE_XMM 
1262*c0909341SAndroid Build Coastguard Worker.end3: 1263*c0909341SAndroid Build Coastguard Worker pxor m2, m2 1264*c0909341SAndroid Build Coastguard Worker mova [cq+32*0], m2 1265*c0909341SAndroid Build Coastguard Worker mova [cq+32*1], m2 1266*c0909341SAndroid Build Coastguard Worker lea r3, [strideq*3] 1267*c0909341SAndroid Build Coastguard Worker WRITE_8X4 0, 1, 4, 5 1268*c0909341SAndroid Build Coastguard Worker RET 1269*c0909341SAndroid Build Coastguard WorkerALIGN function_align 1270*c0909341SAndroid Build Coastguard Workercglobal_label .main 1271*c0909341SAndroid Build Coastguard Worker IADST4_1D_PACKED 1272*c0909341SAndroid Build Coastguard Worker ret 1273*c0909341SAndroid Build Coastguard Worker 1274*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN flipadst, dct 1275*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN flipadst, adst 1276*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN flipadst, flipadst 1277*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN flipadst, identity 1278*c0909341SAndroid Build Coastguard Worker 1279*c0909341SAndroid Build Coastguard Workercglobal iflipadst_8x4_internal_8bpc, 0, 5, 7, dst, stride, c, eob, tx2 1280*c0909341SAndroid Build Coastguard Worker vpbroadcastd xm0, [o(pw_2896x8)] 1281*c0909341SAndroid Build Coastguard Worker pshufd xm4, [cq+16*0], q1032 1282*c0909341SAndroid Build Coastguard Worker pmulhrsw xm3, xm0, [cq+16*3] 1283*c0909341SAndroid Build Coastguard Worker pshufd xm5, [cq+16*1], q1032 1284*c0909341SAndroid Build Coastguard Worker pmulhrsw xm2, xm0, [cq+16*2] 1285*c0909341SAndroid Build Coastguard Worker pmulhrsw xm4, xm0 1286*c0909341SAndroid Build Coastguard Worker pmulhrsw xm5, xm0 1287*c0909341SAndroid Build Coastguard Worker call m(iadst_4x8_internal_8bpc).main_pass1 1288*c0909341SAndroid Build Coastguard Worker vinserti128 m3, xm1, 1 1289*c0909341SAndroid Build Coastguard Worker vinserti128 m2, xm0, 1 1290*c0909341SAndroid Build Coastguard Worker punpckhwd m1, m3, m2 
1291*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m2 1292*c0909341SAndroid Build Coastguard Worker pxor m0, m0 1293*c0909341SAndroid Build Coastguard Worker psubsw m0, m1 1294*c0909341SAndroid Build Coastguard Worker punpckhwd m1, m0, m3 1295*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m3 1296*c0909341SAndroid Build Coastguard Worker jmp tx2q 1297*c0909341SAndroid Build Coastguard Worker.pass2: 1298*c0909341SAndroid Build Coastguard Worker call m(iadst_8x4_internal_8bpc).main 1299*c0909341SAndroid Build Coastguard Worker mova m2, m1 1300*c0909341SAndroid Build Coastguard Worker vpermq m1, m0, q2031 1301*c0909341SAndroid Build Coastguard Worker vpermq m0, m2, q2031 1302*c0909341SAndroid Build Coastguard Worker jmp m(iadst_8x4_internal_8bpc).end2 1303*c0909341SAndroid Build Coastguard Worker 1304*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN identity, dct 1305*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN identity, adst 1306*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN identity, flipadst 1307*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN identity, identity 1308*c0909341SAndroid Build Coastguard Worker 1309*c0909341SAndroid Build Coastguard Workercglobal iidentity_8x4_internal_8bpc, 0, 5, 7, dst, stride, c, eob, tx2 1310*c0909341SAndroid Build Coastguard Worker mova xm2, [cq+16*0] 1311*c0909341SAndroid Build Coastguard Worker mova xm0, [cq+16*1] 1312*c0909341SAndroid Build Coastguard Worker vinserti128 m2, [cq+16*2], 1 1313*c0909341SAndroid Build Coastguard Worker vinserti128 m0, [cq+16*3], 1 1314*c0909341SAndroid Build Coastguard Worker vpbroadcastd m3, [o(pw_2896x8)] 1315*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m2, m0 1316*c0909341SAndroid Build Coastguard Worker punpckhwd m2, m0 1317*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m3 1318*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m3 1319*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m1, m2 1320*c0909341SAndroid 
Build Coastguard Worker punpckhwd m1, m2 1321*c0909341SAndroid Build Coastguard Worker paddsw m0, m0 1322*c0909341SAndroid Build Coastguard Worker paddsw m1, m1 1323*c0909341SAndroid Build Coastguard Worker jmp tx2q 1324*c0909341SAndroid Build Coastguard Worker.pass2: 1325*c0909341SAndroid Build Coastguard Worker vpbroadcastd m3, [o(pw_1697x8)] 1326*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m3, m0 1327*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m1 1328*c0909341SAndroid Build Coastguard Worker paddsw m0, m2 1329*c0909341SAndroid Build Coastguard Worker paddsw m1, m3 1330*c0909341SAndroid Build Coastguard Worker jmp m(iadst_8x4_internal_8bpc).end 1331*c0909341SAndroid Build Coastguard Worker 1332*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_8X8_FN 2 ; type1, type2 1333*c0909341SAndroid Build Coastguard Worker INV_TXFM_FN %1, %2, 8x8 1334*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct 1335*c0909341SAndroid Build Coastguard Worker movd xm1, [o(pw_2896x8)] 1336*c0909341SAndroid Build Coastguard Worker pmulhrsw xm0, xm1, [cq] 1337*c0909341SAndroid Build Coastguard Worker movd xm2, [o(pw_16384)] 1338*c0909341SAndroid Build Coastguard Worker mov [cq], eobd 1339*c0909341SAndroid Build Coastguard Worker or r3d, 8 1340*c0909341SAndroid Build Coastguard Worker.dconly: 1341*c0909341SAndroid Build Coastguard Worker pmulhrsw xm0, xm2 1342*c0909341SAndroid Build Coastguard Worker.dconly2: 1343*c0909341SAndroid Build Coastguard Worker movd xm2, [pw_2048] 1344*c0909341SAndroid Build Coastguard Worker pmulhrsw xm0, xm1 1345*c0909341SAndroid Build Coastguard Worker lea r2, [strideq*3] 1346*c0909341SAndroid Build Coastguard Worker pmulhrsw xm0, xm2 1347*c0909341SAndroid Build Coastguard Worker vpbroadcastw m0, xm0 1348*c0909341SAndroid Build Coastguard Worker.dconly_loop: 1349*c0909341SAndroid Build Coastguard Worker WRITE_8X4 0, 0, 1, 2, strideq*1, strideq*2, r2 1350*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*4] 
1351*c0909341SAndroid Build Coastguard Worker sub r3d, 4 1352*c0909341SAndroid Build Coastguard Worker jg .dconly_loop 1353*c0909341SAndroid Build Coastguard Worker RET 1354*c0909341SAndroid Build Coastguard Worker%endif 1355*c0909341SAndroid Build Coastguard Worker%endmacro 1356*c0909341SAndroid Build Coastguard Worker 1357*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, dct 1358*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, adst 1359*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, flipadst 1360*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, identity 1361*c0909341SAndroid Build Coastguard Worker 1362*c0909341SAndroid Build Coastguard Workercglobal idct_8x8_internal_8bpc, 0, 5, 7, dst, stride, c, eob, tx2 1363*c0909341SAndroid Build Coastguard Worker vpermq m0, [cq+32*0], q3120 ; 0 1 1364*c0909341SAndroid Build Coastguard Worker vpermq m3, [cq+32*3], q3120 ; 6 7 1365*c0909341SAndroid Build Coastguard Worker vpermq m2, [cq+32*2], q3120 ; 4 5 1366*c0909341SAndroid Build Coastguard Worker vpermq m1, [cq+32*1], q3120 ; 2 3 1367*c0909341SAndroid Build Coastguard Worker call .main 1368*c0909341SAndroid Build Coastguard Worker shufps m4, m0, m1, q0220 1369*c0909341SAndroid Build Coastguard Worker shufps m5, m0, m1, q1331 1370*c0909341SAndroid Build Coastguard Worker shufps m1, m2, m3, q0220 1371*c0909341SAndroid Build Coastguard Worker shufps m3, m2, m3, q1331 1372*c0909341SAndroid Build Coastguard Worker vbroadcasti128 m0, [o(deint_shuf)] 1373*c0909341SAndroid Build Coastguard Worker vpbroadcastd m2, [o(pw_16384)] 1374*c0909341SAndroid Build Coastguard Worker REPX {pshufb x, m0}, m4, m5, m1, m3 1375*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m2}, m4, m5, m1, m3 1376*c0909341SAndroid Build Coastguard Worker vinserti128 m0, m4, xm1, 1 1377*c0909341SAndroid Build Coastguard Worker vperm2i128 m2, m4, m1, 0x31 1378*c0909341SAndroid Build Coastguard Worker vinserti128 m1, m5, xm3, 1 1379*c0909341SAndroid 
; -----------------------------------------------------------------------------
; Tail of the function that precedes iadst_8x8_internal_8bpc. Its cglobal
; header and the start of its first pass lie above this span; the remaining
; instructions are kept in their original order.
; -----------------------------------------------------------------------------
    vperm2i128       m3, m5, m3, 0x31
    jmp              tx2q
.pass2:
    call             .main
    vpbroadcastd     m4, [o(pw_2048)]
    vpermq           m0, m0, q3120
    vpermq           m1, m1, q2031
    vpermq           m2, m2, q3120
    vpermq           m3, m3, q2031
    ; shared tail: multiplies m0-m3 by m4, clears the coefficients and stores
    jmp              m(iadst_8x8_internal_8bpc).end2
ALIGN function_align
cglobal_label .main
    IDCT8_1D_PACKED
    ret

INV_TXFM_8X8_FN adst, dct
INV_TXFM_8X8_FN adst, adst
INV_TXFM_8X8_FN adst, flipadst
INV_TXFM_8X8_FN adst, identity

; -----------------------------------------------------------------------------
; iadst_8x8_internal_8bpc
;   First pass : loads four 32-byte coefficient rows from cq (qword-permuted
;                on load), runs .main_pass1, interleaves the words, scales with
;                pmulhrsw by +pw_16384 / -pw_16384, regroups the 128-bit lanes
;                and jumps to the address held in tx2q.
;   .pass2     : second pass; runs .main_pass2 and builds the multiplier in m4
;                before falling into the shared tail.
;   .end/.end2/.end3/.end4 : shared tail entry points, also jumped to from
;                other functions in this file. Callers entering at .end-.end3
;                must have the pmulhrsw multiplier in m4.
;   The tail zeroes 4*32 bytes at cq and writes the result through WRITE_8X4.
; -----------------------------------------------------------------------------
cglobal iadst_8x8_internal_8bpc, 0, 5, 7, dst, stride, c, eob, tx2
    vpermq           m4, [cq+32*0], q1302 ; 1 0
    vpermq           m3, [cq+32*3], q3120 ; 6 7
    vpermq           m5, [cq+32*1], q1302 ; 3 2
    vpermq           m2, [cq+32*2], q3120 ; 4 5
    call             .main_pass1
    vpbroadcastd     m5, [o(pw_16384)]
    punpcklwd        m4, m0, m1
    punpckhwd        m0, m1
    punpcklwd        m1, m2, m3
    punpckhwd        m2, m3
    pxor             m3, m3
    psubw            m3, m5 ; negate odd elements during rounding
    pmulhrsw         m4, m5
    pmulhrsw         m0, m3
    pmulhrsw         m1, m5
    pmulhrsw         m2, m3
    punpcklwd        m3, m4, m0
    punpckhwd        m4, m0
    punpcklwd        m0, m1, m2
    punpckhwd        m1, m2
    vperm2i128       m2, m3, m0, 0x31
    vinserti128      m0, m3, xm0, 1
    vperm2i128       m3, m4, m1, 0x31
    vinserti128      m1, m4, xm1, 1
    jmp              tx2q
.pass2:
    pshufd           m4, m0, q1032
    pshufd           m5, m1, q1032
    call             .main_pass2
    vpbroadcastd     m5, [o(pw_2048)]
    vpbroadcastd     xm4, [o(pw_4096)]
    psubw            m4, m5 ; lower half = 2048, upper half = -2048
.end:
    REPX {vpermq x, x, q3120}, m0, m1, m2, m3
.end2:
    pmulhrsw         m0, m4
    pmulhrsw         m1, m4
.end3:
    pmulhrsw         m2, m4
    pmulhrsw         m3, m4
    WIN64_RESTORE_XMM
.end4:
    ; clear the coefficient buffer, then store two groups of four rows
    pxor             m4, m4
    mova             [cq+32*0], m4
    mova             [cq+32*1], m4
    mova             [cq+32*2], m4
    mova             [cq+32*3], m4
    lea              r3, [strideq*3]
    WRITE_8X4        0, 1, 4, 5
    lea              dstq, [dstq+strideq*4]
    WRITE_8X4        2, 3, 4, 5
    RET
ALIGN function_align
.main_pass1:
    IADST8_1D_PACKED 1
    ret
ALIGN function_align
cglobal_label .main_pass2
    IADST8_1D_PACKED 2
    ret

INV_TXFM_8X8_FN flipadst, dct
INV_TXFM_8X8_FN flipadst, adst
INV_TXFM_8X8_FN flipadst, flipadst
INV_TXFM_8X8_FN flipadst, identity

; -----------------------------------------------------------------------------
; iflipadst_8x8_internal_8bpc
;   Same loads and 1-D kernels as iadst_8x8_internal_8bpc (it calls that
;   function's .main_pass1 / .main_pass2), but the interleaves consume the
;   registers in the opposite order and the sign pattern of the multiplier is
;   swapped. The second pass finishes in iadst_8x8_internal_8bpc.end3, so m0
;   and m1 are scaled here before the jump.
; -----------------------------------------------------------------------------
cglobal iflipadst_8x8_internal_8bpc, 0, 5, 7, dst, stride, c, eob, tx2
    vpermq           m4, [cq+32*0], q1302 ; 1 0
    vpermq           m3, [cq+32*3], q3120 ; 6 7
    vpermq           m5, [cq+32*1], q1302 ; 3 2
    vpermq           m2, [cq+32*2], q3120 ; 4 5
    call             m(iadst_8x8_internal_8bpc).main_pass1
    vpbroadcastd     m5, [o(pw_16384)]
    punpckhwd        m4, m3, m2
    punpcklwd        m3, m2
    punpckhwd        m2, m1, m0
    punpcklwd        m1, m0
    pxor             m0, m0
    psubw            m0, m5
    pmulhrsw         m4, m0
    pmulhrsw         m3, m5
    pmulhrsw         m2, m0
    pmulhrsw         m1, m5
    punpckhwd        m0, m4, m3
    punpcklwd        m4, m3
    punpckhwd        m3, m2, m1
    punpcklwd        m2, m1
    vinserti128      m1, m0, xm3, 1
    vperm2i128       m3, m0, m3, 0x31
    vinserti128      m0, m4, xm2, 1
    vperm2i128       m2, m4, m2, 0x31
    jmp              tx2q
.pass2:
    pshufd           m4, m0, q1032
    pshufd           m5, m1, q1032
    call             m(iadst_8x8_internal_8bpc).main_pass2
    vpbroadcastd     m4, [o(pw_2048)]
    vpbroadcastd     xm5, [o(pw_4096)]
    psubw            m4, m5 ; lower half = -2048, upper half = 2048
    vpermq           m5, m3, q2031
    vpermq           m3, m0, q2031
    vpermq           m0, m2, q2031
    vpermq           m2, m1, q2031
    pmulhrsw         m1, m0, m4
    pmulhrsw         m0, m5, m4
    jmp              m(iadst_8x8_internal_8bpc).end3

INV_TXFM_8X8_FN identity, dct
INV_TXFM_8X8_FN identity, adst
INV_TXFM_8X8_FN identity, flipadst
INV_TXFM_8X8_FN identity, identity

; -----------------------------------------------------------------------------
; iidentity_8x8_internal_8bpc
;   First pass : pairs 16-byte rows n and n+4 into one ymm register each and
;                transposes them with word/dword interleaves; no arithmetic is
;                applied to the values in this pass.
;   .pass2     : loads pw_4096 as the multiplier and reuses
;                iadst_8x8_internal_8bpc.end for permute, scale and store.
; -----------------------------------------------------------------------------
cglobal iidentity_8x8_internal_8bpc, 0, 5, 7, dst, stride, c, eob, tx2
    mova             xm3, [cq+16*0]
    mova             xm2, [cq+16*1]
    vinserti128      m3, [cq+16*4], 1
    vinserti128      m2, [cq+16*5], 1
    mova             xm4, [cq+16*2]
    mova             xm0, [cq+16*3]
    vinserti128      m4, [cq+16*6], 1
    vinserti128      m0, [cq+16*7], 1
    punpcklwd        m1, m3, m2
    punpckhwd        m3, m2
    punpcklwd        m2, m4, m0
    punpckhwd        m4, m0
    punpckldq        m0, m1, m2
    punpckhdq        m1, m2
    punpckldq        m2, m3, m4
    punpckhdq        m3, m4
    jmp              tx2q
.pass2:
    vpbroadcastd     m4, [o(pw_4096)]
    jmp              m(iadst_8x8_internal_8bpc).end

; -----------------------------------------------------------------------------
; INV_TXFM_8X16_FN type1, type2
;   Instantiates INV_TXFM_FN for the 8x16 size. For the dct_dct combination it
;   also emits a DC-only path: the first coefficient is multiplied twice by
;   pw_2896x8, eobd is stored to [cq], bit 4 of r3d is set, and control is
;   transferred to the 8x8 dct_dct .dconly label with xm2 = pw_16384.
;   NOTE(review): r3d is presumably the row count consumed by .dconly and eob
;   is presumably zero on this path (so the store clears the DC coefficient);
;   neither is visible from here - confirm against INV_TXFM_FN.
; -----------------------------------------------------------------------------
%macro INV_TXFM_8X16_FN 2 ; type1, type2
    INV_TXFM_FN      %1, %2, 8x16
%ifidn %1_%2, dct_dct
    movd             xm1, [o(pw_2896x8)]
    pmulhrsw         xm0, xm1, [cq]
    movd             xm2, [o(pw_16384)]
    mov              [cq], eobd
    pmulhrsw         xm0, xm1
    or               r3d, 16
    jmp              m(inv_txfm_add_dct_dct_8x8_8bpc).dconly
%endif
%endmacro

; -----------------------------------------------------------------------------
; ITX_8X16_LOAD_COEFS
;   Loads eight 32-byte coefficient rows into m0-m7 (row n -> m<n>), each
;   multiplied by pw_2896x8 with pmulhrsw.
;   Side effect: cq is advanced by 32*4, so afterwards the rows live at
;   [cq-32*4] .. [cq+32*3]. Code that clears the buffer later relies on this.
; -----------------------------------------------------------------------------
%macro ITX_8X16_LOAD_COEFS 0
    vpbroadcastd     m4, [o(pw_2896x8)]
    pmulhrsw         m0, m4, [cq+32*0]
    add              cq, 32*4
    pmulhrsw         m7, m4, [cq+32*3]
    pmulhrsw         m1, m4, [cq-32*3]
    pmulhrsw         m6, m4, [cq+32*2]
    pmulhrsw         m2, m4, [cq-32*2]
    pmulhrsw         m5, m4, [cq+32*1]
    pmulhrsw         m3, m4, [cq-32*1]
    pmulhrsw         m4, [cq+32*0]
%endmacro

INV_TXFM_8X16_FN dct, dct
INV_TXFM_8X16_FN dct, adst
INV_TXFM_8X16_FN dct, flipadst
INV_TXFM_8X16_FN dct, identity

; -----------------------------------------------------------------------------
; idct_8x16_internal_8bpc
;   First pass : ITX_8X16_LOAD_COEFS, the 16x8 idct .main kernel, then a
;                transpose with the pmulhrsw multiplier taken from m10.
;   .pass1_end / .pass1_end2 : shared first-pass transpose tails; the adst and
;                flipadst 8x16 functions jump here with their own m10.
;   .pass2     : 16-point 1-D kernel (.main), lane permutes, then the tail.
;   .end       : loads pw_2048 into m8.
;   .end2      : expects the multiplier in m8 and scales m0-m7.
;   .end3      : expects m0-m7 already scaled; zeroes the eight 32-byte rows
;                around cq (cq was advanced by the loader) and stores sixteen
;                rows through four WRITE_8X4 invocations.
; -----------------------------------------------------------------------------
cglobal idct_8x16_internal_8bpc, 0, 5, 13, dst, stride, c, eob, tx2
    ITX_8X16_LOAD_COEFS
    call             m(idct_16x8_internal_8bpc).main
    vpbroadcastd     m10, [o(pw_16384)]
.pass1_end:
    vperm2i128       m9, m3, m7, 0x31
    vinserti128      m3, xm7, 1
    vperm2i128       m8, m2, m6, 0x31
    vinserti128      m2, xm6, 1
    vperm2i128       m6, m1, m5, 0x31
    vinserti128      m1, xm5, 1
    vperm2i128       m5, m0, m4, 0x31
    vinserti128      m0, xm4, 1
    punpckhwd        m4, m2, m3
    punpcklwd        m2, m3
    punpckhwd        m3, m0, m1
    punpcklwd        m0, m1
.pass1_end2:
    punpckhwd        m7, m5, m6
    punpcklwd        m5, m6
    punpcklwd        m6, m8, m9
    punpckhwd        m8, m9
    REPX {pmulhrsw x, m10}, m2, m0, m4, m3, m5, m6, m7, m8
    punpckhdq        m1, m0, m2
    punpckldq        m0, m2
    punpckldq        m2, m3, m4
    punpckhdq        m3, m4
    punpckldq        m4, m5, m6
    punpckhdq        m5, m6
    punpckldq        m6, m7, m8
    punpckhdq        m7, m8
    jmp              tx2q
.pass2:
    call             .main
    REPX {vpermq x, x, q3120}, m0, m2, m4, m6
    REPX {vpermq x, x, q2031}, m1, m3, m5, m7
.end:
    vpbroadcastd     m8, [o(pw_2048)]
.end2:
    REPX {pmulhrsw x, m8}, m0, m1, m2, m3, m4, m5, m6, m7
.end3:
    pxor             m8, m8
    REPX {mova [cq+32*x], m8}, -4, -3, -2, -1, 0, 1, 2, 3
    lea              r3, [strideq*3]
    WRITE_8X4        0, 1, 8, 9
    lea              dstq, [dstq+strideq*4]
    WRITE_8X4        2, 3, 0, 1
    lea              dstq, [dstq+strideq*4]
    WRITE_8X4        4, 5, 0, 1
    lea              dstq, [dstq+strideq*4]
    WRITE_8X4        6, 7, 0, 1
    RET
ALIGN function_align
cglobal_label .main
    IDCT16_1D_PACKED
    ret

INV_TXFM_8X16_FN adst, dct
INV_TXFM_8X16_FN adst, adst
INV_TXFM_8X16_FN adst, flipadst
INV_TXFM_8X16_FN adst, identity

; -----------------------------------------------------------------------------
; iadst_8x16_internal_8bpc
;   First pass : ITX_8X16_LOAD_COEFS, the 16x8 iadst kernels, then the shared
;                idct_8x16 .pass1_end transpose with m10 = 16384, -16384.
;   .pass2     : .main + .main_pass2_end, builds the multiplier in m8 and
;                finishes in idct_8x16_internal_8bpc.end2.
;   .main / .main2      : packed 16-point butterfly stages; .main first swaps
;                the qwords of m7, m1, m5, m3. Uses m8-m12 as scratch and
;                leaves m12 = deint_shuf and the values commented as t-terms
;                in m3, m4, m5, m11 for one of the two epilogues below.
;   .main_pass1_end     : final rotation with pmaddwd, +m10 rounding and a
;                12-bit arithmetic shift; reloads m10 = pw_16384, zeroes m9.
;   .main_pass2_end     : final stage using saturating add/sub and a
;                pmulhrsw by pw_2896x8.
; -----------------------------------------------------------------------------
cglobal iadst_8x16_internal_8bpc, 0, 5, 13, dst, stride, c, eob, tx2
    ITX_8X16_LOAD_COEFS
    call             m(iadst_16x8_internal_8bpc).main
    call             m(iadst_16x8_internal_8bpc).main_pass1_end
    vpbroadcastd     m10, [o(pw_16384)]
    pslld            m9, m10, 17
    psubw            m10, m9 ; 16384, -16384
    jmp              m(idct_8x16_internal_8bpc).pass1_end
ALIGN function_align
.pass2:
    call             .main
    call             .main_pass2_end
    vpbroadcastd     m9, [o(pw_2048)]
    vpbroadcastd     xm8, [o(pw_4096)]
    psubw            m8, m9
    REPX {vpermq x, x, q2031}, m0, m1, m2, m3
    REPX {vpermq x, x, q3120}, m4, m5, m6, m7
    jmp              m(idct_8x16_internal_8bpc).end2
ALIGN function_align
cglobal_label .main
    REPX {pshufd x, x, q1032}, m7, m1, m5, m3
.main2:
    vpbroadcastd     m10, [o(pd_2048)]
    punpckhwd        m8, m7, m0 ; in14 in1
    punpcklwd        m0, m7     ; in0  in15
    punpcklwd        m7, m6, m1 ; in12 in3
    punpckhwd        m1, m6     ; in2  in13
    punpckhwd        m6, m5, m2 ; in10 in5
    punpcklwd        m2, m5     ; in4  in11
    punpcklwd        m5, m4, m3 ; in8  in7
    punpckhwd        m3, m4     ; in6  in9
    ITX_MUL2X_PACK   0, 4, 9, 10,  201, 4091, 3 ; t0  t1
    ITX_MUL2X_PACK   1, 4, 9, 10,  995, 3973, 3 ; t2  t3
    ITX_MUL2X_PACK   2, 4, 9, 10, 1751, 3703, 3 ; t4  t5
    ITX_MUL2X_PACK   3, 4, 9, 10, 2440, 3290, 3 ; t6  t7
    ITX_MUL2X_PACK   5, 4, 9, 10, 3035, 2751, 3 ; t8  t9
    ITX_MUL2X_PACK   6, 4, 9, 10, 3513, 2106, 3 ; t10 t11
    ITX_MUL2X_PACK   7, 4, 9, 10, 3857, 1380, 3 ; t12 t13
    ITX_MUL2X_PACK   8, 4, 9, 10, 4052,  601, 3 ; t14 t15
    psubsw           m4, m0, m5 ; t9a  t8a
    paddsw           m0, m5     ; t1a  t0a
    psubsw           m5, m1, m6 ; t11a t10a
    paddsw           m1, m6     ; t3a  t2a
    psubsw           m6, m2, m7 ; t13a t12a
    paddsw           m2, m7     ; t5a  t4a
    psubsw           m7, m3, m8 ; t15a t14a
    paddsw           m3, m8     ; t7a  t6a
    vpbroadcastd     m11, [o(pw_m4017_799)]
    vpbroadcastd     m12, [o(pw_799_4017)]
    pxor             m9, m9
    ITX_MUL2X_PACK   4, 8, _, 10, 11, 12, 6 ; t8  t9
    psubw            m8, m9, m11 ; pw_4017_m799
    ITX_MUL2X_PACK   6, 12, _, 10, 12, 8, 6 ; t12 t13
    vpbroadcastd     m11, [o(pw_m2276_3406)]
    vpbroadcastd     m12, [o(pw_3406_2276)]
    ITX_MUL2X_PACK   5, 8, _, 10, 11, 12, 6 ; t10 t11
    psubw            m8, m9, m11 ; pw_2276_m3406
    ITX_MUL2X_PACK   7, 12, _, 10, 12, 8, 6 ; t14 t15
    psubsw           m8, m1, m3 ; t7   t6
    paddsw           m1, m3     ; t3   t2
    psubsw           m3, m0, m2 ; t5   t4
    paddsw           m0, m2     ; t1   t0
    psubsw           m2, m5, m7 ; t14a t15a
    paddsw           m7, m5     ; t10a t11a
    psubsw           m5, m4, m6 ; t12a t13a
    paddsw           m4, m6     ; t8a  t9a
    vpbroadcastd     m11, [o(pw_m3784_1567)]
    vpbroadcastd     m12, [o(pw_1567_3784)]
    ITX_MUL2X_PACK   3, 6, _, 10, 12, 11, 6 ; t5a t4a
    psubw            m6, m9, m11 ; pw_3784_m1567
    ITX_MUL2X_PACK   8, 6, _, 10, 6, 12, 6 ; t7a t6a
    vpbroadcastd     m11, [o(pw_m1567_3784)]
    vpbroadcastd     m12, [o(pw_3784_1567)]
    ITX_MUL2X_PACK   2, 6, _, 10, 11, 12, 6 ; t15 t14
    psubw            m6, m9, m11 ; pw_1567_m3784
    ITX_MUL2X_PACK   5, 12, _, 10, 12, 6, 6 ; t13 t12
    vbroadcasti128   m12, [o(deint_shuf)]
    paddsw           m6, m4, m7  ; -out1  out14
    psubsw           m4, m7      ;  t10    t11
    psubsw           m11, m3, m8 ;  t7     t6
    paddsw           m8, m3      ;  out12 -out3
    psubsw           m3, m0, m1  ;  t3a    t2a
    paddsw           m0, m1      ; -out15  out0
    paddsw           m1, m2, m5  ; -out13  out2
    psubsw           m5, m2      ;  t15a   t14a
    pshufb           m0, m12
    pshufb           m6, m12
    pshufb           m8, m12
    pshufb           m1, m12
    shufps           m7, m6, m0, q1032 ;  out14 -out15
    vpblendd         m0, m6, 0x33      ; -out1   out0
    punpcklqdq       m6, m8, m1        ;  out12 -out13
    punpckhqdq       m1, m8, m1        ; -out3   out2
    ret
ALIGN function_align
.main_pass1_end:
    vpbroadcastd     m8, [o(pw_m2896_2896)]
    vpbroadcastd     m12, [o(pw_2896_2896)]
    pmaddwd          m9, m8, m11 ; -out11
    pmaddwd          m2, m12, m5 ; -out5
    pmaddwd          m5, m8      ;  out10
    pmaddwd          m11, m12    ;  out4
    REPX {paddd x, m10}, m9, m5, m2, m11
    REPX {psrad x, 12 }, m9, m5, m2, m11
    packssdw         m5, m9  ;  out10 -out11
    packssdw         m2, m11 ; -out5   out4
    pmaddwd          m11, m8, m3 ;  out8
    vpbroadcastd     m8, [o(pw_2896_m2896)]
    pmaddwd          m3, m12     ; -out7
    pmaddwd          m8, m4      ; -out9
    pmaddwd          m4, m12     ;  out6
    REPX {paddd x, m10}, m11, m3, m8, m4
    REPX {psrad x, 12 }, m11, m3, m8, m4
    packssdw         m3, m4      ; -out7  out6
    packssdw         m4, m11, m8 ;  out8 -out9
    vpbroadcastd     m10, [o(pw_16384)]
    pxor             m9, m9
    ret
ALIGN function_align
cglobal_label .main_pass2_end
    vpbroadcastd     m8, [o(pw_2896x8)]
    pshufb           m2, m11, m12
    pshufb           m5, m12
    pshufb           m3, m12
    pshufb           m4, m12
    punpcklqdq       m11, m5, m2 ; t15a t7
    punpckhqdq       m5, m2      ; t14a t6
    shufps           m2, m3, m4, q1032 ; t2a t10
    vpblendd         m3, m4, 0xcc      ; t3a t11
    psubsw           m4, m2, m3  ;  out8  -out9
    paddsw           m3, m2      ; -out7   out6
    paddsw           m2, m5, m11 ; -out5   out4
    psubsw           m5, m11     ;  out10 -out11
    REPX {pmulhrsw x, m8}, m2, m3, m4, m5
    ret

INV_TXFM_8X16_FN flipadst, dct
INV_TXFM_8X16_FN flipadst, adst
INV_TXFM_8X16_FN flipadst, flipadst
INV_TXFM_8X16_FN flipadst, identity

; -----------------------------------------------------------------------------
; iflipadst_8x16_internal_8bpc
;   First pass : same loader and 16x8 iadst kernels as the adst variant, but
;                with m10 = -16384, 16384 and its own lane regrouping that
;                consumes the registers in the opposite order; it then joins
;                idct_8x16_internal_8bpc.pass1_end2.
;   .pass2     : reuses iadst_8x16_internal_8bpc.main / .main_pass2_end, then
;                permutes and scales into m0-m7 with the register order
;                reversed relative to the adst variant. All eight registers
;                are scaled here, so it joins idct_8x16_internal_8bpc.end3.
; -----------------------------------------------------------------------------
cglobal iflipadst_8x16_internal_8bpc, 0, 5, 13, dst, stride, c, eob, tx2
    ITX_8X16_LOAD_COEFS
    call             m(iadst_16x8_internal_8bpc).main
    call             m(iadst_16x8_internal_8bpc).main_pass1_end
    vpbroadcastd     m9, [o(pw_16384)]
    pslld            m10, m9, 17
    psubw            m10, m9 ; -16384, 16384
    vperm2i128       m9, m4, m0, 0x31
    vinserti128      m0, m4, xm0, 1
    vperm2i128       m8, m5, m1, 0x31
    vinserti128      m4, m5, xm1, 1
    vperm2i128       m5, m7, m3, 0x31
    vinserti128      m3, m7, xm3, 1
    vinserti128      m1, m6, xm2, 1
    vperm2i128       m6, m6, m2, 0x31
    punpcklwd        m2, m4, m0
    punpckhwd        m4, m0
    punpcklwd        m0, m3, m1
    punpckhwd        m3, m1
    jmp              m(idct_8x16_internal_8bpc).pass1_end2
.pass2:
    call             m(iadst_8x16_internal_8bpc).main
    call             m(iadst_8x16_internal_8bpc).main_pass2_end
    vpbroadcastd     m8, [o(pw_2048)]
    vpbroadcastd     xm9, [o(pw_4096)]
    psubw            m8, m9
    vpermq           m9, m0, q3120
    vpermq           m0, m7, q2031
    vpermq           m7, m1, q3120
    vpermq           m1, m6, q2031
    vpermq           m6, m2, q3120
    vpermq           m2, m5, q2031
    vpermq           m5, m3, q3120
    vpermq           m3, m4, q2031
    pmulhrsw         m0, m8
    pmulhrsw         m1, m8
    pmulhrsw         m2, m8
    pmulhrsw         m3, m8
    pmulhrsw         m4, m5, m8
    pmulhrsw         m5, m6, m8
    pmulhrsw         m6, m7, m8
    pmulhrsw         m7, m9, m8
    jmp              m(idct_8x16_internal_8bpc).end3

INV_TXFM_8X16_FN identity, dct
INV_TXFM_8X16_FN identity, adst
INV_TXFM_8X16_FN identity, flipadst
INV_TXFM_8X16_FN identity, identity

; -----------------------------------------------------------------------------
; IDTX16 src/dst, tmp, pw_1697x16 [, pw_16384]
;   %1 = register number of the value, updated in place
;   %2 = register number of a temporary (clobbered)
;   %3 = register number holding the pw_1697x16 multiplier
;   %4 = optional register number of a second multiplier
;   3 arguments: m%1 = sat(2*m%1) + pmulhrsw(m%1, m%3)
;   4 arguments: m%1 = m%1 + pmulhrsw(pmulhrsw(m%1, m%3), m%4)
; -----------------------------------------------------------------------------
%macro IDTX16 3-4 ; src/dst, tmp, pw_1697x16, [pw_16384]
    pmulhrsw         m%2, m%3, m%1
%if %0 == 4 ; if downshifting by 1
    pmulhrsw         m%2, m%4
%else
    paddsw           m%1, m%1
%endif
    paddsw           m%1, m%2
%endmacro

; -----------------------------------------------------------------------------
; iidentity_8x16_internal_8bpc
;   First pass : gathers 16-byte rows n and n+8 into one ymm register each
;                (cq is advanced by 16*8 part-way through, so later offsets
;                are relative to the middle of the block), interleaves,
;                multiplies by pw_2896x8 and finishes the transpose.
;   .pass2     : lane-permutes m0-m7, applies the 3-argument IDTX16 to each
;                (tmp = m9, multiplier = m8) and reuses
;                idct_8x16_internal_8bpc.end for scaling and store.
; -----------------------------------------------------------------------------
cglobal iidentity_8x16_internal_8bpc, 0, 5, 13, dst, stride, c, eob, tx2
    mova             xm3, [cq+16*0]
    mova             xm2, [cq+16*2]
    add              cq, 16*8
    vinserti128      m3, [cq+16*0], 1
    vinserti128      m2, [cq+16*2], 1
    vpbroadcastd     m9, [o(pw_2896x8)]
    mova             xm4, [cq-16*4]
    mova             xm5, [cq-16*2]
    vinserti128      m4, [cq+16*4], 1
    vinserti128      m5, [cq+16*6], 1
    mova             xm7, [cq-16*7]
    mova             xm6, [cq-16*5]
    vinserti128      m7, [cq+16*1], 1
    vinserti128      m6, [cq+16*3], 1
    mova             xm8, [cq-16*3]
    mova             xm0, [cq-16*1]
    vinserti128      m8, [cq+16*5], 1
    vinserti128      m0, [cq+16*7], 1
    punpcklwd        m1, m3, m2
    punpckhwd        m3, m2
    punpcklwd        m2, m4, m5
    punpckhwd        m4, m5
    punpcklwd        m5, m7, m6
    punpckhwd        m7, m6
    punpcklwd        m6, m8, m0
    punpckhwd        m8, m0
    REPX {pmulhrsw x, m9}, m1, m2, m3, m4, m5, m6, m7, m8
    punpckldq        m0, m1, m2
    punpckhdq        m1, m2
    punpckldq        m2, m3, m4
    punpckhdq        m3, m4
    punpckldq        m4, m5, m6
    punpckhdq        m5, m6
    punpckldq        m6, m7, m8
    punpckhdq        m7, m8
    jmp              tx2q
.pass2:
    vpbroadcastd     m8, [o(pw_1697x16)]
    REPX {vpermq x, x, q3120}, m0, m1, m2, m3, m4, m5, m6, m7
    REPX {IDTX16 x, 9, 8}, 0, 1, 2, 3, 4, 5, 6, 7
    jmp              m(idct_8x16_internal_8bpc).end

; -----------------------------------------------------------------------------
; WRITE_16X2 coef1, coef2, tmp1, tmp2, offset1, offset2
;   Adds two rows of sixteen word residuals to the pixels at [dstq+offset1]
;   and [dstq+offset2] and stores the result back.
;   %1/%2 = residual rows: a register number (expanded to m<n>) or any other
;           operand accepted by paddw
;   %3/%4 = register numbers of temporaries (clobbered)
;   %5/%6 = byte offsets from dstq of the two rows
;   The bytes are zero-extended to words, summed, packed with unsigned
;   saturation, and vpermq q3120 regroups the packed qwords so each 128-bit
;   half holds one complete row.
; -----------------------------------------------------------------------------
%macro WRITE_16X2 6 ; coefs[1-2], tmp[1-2], offset[1-2]
    pmovzxbw         m%3, [dstq+%5]
%ifnum %1
    paddw            m%3, m%1
%else
    paddw            m%3, %1
%endif
    pmovzxbw         m%4, [dstq+%6]
%ifnum %2
    paddw            m%4, m%2
%else
    paddw            m%4, %2
%endif
    packuswb         m%3, m%4
    vpermq           m%3, m%3, q3120
    mova             [dstq+%5], xm%3
    vextracti128     [dstq+%6], m%3, 1
%endmacro
; INV_TXFM_16X4_FN type1, type2
;
; Emits the 16x4 entry point for one (type1, type2) pair through INV_TXFM_FN
; (defined elsewhere in the file). For the dct_dct pair only, it additionally
; emits a DC-only path:
;   * xm0 = pmulhrsw(pw_2896x8, [cq]), the value of eobd is stored to [cq],
;     and r3d is OR-ed with 4.
;   * .dconly scales xm0 by xm2, by xm1 and by pw_2048, broadcasts the low
;     word to m0 and adds it to two 16-pixel rows per loop iteration with
;     unsigned saturation, decrementing r3d by 2 until it is no longer
;     positive.
; .dconly is also entered from INV_TXFM_16X8_FN with xm0/xm1/xm2/r3d preset.
; NOTE(review): presumably INV_TXFM_FN only falls through to this code when
; eob is 0, which would make the store clear the DC coefficient and leave
; r3d equal to the row count - confirm against INV_TXFM_FN.
%macro INV_TXFM_16X4_FN 2 ; type1, type2
    INV_TXFM_FN %1, %2, 16x4
%ifidn %1_%2, dct_dct
    movd         xm1, [o(pw_2896x8)]
    pmulhrsw     xm0, xm1, [cq]
    movd         xm2, [o(pw_16384)]
    mov          [cq], eobd
    or           r3d, 4
.dconly:
    pmulhrsw     xm0, xm2
    movd         xm2, [pw_2048] ; intentionally rip-relative
    pmulhrsw     xm0, xm1
    pmulhrsw     xm0, xm2
    vpbroadcastw m0, xm0
    pxor         m3, m3                 ; zero register for the byte-to-word unpacks
.dconly_loop:
    mova         xm1, [dstq+strideq*0]
    vinserti128  m1, [dstq+strideq*1], 1
    punpckhbw    m2, m1, m3
    punpcklbw    m1, m3
    paddw        m2, m0
    paddw        m1, m0
    packuswb     m1, m2
    mova         [dstq+strideq*0], xm1
    vextracti128 [dstq+strideq*1], m1, 1
    lea          dstq, [dstq+strideq*2]
    sub          r3d, 2                 ; two rows written per iteration
    jg           .dconly_loop
    RET
%endif
%endmacro

INV_TXFM_16X4_FN dct, dct
INV_TXFM_16X4_FN dct, adst
INV_TXFM_16X4_FN dct, flipadst
INV_TXFM_16X4_FN dct, identity

; idct_16x4_internal_8bpc
;
; First pass: loads eight 16-byte rows of coefficients from cq into xm0-xm7,
; runs idct_4x16's .main on them, merges the eight xmm results pairwise into
; ymm registers, interleaves words, and joins iadst_16x4's shared .pass1_end
; with m1 == m6 == pw_16384 so that all four registers get the same scale.
; Second pass (.pass2): runs the local .main and finishes in iadst_16x4's
; .end, which scales, clears the coefficient buffer and writes the pixels.
cglobal idct_16x4_internal_8bpc, 0, 5, 11, dst, stride, c, eob, tx2
    mova         xm0, [cq+16*0]
    mova         xm1, [cq+16*1]
    mova         xm2, [cq+16*2]
    mova         xm3, [cq+16*3]
    mova         xm4, [cq+16*4]
    mova         xm5, [cq+16*5]
    mova         xm6, [cq+16*6]
    mova         xm7, [cq+16*7]
    call         m(idct_4x16_internal_8bpc).main
    vinserti128  m6, m2, xm6, 1
    vinserti128  m2, m0, xm4, 1
    vinserti128  m0, m1, xm5, 1
    vinserti128  m1, m3, xm7, 1
    punpcklwd    m3, m2, m6
    punpckhwd    m2, m6
    vpbroadcastd m6, [o(pw_16384)]
    punpckhwd    m4, m0, m1
    punpcklwd    m0, m1
    mova         m1, m6                 ; .pass1_end multiplies by both m1 and m6
    jmp          m(iadst_16x4_internal_8bpc).pass1_end
.pass2:
    call         .main
    jmp          m(iadst_16x4_internal_8bpc).end
ALIGN function_align
; .main: broadcasts pd_2048 into m6 and applies IDCT4_1D to m0-m3, passing
; 4, 5 and 6 as its remaining register arguments.
cglobal_label .main
    vpbroadcastd m6, [o(pd_2048)]
    IDCT4_1D 0, 1, 2, 3, 4, 5, 6
    ret

INV_TXFM_16X4_FN adst, dct
INV_TXFM_16X4_FN adst, adst
INV_TXFM_16X4_FN adst, flipadst
INV_TXFM_16X4_FN adst, identity

; iadst_16x4_internal_8bpc
;
; First pass: loads the four 32-byte coefficient rows with a qword permute,
; runs iadst_4x16's .main2 and .main_pass1_end, then interleaves and
; regroups 128-bit halves. m1 = pw_16384 and m6 = m7 - m1; m7 is whatever
; .main_pass1_end leaves there (not visible in this excerpt).
; .pass1_end (shared with idct/iflipadst 16x4): scales m3 and m4 by m1, m2
; and m0 by m6, transposes with word and dword interleaves and jumps to the
; second-pass handler in tx2q.
; .pass2 / .end / .end2 / .end3 (shared exits): run .main, scale m0-m3 by
; pw_2048, zero the four 32-byte coefficient rows, and add the result to
; four destination rows via WRITE_16X2.
cglobal iadst_16x4_internal_8bpc, 0, 5, 11, dst, stride, c, eob, tx2
    vpermq       m0, [cq+32*0], q1230
    vpermq       m3, [cq+32*3], q2103
    vpermq       m1, [cq+32*1], q1230
    vpermq       m2, [cq+32*2], q2103
    call         m(iadst_4x16_internal_8bpc).main2
    call         m(iadst_4x16_internal_8bpc).main_pass1_end
    punpcklwd    m4, m3, m1
    punpcklwd    m5, m2, m0
    punpckhwd    m0, m1
    punpckhwd    m2, m3
    vpbroadcastd m1, [o(pw_16384)]
    vinserti128  m3, m0, xm2, 1
    vperm2i128   m2, m0, m2, 0x31
    vinserti128  m0, m4, xm5, 1
    vperm2i128   m4, m4, m5, 0x31
    psubw        m6, m7, m1
.pass1_end:
    pmulhrsw     m3, m1
    pmulhrsw     m2, m6
    pmulhrsw     m4, m1
    pmulhrsw     m0, m6
    punpcklwd    m1, m3, m2
    punpckhwd    m3, m2
    punpcklwd    m2, m4, m0
    punpckhwd    m4, m0
    punpckldq    m0, m1, m2
    punpckhdq    m1, m2
    punpckldq    m2, m3, m4
    punpckhdq    m3, m4
    jmp          tx2q
.pass2:
    call         .main
.end:
    vpbroadcastd m4, [o(pw_2048)]
    REPX {pmulhrsw x, m4}, m0, m1, m2, m3
    WIN64_RESTORE_XMM
.end2:
    pxor         m4, m4
    mova         [cq+32*0], m4          ; clear the coefficient buffer
    mova         [cq+32*1], m4
    mova         [cq+32*2], m4
    mova         [cq+32*3], m4
.end3:
    WRITE_16X2 0, 1, 4, 5, strideq*0, strideq*1
    lea          dstq, [dstq+strideq*2]
    WRITE_16X2 2, 3, 4, 5, strideq*0, strideq*1
    RET
ALIGN function_align
; .main: inputs in m0-m3, results in m0-m3; m4-m10 are used as scratch.
; Products are formed with pmaddwd on interleaved word pairs, pd_2048 is
; added for rounding, and the sums are shifted right by 12 and packed back
; to words with signed saturation.
cglobal_label .main
    vpbroadcastd m6, [o(pw_m3344_3344)]
    vpbroadcastd m7, [o(pw_3803_1321)]
    vpbroadcastd m8, [o(pw_m1321_2482)]
    vpbroadcastd m9, [o(pw_2482_3344)]
    punpcklwd    m4, m2, m0             ; in2 in0 l
    punpckhwd    m2, m0                 ; in2 in0 h
    psrld        m5, m6, 16             ; keep only the high word of each m6 dword
    pmaddwd      m10, m6, m4            ; t2:02 l
    pmaddwd      m6, m2                 ; t2:02 h
    pmaddwd      m0, m7, m4             ; t0:02 l
    pmaddwd      m7, m2                 ; t0:02 h
    pmaddwd      m4, m8                 ; t1:02 l
    pmaddwd      m8, m2                 ; t1:02 h
    punpckhwd    m2, m3, m1             ; in3 in1 h
    punpcklwd    m3, m1                 ; in3 in1 l
    pmaddwd      m1, m5, m2             ; t2:3 h
    pmaddwd      m5, m3                 ; t2:3 l
    paddd        m6, m1
    vpbroadcastd m1, [o(pd_2048)]
    paddd        m10, m5
    pmaddwd      m5, m9, m3
    pmaddwd      m9, m2
    paddd        m0, m1
    paddd        m7, m1
    paddd        m0, m5                 ; t0 + t3 + 2048 l
    paddd        m7, m9                 ; t0 + t3 + 2048 h
    vpbroadcastd m9, [o(pw_m3803_3344)]
    pmaddwd      m5, m9, m2
    pmaddwd      m9, m3
    paddd        m10, m1                ; t2 + 2048 l
    paddd        m6, m1                 ; t2 + 2048 h
    paddd        m5, m1                 ; t1:13 + 2048 h
    paddd        m1, m9                 ; t1:13 + 2048 l
    vpbroadcastd m9, [o(pw_m3803_m6688)]
    pmaddwd      m2, m9
    pmaddwd      m3, m9
    paddd        m5, m8                 ; t1 + t3 + 2048 h
    paddd        m1, m4                 ; t1 + t3 + 2048 l
    paddd        m8, m7
    paddd        m4, m0
    paddd        m2, m8                 ; t0 + t1 - t3 + 2048 h
    paddd        m3, m4                 ; t0 + t1 - t3 + 2048 l
    REPX {psrad x, 12}, m10, m6, m0, m7, m5, m1, m2, m3
    packssdw     m0, m7
    packssdw     m1, m5
    packssdw     m3, m2
    packssdw     m2, m10, m6
    ret

INV_TXFM_16X4_FN flipadst, dct
INV_TXFM_16X4_FN flipadst, adst
INV_TXFM_16X4_FN flipadst, flipadst
INV_TXFM_16X4_FN flipadst, identity

; iflipadst_16x4_internal_8bpc
;
; First pass: same loads and same iadst_4x16 helper calls as
; iadst_16x4_internal_8bpc, but the interleaves pick the operands in a
; different order, and the two scale factors are swapped before joining the
; shared .pass1_end: here m6 = pw_16384 and m1 = m7 - m6 (m7 is whatever
; .main_pass1_end leaves there; not visible in this excerpt).
; Second pass: runs iadst_16x4's .main, scales by pw_2048, zeroes the four
; 32-byte coefficient rows and writes the registers in the order m3, m2,
; m1, m0, i.e. reversed relative to the exit path of iadst_16x4.
cglobal iflipadst_16x4_internal_8bpc, 0, 5, 11, dst, stride, c, eob, tx2
    vpermq       m0, [cq+32*0], q1230
    vpermq       m3, [cq+32*3], q2103
    vpermq       m1, [cq+32*1], q1230
    vpermq       m2, [cq+32*2], q2103
    call         m(iadst_4x16_internal_8bpc).main2
    call         m(iadst_4x16_internal_8bpc).main_pass1_end
    punpckhwd    m4, m3, m2
    punpckhwd    m5, m1, m0
    punpcklwd    m0, m2
    punpcklwd    m1, m3
    vpbroadcastd m6, [o(pw_16384)]
    vinserti128  m3, m0, xm1, 1
    vperm2i128   m2, m0, m1, 0x31
    vinserti128  m0, m4, xm5, 1
    vperm2i128   m4, m4, m5, 0x31
    psubw        m1, m7, m6
    jmp          m(iadst_16x4_internal_8bpc).pass1_end
ALIGN function_align
.pass2:
    call         m(iadst_16x4_internal_8bpc).main
    vpbroadcastd m4, [o(pw_2048)]
    REPX {pmulhrsw x, m4}, m3, m2, m1, m0
    pxor         m4, m4
    mova         [cq+32*0], m4          ; clear the coefficient buffer
    mova         [cq+32*1], m4
    mova         [cq+32*2], m4
    mova         [cq+32*3], m4
    WRITE_16X2 3, 2, 4, 5, strideq*0, strideq*1
    lea          dstq, [dstq+strideq*2]
    WRITE_16X2 1, 0, 4, 5, strideq*0, strideq*1
    RET

INV_TXFM_16X4_FN identity, dct
INV_TXFM_16X4_FN identity, adst
INV_TXFM_16X4_FN identity, flipadst
INV_TXFM_16X4_FN identity, identity

; iidentity_16x4_internal_8bpc
;
; First pass: gathers eight 16-byte coefficient rows into four ymm
; registers (rows 0-3 in the low halves, rows 4-7 in the high halves),
; interleaves words twice, then computes
;     x += pmulhrsw(pmulhrsw(x, pw_1697x16), pw_16384)
; with a saturating add for each of the four registers, regroups qwords and
; jumps to the second-pass handler in tx2q.
; Second pass: x += pmulhrsw(x, pw_1697x8) with a saturating add, then the
; shared exit at iadst_16x4's .end.
cglobal iidentity_16x4_internal_8bpc, 0, 5, 11, dst, stride, c, eob, tx2
    mova         xm2, [cq+16*0]
    mova         xm4, [cq+16*1]
    vinserti128  m2, [cq+16*4], 1
    vinserti128  m4, [cq+16*5], 1
    mova         xm0, [cq+16*2]
    mova         xm1, [cq+16*3]
    vinserti128  m0, [cq+16*6], 1
    vinserti128  m1, [cq+16*7], 1
    vpbroadcastd m7, [o(pw_1697x16)]
    vpbroadcastd m8, [o(pw_16384)]
    punpcklwd    m3, m2, m4
    punpckhwd    m2, m4
    punpcklwd    m4, m0, m1
    punpckhwd    m0, m1
    punpcklwd    m1, m3, m2
    punpckhwd    m3, m2
    punpcklwd    m2, m4, m0
    punpckhwd    m4, m0
    pmulhrsw     m0, m7, m1
    pmulhrsw     m5, m7, m2
    pmulhrsw     m6, m7, m3
    pmulhrsw     m7, m4                 ; last use of the constant, so m7 is reused as the destination
    REPX {pmulhrsw x, m8}, m0, m5, m6, m7
    paddsw       m1, m0
    paddsw       m2, m5
    paddsw       m3, m6
    paddsw       m4, m7
    punpcklqdq   m0, m1, m2
    punpckhqdq   m1, m2
    punpcklqdq   m2, m3, m4
    punpckhqdq   m3, m4
    jmp          tx2q
.pass2:
    vpbroadcastd m7, [o(pw_1697x8)]
    pmulhrsw     m4, m7, m0
    pmulhrsw     m5, m7, m1
    pmulhrsw     m6, m7, m2
    pmulhrsw     m7, m3
    paddsw       m0, m4
    paddsw       m1, m5
    paddsw       m2, m6
    paddsw       m3, m7
    jmp          m(iadst_16x4_internal_8bpc).end
; INV_TXFM_16X8_FN type1, type2
;
; Emits the 16x8 entry point for one (type1, type2) pair through INV_TXFM_FN
; (defined elsewhere in the file). For dct_dct only, it also emits a DC-only
; prologue that multiplies the first coefficient by pw_2896x8 twice, stores
; the value of eobd to [cq], ORs 8 into r3d and continues in the .dconly
; code of the 16x4 dct_dct entry point.
; NOTE(review): presumably reached only when eob is 0, which would make r3d
; the row count (8) - confirm against INV_TXFM_FN.
%macro INV_TXFM_16X8_FN 2 ; type1, type2
    INV_TXFM_FN %1, %2, 16x8
%ifidn %1_%2, dct_dct
    movd         xm1, [o(pw_2896x8)]
    pmulhrsw     xm0, xm1, [cq]
    movd         xm2, [o(pw_16384)]
    mov          [cq], eobd
    pmulhrsw     xm0, xm1
    or           r3d, 8
    jmp          m(inv_txfm_add_dct_dct_16x4_8bpc).dconly
%endif
%endmacro

; ITX_16X8_LOAD_COEFS shuf_odd
;
; Loads the eight 32-byte coefficient rows into m0-m7 and multiplies each by
; pw_2896x8 (pmulhrsw). Even-numbered registers are loaded with the qword
; permutation q3120, odd-numbered ones with the permutation named by the
; argument. cq is advanced by 32*4 bytes and left there, so later code
; addresses the rows as cq-32*4 .. cq+32*3. Clobbers m8.
%macro ITX_16X8_LOAD_COEFS 1 ; shuf_odd
    vpbroadcastd m8, [o(pw_2896x8)]
    vpermq       m0, [cq+32*0], q3120
    add          cq, 32*4
    vpermq       m7, [cq+32*3], q%1
    vpermq       m1, [cq-32*3], q%1
    vpermq       m6, [cq+32*2], q3120
    vpermq       m2, [cq-32*2], q3120
    vpermq       m5, [cq+32*1], q%1
    vpermq       m3, [cq-32*1], q%1
    vpermq       m4, [cq+32*0], q3120
    REPX {pmulhrsw x, m8}, m0, m7, m1, m6, m2, m5, m3, m4
%endmacro

INV_TXFM_16X8_FN dct, dct
INV_TXFM_16X8_FN dct, adst
INV_TXFM_16X8_FN dct, flipadst
INV_TXFM_16X8_FN dct, identity

; idct_16x8_internal_8bpc
;
; First pass: loads and pre-scales the coefficients, runs idct_8x16's .main,
; interleaves words and scales by pw_16384 (m10).
; .pass1_end (shared with iadst_16x8, which arrives with its own scale
; already applied to m8, m1, m4 and m6): scales m0, m2, m9 and m5 by m10,
; finishes the transpose and jumps to the second-pass handler in tx2q.
; .pass2 / .end / .end2: run .main, scale by m8 (pw_2048 here) and write the
; first four rows. .end3 zeroes the eight 32-byte coefficient rows around
; the advanced cq; .end4 writes the last four rows.
; The labels are separate entry points because other routines join the exit
; path at different stages.
cglobal idct_16x8_internal_8bpc, 0, 5, 13, dst, stride, c, eob, tx2
    ITX_16X8_LOAD_COEFS 3120
    call         m(idct_8x16_internal_8bpc).main
    vpbroadcastd m10, [o(pw_16384)]
    punpckhwd    m8, m0, m2
    punpcklwd    m0, m2
    punpckhwd    m2, m1, m3
    punpcklwd    m1, m3
    punpcklwd    m9, m4, m6
    punpckhwd    m4, m6
    punpcklwd    m6, m5, m7
    punpckhwd    m5, m7
    REPX {pmulhrsw x, m10}, m8, m1, m4, m6
.pass1_end:
    REPX {pmulhrsw x, m10}, m0, m2, m9, m5
    punpckhwd    m3, m0, m8
    punpcklwd    m0, m8
    punpckhwd    m8, m2, m1
    punpcklwd    m2, m1
    punpcklwd    m7, m9, m4
    punpckhwd    m9, m4
    punpcklwd    m4, m5, m6
    punpckhwd    m5, m6
    punpckhdq    m1, m0, m2
    punpckldq    m0, m2
    punpckldq    m2, m3, m8
    punpckhdq    m3, m8
    punpckldq    m6, m7, m4
    punpckhdq    m7, m4
    punpckldq    m8, m9, m5
    punpckhdq    m9, m5
    vperm2i128   m4, m0, m6, 0x31
    vinserti128  m0, xm6, 1
    vperm2i128   m5, m1, m7, 0x31
    vinserti128  m1, xm7, 1
    vperm2i128   m6, m2, m8, 0x31
    vinserti128  m2, xm8, 1
    vperm2i128   m7, m3, m9, 0x31
    vinserti128  m3, xm9, 1
    jmp          tx2q
.pass2:
    call         .main
    vpbroadcastd m8, [o(pw_2048)]
.end:
    REPX {pmulhrsw x, m8}, m0, m2, m4, m6
.end2:
    REPX {pmulhrsw x, m8}, m1, m3, m5, m7
    lea          r3, [strideq*3]
    WRITE_16X2 0, 1, 8, 0, strideq*0, strideq*1
    WRITE_16X2 2, 3, 0, 1, strideq*2, r3
.end3:
    pxor         m0, m0
    REPX {mova [cq+32*x], m0}, -4, -3, -2, -1, 0, 1, 2, 3
.end4:
    lea          dstq, [dstq+strideq*4]
    WRITE_16X2 4, 5, 0, 1, strideq*0, strideq*1
    WRITE_16X2 6, 7, 0, 1, strideq*2, r3
    RET
ALIGN function_align
; .main: broadcasts pd_2048 into m10 and applies IDCT8_1D to m0-m7, passing
; 8, 9 and 10 as its remaining register arguments. .main2 skips the
; broadcast for callers that have already loaded m10.
cglobal_label .main
    vpbroadcastd m10, [o(pd_2048)]
.main2:
    IDCT8_1D 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
    ret

INV_TXFM_16X8_FN adst, dct
INV_TXFM_16X8_FN adst, adst
INV_TXFM_16X8_FN adst, flipadst
INV_TXFM_16X8_FN adst, identity

; iadst_16x8_internal_8bpc
;
; First pass: loads coefficients with the odd-register permutation q1302,
; runs iadst_8x16's .main2 and .main_pass1_end, forms m11 = m9 - m10 (both
; left by those helpers; their contents are not visible in this excerpt),
; interleaves words, scales m8, m1, m4 and m6 by m11 and joins idct_16x8's
; .pass1_end, which scales the remaining four registers by m10.
; Second pass: .main and .main_pass2_end leave m9 = pw_2048; m8 = 0 - m9.
; The even registers are scaled by m9 here and the odd ones by m8 at
; idct_16x8's .end2. The comments in .main mark m1, m3, m5 and m7 as
; holding negated outputs, which multiplying by the negated scale undoes.
cglobal iadst_16x8_internal_8bpc, 0, 5, 13, dst, stride, c, eob, tx2
    ITX_16X8_LOAD_COEFS 1302
    call         m(iadst_8x16_internal_8bpc).main2
    call         m(iadst_8x16_internal_8bpc).main_pass1_end
    psubw        m11, m9, m10
    punpcklwd    m8, m0, m2
    punpckhwd    m0, m2
    punpckhwd    m2, m1, m3
    punpcklwd    m1, m3
    punpcklwd    m9, m4, m6
    punpckhwd    m4, m6
    punpckhwd    m6, m5, m7
    punpcklwd    m5, m7
    REPX {pmulhrsw x, m11}, m8, m1, m4, m6
    jmp          m(idct_16x8_internal_8bpc).pass1_end
ALIGN function_align
.pass2:
    call         .main
    call         .main_pass2_end
    pxor         m8, m8
    psubw        m8, m9                 ; m8 = -m9
    REPX {pmulhrsw x, m9}, m0, m2, m4, m6
    jmp          m(idct_16x8_internal_8bpc).end2
ALIGN function_align
; .main: inputs in m0-m7. On return m0, m1, m6 and m7 hold out0, -out1, out6
; and -out7, while m2, m3, m5 and m9 hold intermediates (t6, t3, t2, t7)
; that .main_pass1_end or .main_pass2_end turn into outputs 2-5. m10 is
; left holding pd_2048.
cglobal_label .main
    vpbroadcastd m10, [o(pd_2048)]
    ITX_MULSUB_2W 7, 0, 8, 9, 10, 401, 4076 ; t1a, t0a
    ITX_MULSUB_2W 3, 4, 8, 9, 10, 3166, 2598 ; t5a, t4a
    ITX_MULSUB_2W 1, 6, 8, 9, 10, 3920, 1189 ; t7a, t6a
    ITX_MULSUB_2W 5, 2, 8, 9, 10, 1931, 3612 ; t3a, t2a
    psubsw       m8, m2, m6             ; t6
    paddsw       m2, m6                 ; t2
    psubsw       m6, m0, m4             ; t4
    paddsw       m0, m4                 ; t0
    psubsw       m4, m5, m1             ; t7
    paddsw       m5, m1                 ; t3
    psubsw       m1, m7, m3             ; t5
    paddsw       m7, m3                 ; t1
    ITX_MULSUB_2W 6, 1, 3, 9, 10, 1567, 3784 ; t5a, t4a
    ITX_MULSUB_2W 4, 8, 3, 9, 10, 3784, 1567 ; t6a, t7a
    psubsw       m9, m6, m8             ; t7
    paddsw       m6, m8                 ; out6
    psubsw       m3, m7, m5             ; t3
    paddsw       m7, m5                 ; -out7
    psubsw       m5, m0, m2             ; t2
    paddsw       m0, m2                 ; out0
    psubsw       m2, m1, m4             ; t6
    paddsw       m1, m4                 ; -out1
    ret
ALIGN function_align
; .main_pass1_end: finishes outputs 2-5 from the intermediates left by .main
; using 32-bit pmaddwd products, rounding with m10 and a shift right by 12.
; Clobbers m8, m9, m11 and m12.
.main_pass1_end:
    vpbroadcastd m11, [o(pw_m2896_2896)]
    vpbroadcastd m12, [o(pw_2896_2896)]
    punpckhwd    m4, m3, m5
    punpcklwd    m3, m5
    pmaddwd      m5, m11, m4
    pmaddwd      m4, m12
    pmaddwd      m8, m11, m3
    pmaddwd      m3, m12
    REPX {paddd x, m10}, m5, m4, m8, m3
    REPX {psrad x, 12 }, m5, m8, m4, m3
    packssdw     m3, m4                 ; -out3
    packssdw     m4, m8, m5             ; out4
    punpcklwd    m5, m9, m2
    punpckhwd    m9, m2
    pmaddwd      m2, m12, m5
    pmaddwd      m5, m11
    pmaddwd      m12, m9
    pmaddwd      m11, m9
    REPX {paddd x, m10}, m2, m5, m12, m11
    REPX {psrad x, 12 }, m2, m12, m5, m11
    packssdw     m2, m12                ; out2
    packssdw     m5, m11                ; -out5
    ret
ALIGN function_align
; .main_pass2_end: 16-bit variant of the step above. It forms saturating
; sums and differences, scales them by pw_2896x8 with pmulhrsw, and returns
; with m9 = pw_2048 for the caller's final scaling. Clobbers m8.
cglobal_label .main_pass2_end
    vpbroadcastd m8, [o(pw_2896x8)]
    psubsw       m4, m5, m3
    paddsw       m3, m5
    psubsw       m5, m2, m9
    paddsw       m2, m9
    pmulhrsw     m2, m8                 ; out2
    pmulhrsw     m3, m8                 ; -out3
    pmulhrsw     m4, m8                 ; out4
    pmulhrsw     m5, m8                 ; -out5
    vpbroadcastd m9, [o(pw_2048)]
    ret

INV_TXFM_16X8_FN flipadst, dct
INV_TXFM_16X8_FN flipadst, adst
INV_TXFM_16X8_FN flipadst, flipadst
INV_TXFM_16X8_FN flipadst, identity

; iflipadst_16x8_internal_8bpc
;
; First pass: same loads and helper calls as iadst_16x8_internal_8bpc. It
; then forms m9 = m9 - m10 in place, interleaves the registers in a
; different order, scales m8, m4, m5 and m1 by m10 and m6, m7, m3 and m2 by
; m9, and performs its own transpose before jumping to tx2q.
; Second pass: runs iadst_16x8's .main and .main_pass2_end, scales each
; register by m9 (pw_2048) or m8 (its negation) while permuting the
; registers, writes the first four rows here and joins idct_16x8's .end3
; for the coefficient clear and the remaining rows.
cglobal iflipadst_16x8_internal_8bpc, 0, 5, 13, dst, stride, c, eob, tx2
    ITX_16X8_LOAD_COEFS 1302
    call         m(iadst_8x16_internal_8bpc).main2
    call         m(iadst_8x16_internal_8bpc).main_pass1_end
    psubw        m9, m10
    punpcklwd    m8, m6, m4
    punpckhwd    m6, m4
    punpcklwd    m4, m7, m5
    punpckhwd    m7, m5
    punpckhwd    m5, m3, m1
    punpcklwd    m3, m1
    punpckhwd    m1, m2, m0
    punpcklwd    m2, m0
    REPX {pmulhrsw x, m10}, m8, m4, m5, m1
    REPX {pmulhrsw x, m9 }, m6, m7, m3, m2
    punpcklwd    m0, m7, m4
    punpckhwd    m7, m4
    punpckhwd    m4, m6, m8
    punpcklwd    m6, m8
    punpckhwd    m8, m3, m5
    punpcklwd    m3, m5
    punpcklwd    m5, m2, m1
    punpckhwd    m2, m1
    punpckhdq    m1, m0, m6
    punpckldq    m0, m6
    punpckldq    m6, m7, m4
    punpckhdq    m7, m4
    punpckhdq    m4, m3, m5
    punpckldq    m3, m5
    punpckldq    m5, m8, m2
    punpckhdq    m8, m2
    vinserti128  m2, m6, xm5, 1
    vperm2i128   m6, m5, 0x31
    vperm2i128   m5, m1, m4, 0x31
    vinserti128  m1, xm4, 1
    vperm2i128   m4, m0, m3, 0x31
    vinserti128  m0, xm3, 1
    vinserti128  m3, m7, xm8, 1
    vperm2i128   m7, m8, 0x31
    jmp          tx2q
.pass2:
    call         m(iadst_16x8_internal_8bpc).main
    call         m(iadst_16x8_internal_8bpc).main_pass2_end
    pxor         m8, m8
    psubw        m8, m9                 ; m8 = -m9
    pmulhrsw     m10, m7, m8
    pmulhrsw     m7, m0, m9
    pmulhrsw     m0, m6, m9
    pmulhrsw     m6, m1, m8
    pmulhrsw     m1, m5, m8
    pmulhrsw     m5, m2, m9
    pmulhrsw     m2, m4, m9
    pmulhrsw     m4, m3, m8
    lea          r3, [strideq*3]
    WRITE_16X2 10, 0, 8, 9, strideq*0, strideq*1
    WRITE_16X2 1, 2, 0, 1, strideq*2, r3
    jmp          m(idct_16x8_internal_8bpc).end3

INV_TXFM_16X8_FN identity, dct
INV_TXFM_16X8_FN identity, adst
INV_TXFM_16X8_FN identity, flipadst
INV_TXFM_16X8_FN identity, identity

; ---------------------------------------------------------------------------
; iidentity_16x8_internal_8bpc
;   Pass 1: gather the coefficients at cq into eight ymm registers, scale them
;   by pw_2896x8, interleave, apply IDTX16 to every register and leave the
;   reordered result in m0-m7 before the indirect jump through tx2q
;   (presumably the second-pass entry selected by INV_TXFM_16X8_FN - confirm).
;   .pass2: load pw_4096 into m8 and finish in idct_16x8's shared .end tail.
; ---------------------------------------------------------------------------
cglobal iidentity_16x8_internal_8bpc, 0, 5, 13, dst, stride, c, eob, tx2
    ; Low lane of each ymm <- 16-byte chunk k, high lane <- chunk k+8.
    ; cq is advanced by 16*8 after the first two loads, so the negative
    ; offsets below address chunks 2..7 and the positive ones chunks 8..15.
    mova          xm7, [cq+16*0]
    mova          xm2, [cq+16*1]
    add           cq, 16*8
    vpbroadcastd  m3, [o(pw_2896x8)]
    vinserti128   m7, [cq+16*0], 1
    vinserti128   m2, [cq+16*1], 1
    mova          xm6, [cq-16*6]
    mova          xm4, [cq-16*5]
    vinserti128   m6, [cq+16*2], 1
    vinserti128   m4, [cq+16*3], 1
    mova          xm8, [cq-16*4]
    mova          xm5, [cq-16*3]
    vinserti128   m8, [cq+16*4], 1
    vinserti128   m5, [cq+16*5], 1
    mova          xm0, [cq-16*2]
    mova          xm1, [cq-16*1]
    vinserti128   m0, [cq+16*6], 1
    vinserti128   m1, [cq+16*7], 1
    vpbroadcastd  m10, [o(pw_1697x16)]
    vpbroadcastd  m11, [o(pw_16384)]
    ; Pre-scale all eight registers by pw_2896x8; m3 is free again afterwards
    ; and is immediately reused as an interleave destination.
    ; NOTE(review): the constant name suggests 2896*8, i.e. a 2896/4096
    ; scale through pmulhrsw - confirm against the rodata table.
    REPX          {pmulhrsw x, m3}, m7, m2, m6, m4, m8, m5, m0, m1
    ; Word interleave, then dword interleave.
    punpcklwd     m3, m7, m2
    punpckhwd     m7, m2
    punpcklwd     m2, m6, m4
    punpckhwd     m6, m4
    punpcklwd     m4, m8, m5
    punpckhwd     m8, m5
    punpcklwd     m5, m0, m1
    punpckhwd     m0, m1
    punpckldq     m1, m3, m2
    punpckhdq     m3, m2
    punpckldq     m2, m4, m5
    punpckhdq     m4, m5
    punpckldq     m5, m7, m6
    punpckhdq     m7, m6
    punpckldq     m6, m8, m0
    punpckhdq     m8, m0
    ; m0 is no longer live here and is passed as IDTX16's second argument
    ; (presumably its scratch register; m10/m11 carry the two constants).
    REPX          {IDTX16 x, 0, 10, 11}, 1, 3, 2, 4, 5, 7, 6, 8
    ; Final qword interleave puts the result into m0..m7 in order.
    punpcklqdq    m0, m1, m2
    punpckhqdq    m1, m2
    punpcklqdq    m2, m3, m4
    punpckhqdq    m3, m4
    punpcklqdq    m4, m5, m6
    punpckhqdq    m5, m6
    punpcklqdq    m6, m7, m8
    punpckhqdq    m7, m8
    jmp           tx2q
.pass2:
    vpbroadcastd  m8, [o(pw_4096)]
    jmp           m(idct_16x8_internal_8bpc).end

; Re-anchor o_base for the code that follows.
; NOTE(review): presumably the base that o() offsets are computed against -
; confirm against the o() definition, which is outside this chunk.
%define o_base pw_5 + 128

; Emits the 16x16 entry point for one (type1, type2) pair via INV_TXFM_FN.
; For dct_dct only, the code below is appended: it multiplies the first
; coefficient by pw_2896x8, loads pw_8192 into xm2, writes eobd over the
; coefficient, ORs 16 into r3d and continues in the 16x4 .dconly tail.
; NOTE(review): this looks like the DC-only shortcut, with eob == 0 doubling
; as the value that clears [cq] - confirm against INV_TXFM_FN.
%macro INV_TXFM_16X16_FN 2 ; type1, type2
    INV_TXFM_FN   %1, %2, 16x16
%ifidn %1_%2, dct_dct
    movd          xm1, [o(pw_2896x8)]
    pmulhrsw      xm0, xm1, [cq]
    movd          xm2, [o(pw_8192)]
    mov           [cq], eobd
    or            r3d, 16
    jmp           m(inv_txfm_add_dct_dct_16x4_8bpc).dconly
%endif
%endmacro

; Loads the sixteen 32-byte coefficient rows into m0-m15. cq is advanced by
; 32*8 after row 3, so it is left pointing at row 8. Row 15 is additionally
; stored at [rsp]: the 16x16 .main routines read it back from there and
; overwrite m15 with a constant first.
%macro ITX_16X16_LOAD_COEFS 0
    mova          m0, [cq+32*0]
    mova          m1, [cq+32*1]
    mova          m2, [cq+32*2]
    mova          m3, [cq+32*3]
    add           cq, 32*8
    mova          m4, [cq-32*4]
    mova          m5, [cq-32*3]
    mova          m6, [cq-32*2]
    mova          m7, [cq-32*1]
    mova          m8, [cq+32*0]
    mova          m9, [cq+32*1]
    mova          m10, [cq+32*2]
    mova          m11, [cq+32*3]
    mova          m12, [cq+32*4]
    mova          m13, [cq+32*5]
    mova          m14, [cq+32*6]
    mova          m15, [cq+32*7]
    mova          [rsp], m15
%endmacro

INV_TXFM_16X16_FN dct, dct
INV_TXFM_16X16_FN dct, adst
INV_TXFM_16X16_FN dct, flipadst
INV_TXFM_16X16_FN dct, identity

; ---------------------------------------------------------------------------
; idct_16x16_internal_8bpc (32*3 bytes of stack)
;   Pass 1: load -> .main -> scale by pw_8192 -> regroup lanes / interleave
;           -> indirect jump through tx2q.
;   Pass 2: .main -> scale by pw_2048 -> WRITE_16X2 output -> clear cq.
;   Entry points reused by the other 16x16 transforms in this file:
;     .pass1_end2, .pass1_end3, .end2, .end3 and .main.
;   Stack use as seen from the function body (add gprsize inside .main,
;   which is reached through a call):
;     [rsp+32*0]  in15 on entry to .main; later lane-regroup scratch / row 6
;     [rsp+32*1]  out1 on return from .main (m1 is needed for constants)
;     [rsp+32*2]  lane-regroup scratch
; ---------------------------------------------------------------------------
cglobal idct_16x16_internal_8bpc, 0, 5, 16, 32*3, dst, stride, c, eob, tx2
    ITX_16X16_LOAD_COEFS
    call          .main
.pass1_end:
    ; Scale the even registers by pw_8192 (pmulhrsw; a rounded >>2 if the
    ; constant matches its name). m1 holds the multiplier, so row 1 stays in
    ; [rsp+32*1] until the other odd rows have been scaled.
    vpbroadcastd  m1, [o(pw_8192)]
    REPX          {pmulhrsw x, m1}, m0, m2, m4, m6, m8, m10, m12, m14
    ; Park both halves of m8 (and of m0 just below) on the stack so that
    ; [rsp+32*0] = {lo m0, lo m8} and [rsp+32*2] = {hi m0, hi m8}.
    vextracti128  [rsp+16*5], m8, 1
    mova          [rsp+16*1], xm8
.pass1_end2:
    vextracti128  [rsp+16*4], m0, 1
    mova          [rsp+16*0], xm0
    REPX          {pmulhrsw x, m1}, m3, m5, m7, m9, m11, m13, m15
    pmulhrsw      m1, [rsp+32*1]          ; row 1, scaled last
    ; For N = 1..7: m(N+7) = {hi mN, hi m(N+8)}, mN = {lo mN, lo m(N+8)}.
    vperm2i128    m8, m1, m9, 0x31
    vinserti128   m1, xm9, 1
    vperm2i128    m9, m2, m10, 0x31
    vinserti128   m2, xm10, 1
    vperm2i128    m10, m3, m11, 0x31
    vinserti128   m3, xm11, 1
    vperm2i128    m11, m4, m12, 0x31
    vinserti128   m4, xm12, 1
    vperm2i128    m12, m5, m13, 0x31
    vinserti128   m5, xm13, 1
    vperm2i128    m13, m6, m14, 0x31
    vinserti128   m6, xm14, 1
    vperm2i128    m14, m7, m15, 0x31
    vinserti128   m7, xm15, 1
    mova          m15, [rsp+32*2]         ; {hi m0, hi m8} parked above
.pass1_end3:
    ; Word -> dword -> qword interleave of m8-m15 (m15 taking the place of
    ; the N = 0 high-lane group). Presumably the usual unpack-based
    ; transpose; the exact element mapping is not re-derived here.
    punpcklwd     m0, m9, m10
    punpckhwd     m9, m10
    punpcklwd     m10, m15, m8
    punpckhwd     m15, m8
    punpckhwd     m8, m11, m12
    punpcklwd     m11, m12
    punpckhwd     m12, m13, m14
    punpcklwd     m13, m14
    punpckhdq     m14, m11, m13
    punpckldq     m11, m13
    punpckldq     m13, m15, m9
    punpckhdq     m15, m9
    punpckldq     m9, m10, m0
    punpckhdq     m10, m0
    punpckhdq     m0, m8, m12
    punpckldq     m8, m12
    punpcklqdq    m12, m13, m8
    punpckhqdq    m13, m8
    punpcklqdq    m8, m9, m11
    punpckhqdq    m9, m11
    punpckhqdq    m11, m10, m14
    punpcklqdq    m10, m14
    punpcklqdq    m14, m15, m0
    punpckhqdq    m15, m0
    ; Swap through the stack: fetch the parked low-lane group into m0 and
    ; leave the finished m15 at [rsp], which is where .main expects its
    ; sixteenth input. m15 itself becomes scratch for the second half.
    mova          m0, [rsp]
    mova          [rsp], m15
    ; Same interleave network for m0-m7.
    punpckhwd     m15, m4, m5
    punpcklwd     m4, m5
    punpckhwd     m5, m0, m1
    punpcklwd     m0, m1
    punpckhwd     m1, m6, m7
    punpcklwd     m6, m7
    punpckhwd     m7, m2, m3
    punpcklwd     m2, m3
    punpckhdq     m3, m0, m2
    punpckldq     m0, m2
    punpckldq     m2, m4, m6
    punpckhdq     m4, m6
    punpckhdq     m6, m5, m7
    punpckldq     m5, m7
    punpckldq     m7, m15, m1
    punpckhdq     m15, m1
    punpckhqdq    m1, m0, m2
    punpcklqdq    m0, m2
    punpcklqdq    m2, m3, m4
    punpckhqdq    m3, m4
    punpcklqdq    m4, m5, m7
    punpckhqdq    m5, m7
    punpckhqdq    m7, m6, m15
    punpcklqdq    m6, m15
    jmp           tx2q
.pass2:
    call          .main
.end:
    ; Scale by pw_2048 (a rounded >>4 if the constant matches its name).
    ; As in pass 1, m1 is the multiplier and row 1 comes from [rsp+32*1].
    vpbroadcastd  m1, [o(pw_2048)]
    REPX          {pmulhrsw x, m1}, m0, m2, m4, m6, m8, m10, m12, m14
    ; Row 6 is spilled because the first WRITE_16X2 below names register 6
    ; in its third argument (presumably a scratch register - confirm against
    ; the macro); the row is written later straight from [rsp].
    mova          [rsp], m6
.end2:
    REPX          {pmulhrsw x, m1}, m3, m5, m7, m9, m11, m13, m15
    pmulhrsw      m1, [rsp+32*1]
    lea           r3, [strideq*3]
    WRITE_16X2    0, 1, 6, 0, strideq*0, strideq*1
    WRITE_16X2    2, 3, 0, 1, strideq*2, r3
    lea           dstq, [dstq+strideq*4]
    WRITE_16X2    4, 5, 0, 1, strideq*0, strideq*1
    WRITE_16X2    [rsp], 7, 0, 1, strideq*2, r3
.end3:
    ; m2 has been written out, so it can serve as the zero register. The two
    ; REPX stores clear 16 * 32 bytes around cq (offsets -8..7 in units of 32).
    pxor          m2, m2
    REPX          {mova [cq+32*x], m2}, -8, -7, -6, -5, -4, -3, -2, -1
    lea           dstq, [dstq+strideq*4]
    WRITE_16X2    8, 9, 0, 1, strideq*0, strideq*1
    WRITE_16X2    10, 11, 0, 1, strideq*2, r3
    REPX          {mova [cq+32*x], m2}, 0, 1, 2, 3, 4, 5, 6, 7
    lea           dstq, [dstq+strideq*4]
    WRITE_16X2    12, 13, 0, 1, strideq*0, strideq*1
    WRITE_16X2    14, 15, 0, 1, strideq*2, r3
    RET
ALIGN function_align
; .main
;   in:  m0-m14 = in0..in14, [rsp+gprsize+32*0] = in15
;   out: mK = outK for K = 0 and K = 2..15; out1 at [rsp+gprsize+32*1]
;   m15 carries pd_2048 into both macro invocations. m1 and m9 are spilled
;   before IDCT8_1D, which receives them as its 9th/10th arguments
;   (presumably scratch - confirm against the macro).
cglobal_label .main
    vpbroadcastd  m15, [o(pd_2048)]
    mova          [rsp+gprsize+32*1], m1
    mova          [rsp+gprsize+32*2], m9
    IDCT8_1D      0, 2, 4, 6, 8, 10, 12, 14, 1, 9, 15
    ; Swap the three spilled odd inputs with three even-half results so the
    ; odd half can run in registers.
    mova          m1, [rsp+gprsize+32*2]  ; in9
    mova          [rsp+gprsize+32*2], m14 ; tmp7
    mova          m9, [rsp+gprsize+32*1]  ; in1
    mova          [rsp+gprsize+32*1], m10 ; tmp5
    mova          m14, [rsp+gprsize+32*0] ; in15
    mova          [rsp+gprsize+32*0], m6  ; tmp3
    IDCT16_1D_ODDHALF 9, 3, 5, 7, 1, 11, 13, 14, 6, 10, 15
    mova          m6, [rsp+gprsize+32*1]  ; tmp5
    ; Final butterflies (saturating): out[k] = even + odd, out[15-k] = even - odd.
    psubsw        m15, m0, m14            ; out15
    paddsw        m0, m14                 ; out0
    psubsw        m14, m2, m13            ; out14
    paddsw        m2, m13                 ; out1
    mova          [rsp+gprsize+32*1], m2
    psubsw        m13, m4, m11            ; out13
    paddsw        m2, m4, m11             ; out2
    psubsw        m11, m8, m7             ; out11
    paddsw        m4, m8, m7              ; out4
    mova          m7, [rsp+gprsize+32*2]  ; tmp7
    psubsw        m10, m6, m5             ; out10
    paddsw        m5, m6                  ; out5
    psubsw        m8, m7, m9              ; out8
    paddsw        m7, m9                  ; out7
    psubsw        m9, m12, m3             ; out9
    paddsw        m6, m12, m3             ; out6
    mova          m3, [rsp+gprsize+32*0]  ; tmp3
    psubsw        m12, m3, m1             ; out12
    paddsw        m3, m1                  ; out3
    ret

INV_TXFM_16X16_FN adst, dct
INV_TXFM_16X16_FN adst, adst
INV_TXFM_16X16_FN adst, flipadst

; ---------------------------------------------------------------------------
; iadst_16x16_internal_8bpc (32*3 bytes of stack)
;   .main leaves eight finished outputs (several of them negated, see the
;   trailing comments) plus eight intermediates; .main_pass1_end or
;   .main_pass2_end turns the intermediates into the remaining outputs and
;   returns the pass multiplier in m1. The function bodies scale the even
;   registers by +m1, negate m1, and join idct_16x16's shared tails, which
;   scale the odd registers - the ones .main returns negated.
; ---------------------------------------------------------------------------
cglobal iadst_16x16_internal_8bpc, 0, 5, 16, 32*3, dst, stride, c, eob, tx2
    ITX_16X16_LOAD_COEFS
    call          .main
    call          .main_pass1_end
    ; out0/2/12/14 were parked in [cq+32*0..3] by .main_pass1_end.
    pmulhrsw      m0, m1, [cq+32*0]
    pmulhrsw      m2, m1, [cq+32*1]
    REPX          {pmulhrsw x, m1}, m4, m6, m8, m10
    pmulhrsw      m12, m1, [cq+32*2]
    pmulhrsw      m14, m1, [cq+32*3]
    ; Same m8 parking as idct's .pass1_end; m8 then serves as zero.
    vextracti128  [rsp+16*5], m8, 1
    mova          [rsp+16*1], xm8
    pxor          m8, m8
    psubw         m1, m8, m1              ; m1 = -pw_8192 for the odd rows
    jmp           m(idct_16x16_internal_8bpc).pass1_end2
ALIGN function_align
.pass2:
    call          .main
    call          .main_pass2_end
    REPX          {pmulhrsw x, m1}, m0, m2, m4, m6, m8, m10, m12, m14
    mova          [rsp+32*0], m6          ; row 6 spill, as in idct's .end
    pxor          m6, m6
    psubw         m1, m6, m1              ; negate the multiplier for the odd rows
    jmp           m(idct_16x16_internal_8bpc).end2
ALIGN function_align
; .main
;   in:  m0-m14 = in0..in14, [rsp+gprsize+32*0] = in15
;   out: m0 = out0, m2 = out2, m12 = out12, m14 = out14,
;        m3 = -out3, m13 = -out13, m15 = -out15, [rsp+gprsize+32*1] = -out1,
;        and intermediates for .main_passN_end:
;        m1 = t10, m4 = t6, m6 = t11, m7 = t2a, m8 = t7, m9 = t3a,
;        m10 = t14a, m11 = t15a
;   m15 carries pd_2048 through every ITX_MULSUB_2W until it is overwritten
;   by -out15.
cglobal_label .main
    vpbroadcastd  m15, [o(pd_2048)]
    mova          [rsp+gprsize+32*1], m0
    mova          [rsp+gprsize+32*2], m4
    ITX_MULSUB_2W 13, 2, 0, 4, 15, 995, 3973   ; t3, t2
    ITX_MULSUB_2W 9, 6, 0, 4, 15, 2440, 3290   ; t7, t6
    ITX_MULSUB_2W 5, 10, 0, 4, 15, 3513, 2106  ; t11, t10
    ITX_MULSUB_2W 1, 14, 0, 4, 15, 4052, 601   ; t15, t14
    psubsw        m0, m2, m10             ; t10a
    paddsw        m2, m10                 ; t2a
    psubsw        m10, m13, m5            ; t11a
    paddsw        m13, m5                 ; t3a
    psubsw        m5, m6, m14             ; t14a
    paddsw        m6, m14                 ; t6a
    psubsw        m14, m9, m1             ; t15a
    paddsw        m9, m1                  ; t7a
    ITX_MULSUB_2W 0, 10, 1, 4, 15, 3406, 2276  ; t11, t10
    ITX_MULSUB_2W 14, 5, 1, 4, 15, 2276, 3406  ; t14, t15
    psubsw        m1, m10, m14            ; t14a
    paddsw        m10, m14                ; t10a
    psubsw        m14, m0, m5             ; t15a
    paddsw        m0, m5                  ; t11a
    psubsw        m5, m2, m6              ; t6
    paddsw        m2, m6                  ; t2
    psubsw        m6, m13, m9             ; t7
    paddsw        m13, m9                 ; t3
    ITX_MULSUB_2W 6, 5, 4, 9, 15, 3784, 1567   ; t6a, t7a
    ITX_MULSUB_2W 14, 1, 4, 9, 15, 3784, 1567  ; t14, t15
    ; Rotate the three stack slots: pull in the spilled inputs, push out
    ; intermediates that are not needed until the end.
    mova          m9, [rsp+gprsize+32*0]  ; in15
    mova          [rsp+gprsize+32*0], m10 ; t10a
    mova          m4, [rsp+gprsize+32*1]  ; in0
    mova          [rsp+gprsize+32*1], m6  ; t6a
    mova          m6, [rsp+gprsize+32*2]  ; in4
    mova          [rsp+gprsize+32*2], m2  ; t2
    ITX_MULSUB_2W 9, 4, 2, 10, 15, 201, 4091   ; t1, t0
    ITX_MULSUB_2W 11, 6, 2, 10, 15, 1751, 3703 ; t5, t4
    ITX_MULSUB_2W 7, 8, 2, 10, 15, 3035, 2751  ; t9, t8
    ITX_MULSUB_2W 3, 12, 2, 10, 15, 3857, 1380 ; t13, t12
    psubsw        m10, m4, m8             ; t8a
    paddsw        m8, m4                  ; t0a
    psubsw        m4, m9, m7              ; t9a
    paddsw        m9, m7                  ; t1a
    psubsw        m7, m6, m12             ; t12a
    paddsw        m6, m12                 ; t4a
    psubsw        m12, m11, m3            ; t13a
    paddsw        m11, m3                 ; t5a
    ITX_MULSUB_2W 10, 4, 2, 3, 15, 799, 4017   ; t9, t8
    ITX_MULSUB_2W 12, 7, 2, 3, 15, 4017, 799   ; t12, t13
    psubsw        m3, m9, m11             ; t5
    paddsw        m9, m11                 ; t1
    psubsw        m11, m4, m12            ; t12a
    paddsw        m4, m12                 ; t8a
    paddsw        m12, m8, m6             ; t0
    psubsw        m8, m6                  ; t4
    paddsw        m6, m10, m7             ; t9a
    psubsw        m10, m7                 ; t13a
    ITX_MULSUB_2W 8, 3, 2, 7, 15, 1567, 3784   ; t5a, t4a
    ITX_MULSUB_2W 11, 10, 2, 7, 15, 1567, 3784 ; t13, t12
    mova          m7, [rsp+gprsize+32*0]  ; t10a
    mova          m2, [rsp+gprsize+32*1]  ; t6a
    paddsw        m15, m9, m13            ; -out15
    psubsw        m9, m13                 ; t3a
    paddsw        m13, m11, m1            ; -out13
    psubsw        m11, m1                 ; t15a
    psubsw        m1, m4, m7              ; t10
    paddsw        m7, m4                  ; -out1
    psubsw        m4, m3, m2              ; t6
    paddsw        m3, m2                  ; -out3
    paddsw        m2, m10, m14            ; out2
    psubsw        m10, m14                ; t14a
    paddsw        m14, m6, m0             ; out14
    psubsw        m6, m0                  ; t11
    mova          m0, [rsp+gprsize+32*2]  ; t2
    mova          [rsp+gprsize+32*1], m7  ; -out1 stays on the stack
    psubsw        m7, m12, m0             ; t2a
    paddsw        m0, m12                 ; out0
    paddsw        m12, m8, m5             ; out12
    psubsw        m8, m5                  ; t7
    ret
ALIGN function_align
; .main_pass1_end
;   Finishes the eight remaining outputs from .main's intermediates using
;   pmaddwd (32-bit products), adding pd_2048 and shifting right by 12 before
;   packing back to 16 bits.
;   out0/2/12/14 are parked in [cq+32*0..3] first because m0, m2, m12 and m14
;   are needed for scratch and constants.
;   Returns m1 = pw_8192 (the pass-1 multiplier used by the callers).
.main_pass1_end:
    mova          [cq+32*0], m0
    mova          [cq+32*1], m2
    mova          [cq+32*2], m12
    mova          [cq+32*3], m14
    ; NOTE(review): these three loads address the constants directly instead
    ; of going through o() like the final load in this routine - confirm that
    ; is intentional.
    vpbroadcastd  m14, [pw_m2896_2896]
    vpbroadcastd  m12, [pw_2896_2896]
    vpbroadcastd  m2, [pd_2048]
    punpcklwd     m5, m11, m10
    punpckhwd     m11, m10
    pmaddwd       m10, m14, m5
    pmaddwd       m0, m14, m11
    pmaddwd       m5, m12
    pmaddwd       m11, m12
    REPX          {paddd x, m2}, m10, m0, m5, m11
    REPX          {psrad x, 12}, m10, m0, m5, m11
    packssdw      m10, m0                 ; out10
    packssdw      m5, m11                 ; -out5
    punpcklwd     m11, m8, m4
    punpckhwd     m8, m4
    pmaddwd       m4, m12, m11
    pmaddwd       m0, m12, m8
    pmaddwd       m11, m14
    pmaddwd       m8, m14
    REPX          {paddd x, m2}, m4, m0, m11, m8
    REPX          {psrad x, 12}, m4, m0, m11, m8
    packssdw      m4, m0                  ; out4
    packssdw      m11, m8                 ; -out11
    punpcklwd     m8, m9, m7
    punpckhwd     m9, m7
    pmaddwd       m7, m12, m8
    pmaddwd       m0, m12, m9
    pmaddwd       m8, m14
    pmaddwd       m9, m14
    REPX          {paddd x, m2}, m7, m0, m8, m9
    REPX          {psrad x, 12}, m7, m0, m8, m9
    packssdw      m7, m0                  ; -out7
    packssdw      m8, m9                  ; out8
    punpckhwd     m0, m6, m1
    punpcklwd     m6, m1
    pmaddwd       m1, m14, m0
    pmaddwd       m9, m14, m6
    pmaddwd       m0, m12
    pmaddwd       m6, m12
    REPX          {paddd x, m2}, m1, m9, m0, m6
    REPX          {psrad x, 12}, m1, m9, m0, m6
    packssdw      m9, m1                  ; -out9
    packssdw      m6, m0                  ; out6
    vpbroadcastd  m1, [o(pw_8192)]
    ret
2797*c0909341SAndroid Build Coastguard WorkerALIGN function_align 2798*c0909341SAndroid Build Coastguard Workercglobal_label .main_pass2_end 2799*c0909341SAndroid Build Coastguard Worker ; In pass 2 we're going to clip to pixels afterwards anyway, so clipping to 2800*c0909341SAndroid Build Coastguard Worker ; 16-bit here will produce the same result as using 32-bit intermediates. 2801*c0909341SAndroid Build Coastguard Worker paddsw m5, m10, m11 ; -out5 2802*c0909341SAndroid Build Coastguard Worker psubsw m10, m11 ; out10 2803*c0909341SAndroid Build Coastguard Worker psubsw m11, m4, m8 ; -out11 2804*c0909341SAndroid Build Coastguard Worker paddsw m4, m8 ; out4 2805*c0909341SAndroid Build Coastguard Worker psubsw m8, m7, m9 ; out8 2806*c0909341SAndroid Build Coastguard Worker paddsw m7, m9 ; -out7 2807*c0909341SAndroid Build Coastguard Worker psubsw m9, m1, m6 ; -out9 2808*c0909341SAndroid Build Coastguard Worker paddsw m6, m1 ; out6 2809*c0909341SAndroid Build Coastguard Worker vpbroadcastd m1, [o(pw_2896x8)] 2810*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m1}, m4, m5, m6, m7, m8, m9, m10, m11 2811*c0909341SAndroid Build Coastguard Worker vpbroadcastd m1, [o(pw_2048)] 2812*c0909341SAndroid Build Coastguard Worker ret 2813*c0909341SAndroid Build Coastguard Worker 2814*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN flipadst, dct 2815*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN flipadst, adst 2816*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN flipadst, flipadst 2817*c0909341SAndroid Build Coastguard Worker 2818*c0909341SAndroid Build Coastguard Workercglobal iflipadst_16x16_internal_8bpc, 0, 5, 16, 32*3, dst, stride, c, eob, tx2 2819*c0909341SAndroid Build Coastguard Worker ITX_16X16_LOAD_COEFS 2820*c0909341SAndroid Build Coastguard Worker call m(iadst_16x16_internal_8bpc).main 2821*c0909341SAndroid Build Coastguard Worker call m(iadst_16x16_internal_8bpc).main_pass1_end 2822*c0909341SAndroid Build Coastguard Worker 
pmulhrsw m6, m1 2823*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m1, m8 2824*c0909341SAndroid Build Coastguard Worker mova [rsp+32*2], m6 2825*c0909341SAndroid Build Coastguard Worker pmulhrsw m6, m1, m4 2826*c0909341SAndroid Build Coastguard Worker pmulhrsw m4, m1, m10 2827*c0909341SAndroid Build Coastguard Worker pmulhrsw m8, m1, [cq+32*3] 2828*c0909341SAndroid Build Coastguard Worker pmulhrsw m10, m1, [cq+32*2] 2829*c0909341SAndroid Build Coastguard Worker pmulhrsw m12, m1, [cq+32*1] 2830*c0909341SAndroid Build Coastguard Worker pmulhrsw m14, m1, [cq+32*0] 2831*c0909341SAndroid Build Coastguard Worker pxor m0, m0 2832*c0909341SAndroid Build Coastguard Worker psubw m0, m1 2833*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m0}, m3, m5, m7, m11, m15 2834*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m0, m9 2835*c0909341SAndroid Build Coastguard Worker pmulhrsw m9, m0, m13 2836*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, [rsp+32*1] 2837*c0909341SAndroid Build Coastguard Worker mova [rsp+16*0], xm15 2838*c0909341SAndroid Build Coastguard Worker mova [rsp+16*1], xm7 2839*c0909341SAndroid Build Coastguard Worker vperm2i128 m15, m15, m7, 0x31 2840*c0909341SAndroid Build Coastguard Worker vinserti128 m7, m2, xm14, 1 2841*c0909341SAndroid Build Coastguard Worker vperm2i128 m14, m2, m14, 0x31 2842*c0909341SAndroid Build Coastguard Worker vinserti128 m2, m9, xm5, 1 2843*c0909341SAndroid Build Coastguard Worker vperm2i128 m9, m9, m5, 0x31 2844*c0909341SAndroid Build Coastguard Worker vinserti128 m5, m4, xm12, 1 2845*c0909341SAndroid Build Coastguard Worker vperm2i128 m12, m4, m12, 0x31 2846*c0909341SAndroid Build Coastguard Worker vinserti128 m4, m11, xm3, 1 2847*c0909341SAndroid Build Coastguard Worker vperm2i128 m11, m11, m3, 0x31 2848*c0909341SAndroid Build Coastguard Worker vinserti128 m3, m10, xm6, 1 2849*c0909341SAndroid Build Coastguard Worker vperm2i128 m10, m10, m6, 0x31 2850*c0909341SAndroid Build Coastguard Worker 
vinserti128 m6, m1, xm0, 1 2851*c0909341SAndroid Build Coastguard Worker vperm2i128 m13, m1, m0, 0x31 2852*c0909341SAndroid Build Coastguard Worker vinserti128 m1, m8, [rsp+32*2], 1 2853*c0909341SAndroid Build Coastguard Worker vperm2i128 m8, m8, [rsp+32*2], 0x31 2854*c0909341SAndroid Build Coastguard Worker jmp m(idct_16x16_internal_8bpc).pass1_end3 2855*c0909341SAndroid Build Coastguard Worker.pass2: 2856*c0909341SAndroid Build Coastguard Worker call m(iadst_16x16_internal_8bpc).main 2857*c0909341SAndroid Build Coastguard Worker call m(iadst_16x16_internal_8bpc).main_pass2_end 2858*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m1 2859*c0909341SAndroid Build Coastguard Worker pmulhrsw m8, m1 2860*c0909341SAndroid Build Coastguard Worker mova [rsp+32*0], m0 2861*c0909341SAndroid Build Coastguard Worker mova [rsp+32*2], m8 2862*c0909341SAndroid Build Coastguard Worker pxor m0, m0 2863*c0909341SAndroid Build Coastguard Worker psubw m0, m1 2864*c0909341SAndroid Build Coastguard Worker pmulhrsw m8, m0, m7 2865*c0909341SAndroid Build Coastguard Worker pmulhrsw m7, m0, m9 2866*c0909341SAndroid Build Coastguard Worker pmulhrsw m9, m1, m6 2867*c0909341SAndroid Build Coastguard Worker pmulhrsw m6, m1, m10 2868*c0909341SAndroid Build Coastguard Worker pmulhrsw m10, m0, m5 2869*c0909341SAndroid Build Coastguard Worker pmulhrsw m5, m0, m11 2870*c0909341SAndroid Build Coastguard Worker pmulhrsw m11, m1, m4 2871*c0909341SAndroid Build Coastguard Worker pmulhrsw m4, m1, m12 2872*c0909341SAndroid Build Coastguard Worker pmulhrsw m12, m0, m3 2873*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m0, m13 2874*c0909341SAndroid Build Coastguard Worker pmulhrsw m13, m1, m2 2875*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m14 2876*c0909341SAndroid Build Coastguard Worker pmulhrsw m14, m0, [rsp+32*1] 2877*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m15 2878*c0909341SAndroid Build Coastguard Worker lea r3, [strideq*3] 2879*c0909341SAndroid Build Coastguard 
; NOTE(review): the statements from here down to the jump to .end3 are the
; tail of a routine whose beginning lies before this span. They are kept
; unchanged and only re-flowed to one statement per line.
    WRITE_16X2          0, 1, 2, 0, strideq*0, strideq*1
    mova                m15, [rsp+32*0]
    WRITE_16X2          3, 4, 0, 1, strideq*2, r3
    lea                 dstq, [dstq+strideq*4]
    WRITE_16X2          5, 6, 0, 1, strideq*0, strideq*1
    WRITE_16X2          7, [rsp+32*2], 0, 1, strideq*2, r3
    jmp m(idct_16x16_internal_8bpc).end3

; IDTX16B src/dst, tmp, coef
;   m%2 = pmulhrsw(m%3, m%1), then arithmetically shifted right by 1
;   m%1 = pavgw(m%1, m%2)      (rounded average of the source and m%2)
; m%3 is only read; m%2 is clobbered. The only user in this span passes a
; register holding the pw_1697x16 broadcast as the third operand.
%macro IDTX16B 3 ; src/dst, tmp, pw_1697x16
    pmulhrsw            m%2, m%3, m%1
    psraw               m%2, 1
    pavgw               m%1, m%2 ; signs are guaranteed to be equal
%endmacro

; Instantiate INV_TXFM_16X16_FN (defined outside this span) for the
; identity/dct and identity/identity type pairs.
INV_TXFM_16X16_FN identity, dct
INV_TXFM_16X16_FN identity, identity

;-----------------------------------------------------------------------------
; iidentity_16x16_internal_8bpc
;
; cglobal arguments: 0 auto-loaded args, 5 GPRs, 16 vector registers,
; 32*3 bytes of stack; register names dst, stride, c, eob, tx2.
;
; Entry (first pass):
;   * m7 receives the pw_1697x16 broadcast.
;   * Sixteen ymm values are assembled. "Chunk k" below means the 16 bytes at
;     (entry cq)+16*k; each value has chunk k in its low lane and chunk k+16
;     in its high lane. cq is advanced by 16*16 part-way through, so later
;     displacements are relative to the moved pointer.
;   * Chunks 0-13 and 15 are processed with IDTX16B. Chunk 14 is processed by
;     the same three instructions written inline so that the result lands in
;     m7 (the coefficient register is consumed by that step).
;   * State at the jump to idct_16x16_internal_8bpc.pass1_end3:
;       chunk:    0      1   2  3  4  5  6  7   8  9   10 11  12 13  14 15
;       location: [rsp]  m15 m1 m8 m2 m9 m3 m10 m4 m11 m5 m12 m6 m13 m7 m14
;     m0 still holds the unscaled chunk 14/30 load.
;
; .pass2:
;   * m15 receives the pw_1697x16 broadcast; m0 is parked in [rsp+32*1] so it
;     can serve as the IDTX16 temporary for m1-m14 (IDTX16 is defined outside
;     this span).
;   * m0 is then restored and processed with m1 as temporary, m1 having been
;     parked in [rsp+32*1].
;   * The value in [rsp+32*0] is handled inline:
;       m15 = pmulhrsw(m15, x) + 2*x   (saturating adds), with x loaded in m1.
;     NOTE(review): presumably the same formula IDTX16 expands to - confirm
;     against the macro definition.
;   * Control leaves via jmp to idct_16x16_internal_8bpc.end.
;-----------------------------------------------------------------------------
cglobal iidentity_16x16_internal_8bpc, 0, 5, 16, 32*3, dst, stride, c, eob, tx2
    vpbroadcastd        m7, [o(pw_1697x16)]
    mova                xm0, [cq+16* 0]
    vinserti128         m0, [cq+16*16], 1
    mova                xm15, [cq+16* 1]
    vinserti128         m15, [cq+16*17], 1
    mova                xm1, [cq+16* 2]
    vinserti128         m1, [cq+16*18], 1
    mova                xm8, [cq+16* 3]
    vinserti128         m8, [cq+16*19], 1
    mova                xm2, [cq+16* 4]
    vinserti128         m2, [cq+16*20], 1
    mova                xm9, [cq+16* 5]
    vinserti128         m9, [cq+16*21], 1
    mova                xm3, [cq+16* 6]
    vinserti128         m3, [cq+16*22], 1
    mova                xm10, [cq+16* 7]
    ; cq moves forward by 16*16 bytes; from here [cq-16*n] addresses chunk
    ; 16-n and [cq+16*n] addresses chunk 16+n of the original buffer.
    add                 cq, 16*16
    vinserti128         m10, [cq+16* 7], 1
    mova                xm4, [cq-16* 8]
    vinserti128         m4, [cq+16* 8], 1
    mova                xm11, [cq-16* 7]
    vinserti128         m11, [cq+16* 9], 1
    mova                xm5, [cq-16* 6]
    vinserti128         m5, [cq+16*10], 1
    mova                xm12, [cq-16* 5]
    vinserti128         m12, [cq+16*11], 1
    mova                xm13, [cq-16* 3]
    vinserti128         m13, [cq+16*13], 1
    mova                xm14, [cq-16* 1]
    vinserti128         m14, [cq+16*15], 1
    ; m6 is still free here, so it is used as the IDTX16B temporary.
    REPX                {IDTX16B x, 6, 7}, 0, 15, 1, 8, 2, 9, 3, \
                        10, 4, 11, 5, 12, 13, 14
    mova                xm6, [cq-16* 4]
    vinserti128         m6, [cq+16*12], 1
    ; Park the finished chunk 0 value so m0 can become the temporary.
    mova                [rsp], m0
    IDTX16B             6, 0, 7
    mova                xm0, [cq-16* 2]
    vinserti128         m0, [cq+16*14], 1
    ; Same three steps as IDTX16B, with the result written over the
    ; coefficient register m7.
    pmulhrsw            m7, m0
    psraw               m7, 1
    pavgw               m7, m0
    jmp m(idct_16x16_internal_8bpc).pass1_end3
ALIGN function_align
.pass2:
    vpbroadcastd        m15, [o(pw_1697x16)]
    ; Free m0 for use as the IDTX16 temporary.
    mova                [rsp+32*1], m0
    REPX                {IDTX16 x, 0, 15}, 1, 2, 3, 4, 5, 6, 7, \
                        8, 9, 10, 11, 12, 13, 14
    mova                m0, [rsp+32*1]
    ; m1 is finished; park it and reuse the register as the temporary.
    mova                [rsp+32*1], m1
    IDTX16              0, 1, 15
    mova                m1, [rsp+32*0]
    ; m15 = pmulhrsw(coef, x) + 2*x for the value taken from [rsp+32*0].
    pmulhrsw            m15, m1
    paddsw              m1, m1
    paddsw              m15, m1
    jmp m(idct_16x16_internal_8bpc).end

; From here on o_base is deint_shuf + 128. NOTE(review): presumably the o()
; helper (defined outside this span) addresses constants relative to it.
%define o_base deint_shuf + 128

; LOAD_8ROWS src, stride [, is_rect2 = 0]
; Loads m0-m7 from [src + stride*i], i = 0..7.
; When is_rect2 is non-zero, m15 is first loaded with the pw_2896x8 broadcast
; and every row is loaded as pmulhrsw(m15, row); m15 is clobbered in that case.
%macro LOAD_8ROWS 2-3 0 ; src, stride, is_rect2
%if %3
    vpbroadcastd        m15, [o(pw_2896x8)]
    pmulhrsw            m0, m15, [%1+%2*0]
    pmulhrsw            m1, m15, [%1+%2*1]
    pmulhrsw            m2, m15, [%1+%2*2]
    pmulhrsw            m3, m15, [%1+%2*3]
    pmulhrsw            m4, m15, [%1+%2*4]
    pmulhrsw            m5, m15, [%1+%2*5]
    pmulhrsw            m6, m15, [%1+%2*6]
    pmulhrsw            m7, m15, [%1+%2*7]
%else
    mova                m0, [%1+%2*0]
    mova                m1, [%1+%2*1]
    mova                m2, [%1+%2*2]
    mova                m3, [%1+%2*3]
    mova                m4, [%1+%2*4]
    mova                m5, [%1+%2*5]
    mova                m6, [%1+%2*6]
    mova                m7, [%1+%2*7]
%endif
%endmacro

; LOAD_8ROWS_H src, stride [, is_rect2 = 0]
; Same as LOAD_8ROWS but targets m8-m15.
;   is_rect2 == 0 : plain loads.
;   is_rect2 == 1 : m15 is loaded with the pw_2896x8 broadcast, then every row
;                   is loaded as pmulhrsw(m15, row).
;   other non-zero: the broadcast is skipped, so m15 must already hold the
;                   multiplier on entry.
; In the scaled paths m15 is used as the multiplier for rows 0-6 and is itself
; overwritten by row 7 last.
%macro LOAD_8ROWS_H 2-3 0 ; src, stride, is_rect2
%if %3
%if %3 == 1
    vpbroadcastd        m15, [o(pw_2896x8)]
%endif
    pmulhrsw            m8, m15, [%1+%2*0]
    pmulhrsw            m9, m15, [%1+%2*1]
    pmulhrsw            m10, m15, [%1+%2*2]
    pmulhrsw            m11, m15, [%1+%2*3]
    pmulhrsw            m12, m15, [%1+%2*4]
    pmulhrsw            m13, m15, [%1+%2*5]
    pmulhrsw            m14, m15, [%1+%2*6]
    pmulhrsw            m15, [%1+%2*7]
%else
    mova                m8, [%1+%2*0]
    mova                m9, [%1+%2*1]
    mova                m10, [%1+%2*2]
    mova                m11, [%1+%2*3]
    mova                m12, [%1+%2*4]
    mova                m13, [%1+%2*5]
    mova                m14, [%1+%2*6]
    mova                m15, [%1+%2*7]
%endif
%endmacro

; ITX_UNPACK_MULHRSW dst1, dst2/src, tmp, c1, c2, c3, c4
;   m%1 = pmulhrsw(punpcklwd(src, src), pw_<c1>_<c2>x8)
;   m%2 = pmulhrsw(punpckhwd(src, src), pw_<c3>_<c4>x8)
; Each source word is duplicated into a word pair before the multiply. The
; constants are addressed relative to r5, which must point at pw_201_4091x8
; on entry. m%3 is clobbered (it holds each broadcast constant in turn).
%macro ITX_UNPACK_MULHRSW 7 ; dst1, dst2/src, tmp, coef[1-4]
    vpbroadcastd        m%3, [r5-pw_201_4091x8+pw_%4_%5x8]
    punpcklwd           m%1, m%2, m%2
    pmulhrsw            m%1, m%3
    vpbroadcastd        m%3, [r5-pw_201_4091x8+pw_%6_%7x8]
    punpckhwd           m%2, m%2
    pmulhrsw            m%2, m%3
%endmacro

;-----------------------------------------------------------------------------
; inv_txfm_add_dct_dct_8x32_8bpc(dst, stride, c, eob)
;
; Initially declared with 4 args, 4 GPRs and no vector registers; the full
; frame (16 vector registers, 32*3 bytes of stack) is only set up by the
; PROLOGUE once eob is known to be non-zero.
;
; Control flow visible in this span:
;   eob == 0   -> .dconly: prepares xm0/xm1/xm2 and r3d, then tail-jumps into
;                 inv_txfm_add_dct_dct_8x8_8bpc.dconly.
;   eob <= 106 -> only the even 32-byte coefficient rows (cq+32*{0,2,..,14})
;                 are transformed; m4-m7 are zeroed and .main_fast is called.
;   eob  > 106 -> the odd rows (cq+32*{1,3,..,15}) are transformed first, then
;                 the even rows, and .main is called.
; Each first-pass half goes through idct_16x8_internal_8bpc.main followed by a
; lane recombination (vperm2i128/vinserti128) and an unpack-based shuffle, and
; the coefficient rows it consumed are overwritten with zeros.
; .pass2 scales all results by pw_2048, reorders 64-bit quarters with vpermq
; and emits eight WRITE_8X4 groups, stepping dstq by 4*stride between groups.
;
; .main_fast and .main are exported with cglobal_label; the 32x8 routine that
; follows in the file calls .main_fast as well.
;-----------------------------------------------------------------------------
cglobal inv_txfm_add_dct_dct_8x32_8bpc, 4, 4, 0, dst, stride, c, eob
    lea                 r6, [o_base]
    test                eobd, eobd
    jz .dconly
    PROLOGUE            0, 4, 16, 32*3, dst, stride, c, eob
    ; NOTE(review): the reason cmp needs undefining after PROLOGUE is not
    ; visible in this span - see x86inc.asm.
    %undef cmp
    cmp                 eobd, 106
    jle .fast
    ; ---- odd coefficient rows: cq + 32*(2*i+1), i = 0..7 ----
    LOAD_8ROWS          cq+32*1, 32*2
    call m(idct_16x8_internal_8bpc).main
    ; Regroup 128-bit lanes: high lanes of (mN, mN+4) are paired into a new
    ; register, low lanes are paired in place.
    vperm2i128          m11, m0, m4, 0x31
    vinserti128         m0, xm4, 1
    vperm2i128          m4, m1, m5, 0x31
    vinserti128         m1, xm5, 1
    vperm2i128          m5, m2, m6, 0x31
    vinserti128         m2, xm6, 1
    vperm2i128          m6, m3, m7, 0x31
    vinserti128         m3, xm7, 1
    ; Clear the odd coefficient rows that were just consumed.
    pxor                m7, m7
    REPX                {mova [cq+32*x], m7}, 1, 3, 5, 7, 9, 11, 13, 15
    punpckhwd           m7, m0, m1
    punpcklwd           m0, m1
    punpckhwd           m1, m2, m3
    punpcklwd           m2, m3
    punpcklwd           m3, m11, m4
    punpckhwd           m11, m4
    punpckhwd           m4, m5, m6
    punpcklwd           m5, m6
    punpckhdq           m6, m0, m2
    punpckldq           m0, m2
    punpckldq           m2, m3, m5
    punpckhdq           m3, m5
    punpckhdq           m5, m11, m4
    punpckldq           m11, m4
    punpckldq           m4, m7, m1
    punpckhdq           m7, m1
    punpckhqdq          m12, m6, m0
    punpcklqdq          m0, m6 ; out4
    punpckhqdq          m13, m7, m4
    punpcklqdq          m4, m7 ; out5
    punpckhqdq          m14, m3, m2
    punpcklqdq          m2, m3 ; out6
    punpckhqdq          m15, m5, m11
    punpcklqdq          m11, m5 ; out7
    ; out4-out6 are parked on the stack; out7 stays in m11 and the remaining
    ; odd-row results stay in m12-m15 across the even-row pass below.
    mova                [rsp+32*0], m0
    mova                [rsp+32*1], m4
    mova                [rsp+32*2], m2
.fast:
    ; ---- even coefficient rows: cq + 32*(2*i), i = 0..7 ----
    LOAD_8ROWS          cq+32*0, 32*2
    call m(idct_16x8_internal_8bpc).main
    vperm2i128          m8, m0, m4, 0x31
    vinserti128         m0, xm4, 1
    vperm2i128          m4, m1, m5, 0x31
    vinserti128         m1, xm5, 1
    vperm2i128          m5, m2, m6, 0x31
    vinserti128         m2, xm6, 1
    vperm2i128          m6, m3, m7, 0x31
    vinserti128         m3, xm7, 1
    ; m9 = first-pass scaling factor, applied with pmulhrsw below.
    vpbroadcastd        m9, [o(pw_8192)]
    ; Clear the even coefficient rows that were just consumed.
    pxor                m7, m7
    REPX                {mova [cq+32*x], m7}, 0, 2, 4, 6, 8, 10, 12, 14
    punpckhwd           m7, m0, m1
    punpcklwd           m0, m1
    punpckhwd           m1, m2, m3
    punpcklwd           m2, m3
    punpckhwd           m3, m8, m4
    punpcklwd           m8, m4
    punpckhwd           m4, m5, m6
    punpcklwd           m5, m6
    punpckhdq           m6, m0, m2
    punpckldq           m0, m2
    punpckldq           m2, m8, m5
    punpckhdq           m8, m5
    punpckhdq           m5, m3, m4
    punpckldq           m3, m4
    punpckhdq           m4, m7, m1
    punpckldq           m7, m1
    punpcklqdq          m1, m7, m4
    punpckhqdq          m7, m4 ; out9
    punpckhqdq          m4, m2, m8 ; out10
    punpcklqdq          m2, m8
    punpckhqdq          m8, m3, m5
    punpcklqdq          m3, m5
    punpckhqdq          m5, m0, m6 ; out8
    punpcklqdq          m0, m6
    REPX                {pmulhrsw x, m9}, m0, m1, m2, m3, m4, m5, m7
    cmp                 eobd, 106
    jg .full
    ; Reduced path: park out8/out9/out10 in the three stack slots, scale m8
    ; into m11, and zero m4-m7 before calling the reduced helper.
    mova                [rsp+32*0], m5
    mova                [rsp+32*1], m7
    mova                [rsp+32*2], m4
    pmulhrsw            m11, m9, m8
    pxor                m4, m4
    REPX                {mova x, m4}, m5, m6, m7
    call .main_fast
    jmp .pass2
.dconly:
    ; eob == 0 path. xm0 = pmulhrsw(pw_2896x8, [cq]); xm1 and xm2 keep the two
    ; constants for the shared tail that is jumped to.
    movd                xm1, [o(pw_2896x8)]
    pmulhrsw            xm0, xm1, [cq]
    movd                xm2, [o(pw_8192)]
    ; eobd is zero here, so this clears the first four bytes at cq.
    mov                 [cq], eobd
    ; r3d is the register behind eobd (fourth named argument) and is zero on
    ; this path, so the OR leaves 32 in it. NOTE(review): presumably consumed
    ; as a row count by the .dconly tail - confirm there.
    or                  r3d, 32
    jmp m(inv_txfm_add_dct_dct_8x8_8bpc).dconly
.full:
    ; Scale the odd-row results kept in m12-m15, and swap each stack slot:
    ; the parked odd-row value is read back scaled (into m6/m4/m5) while the
    ; even-row out10/out8/out9 take its place.
    REPX                {pmulhrsw x, m9}, m12, m13, m14, m15
    pmulhrsw            m6, m9, [rsp+32*2]
    mova                [rsp+32*2], m4
    pmulhrsw            m4, m9, [rsp+32*0]
    mova                [rsp+32*0], m5
    pmulhrsw            m5, m9, [rsp+32*1]
    mova                [rsp+32*1], m7
    pmulhrsw            m7, m9, m11
    pmulhrsw            m11, m9, m8
    call .main
.pass2:
    ; Final scaling. m12 doubles as the multiplier, so the value the helper
    ; left in [rsp] (out24/out25, see the end of .main2) is scaled into it
    ; last.
    vpbroadcastd        m12, [o(pw_2048)]
    REPX                {pmulhrsw x, m12}, m0, m1, m2, m3, m4, m5, m6, m7, \
                        m8, m9, m10, m11, m13, m14, m15
    pmulhrsw            m12, [rsp]
    REPX                {vpermq x, x, q3120}, m0, m2, m4, m6, m8, m10, m12, m14
    REPX                {vpermq x, x, q2031}, m1, m3, m5, m7, m9, m11, m13, m15
    ; m4 and m6 are the scratch operands of every WRITE_8X4 below, so their
    ; values are parked and later passed as memory operands.
    mova                [rsp+32*0], m4
    mova                [rsp+32*1], m6
    lea                 r3, [strideq*3]
    WRITE_8X4           0, 1, 4, 6
    lea                 dstq, [dstq+strideq*4]
    WRITE_8X4           2, 3, 4, 6
    lea                 dstq, [dstq+strideq*4]
    WRITE_8X4           [rsp+32*0], 5, 4, 6
    lea                 dstq, [dstq+strideq*4]
    WRITE_8X4           [rsp+32*1], 7, 4, 6
    lea                 dstq, [dstq+strideq*4]
    WRITE_8X4           8, 9, 4, 6
    lea                 dstq, [dstq+strideq*4]
    WRITE_8X4           10, 11, 4, 6
    lea                 dstq, [dstq+strideq*4]
    WRITE_8X4           12, 13, 4, 6
    lea                 dstq, [dstq+strideq*4]
    WRITE_8X4           14, 15, 4, 6
    RET
ALIGN function_align
; .main_fast: reduced variant of .main. Both callers in this file zero m4-m7
; before calling it. After idct_8x16_internal_8bpc.main it swaps the three
; stack slots exactly like .main, but derives the first-stage values with
; single-constant multiplies (ITX_UNPACK_MULHRSW) and then joins .main2.
; Stack slots are addressed as [rsp+gprsize+n*32]; NOTE(review): gprsize
; presumably skips the return address pushed by the call - confirm.
cglobal_label .main_fast ; bottom half is zero
    call m(idct_8x16_internal_8bpc).main
    ; Exchange: slot 0 <-> m0 (via m8), slot 1 <-> m1 (via m9),
    ; slot 2 -> m0 while m6 is parked in slot 2.
    mova                m8, [rsp+gprsize+0*32]
    mova                [rsp+gprsize+0*32], m0
    mova                m9, [rsp+gprsize+1*32]
    mova                [rsp+gprsize+1*32], m1
    mova                m0, [rsp+gprsize+2*32]
    mova                [rsp+gprsize+2*32], m6
    ; r5 = address of pw_201_4091x8, the base ITX_UNPACK_MULHRSW expects.
    lea                 r5, [r6-(o_base)+pw_201_4091x8]
    ITX_UNPACK_MULHRSW  1, 8, 6, 201, 4091, m601, 4052 ; t16a, t31a, t23a, t24a
    ITX_UNPACK_MULHRSW  15, 9, 6, 995, 3973, m1380, 3857 ; t20a, t27a, t19a, t28a
    ITX_UNPACK_MULHRSW  14, 0, 6, 1751, 3703, m2106, 3513 ; t18a, t29a, t21a, t26a
    ITX_UNPACK_MULHRSW  13, 11, 6, 2440, 3290, m2751, 3035 ; t22a, t25a, t17a, t30a
    jmp .main2
ALIGN function_align
; .main: full variant. After idct_8x16_internal_8bpc.main and the same stack
; slot exchange as above, the inputs held in m8, m9, m0 and m11-m15 are
; interleaved into word pairs and run through ITX_MUL2X_PACK (defined outside
; this span) to form t16a..t31a, then execution falls through into .main2.
cglobal_label .main
    call m(idct_8x16_internal_8bpc).main
    mova                m8, [rsp+gprsize+0*32]
    mova                [rsp+gprsize+0*32], m0
    mova                m9, [rsp+gprsize+1*32]
    mova                [rsp+gprsize+1*32], m1
    mova                m0, [rsp+gprsize+2*32]
    mova                [rsp+gprsize+2*32], m6
    punpcklwd           m1, m15, m8 ; in31 in1
    punpckhwd           m8, m15 ; in3 in29
    punpcklwd           m15, m14, m9 ; in27 in5
    punpckhwd           m9, m14 ; in7 in25
    punpcklwd           m14, m13, m0 ; in23 in9
    punpckhwd           m0, m13 ; in11 in21
    punpcklwd           m13, m12, m11 ; in19 in13
    punpckhwd           m11, m12 ; in15 in17
    ITX_MUL2X_PACK      1, 6, 12, 10, 201, 4091, 3 ; t16a, t31a
    ITX_MUL2X_PACK      8, 6, 12, 10, 4052, 601, 3 ; t23a, t24a
    ITX_MUL2X_PACK      15, 6, 12, 10, 995, 3973, 3 ; t20a, t27a
    ITX_MUL2X_PACK      9, 6, 12, 10, 3857, 1380, 3 ; t19a, t28a
    ITX_MUL2X_PACK      14, 6, 12, 10, 1751, 3703, 3 ; t18a, t29a
    ITX_MUL2X_PACK      0, 6, 12, 10, 3513, 2106, 3 ; t21a, t26a
    ITX_MUL2X_PACK      13, 6, 12, 10, 2440, 3290, 3 ; t22a, t25a
    ITX_MUL2X_PACK      11, 6, 12, 10, 3035, 2751, 3 ; t17a, t30a
; .main2: shared remainder of .main_fast/.main. Alternates saturating
; add/subtract butterflies with ITX_MUL2X_PACK rotations on t16..t31, then
; combines them with the values in m2-m7 and the three stack slots to produce
; the outputs named in the trailing comments. out24/out25 is left in stack
; slot 0; every other output is returned in a register.
.main2:
    psubsw              m6, m1, m11 ; t17 t30
    paddsw              m1, m11 ; t16 t31
    psubsw              m11, m9, m14 ; t18 t29
    paddsw              m9, m14 ; t19 t28
    psubsw              m14, m15, m0 ; t21 t26
    paddsw              m15, m0 ; t20 t27
    psubsw              m0, m8, m13 ; t22 t25
    paddsw              m8, m13 ; t23 t24
    ITX_MUL2X_PACK      6, 12, 13, 10, 799, 4017, 3 ; t17a t30a
    ITX_MUL2X_PACK      11, 12, 13, 10, m4017, 799, 3 ; t18a t29a
    ITX_MUL2X_PACK      14, 12, 13, 10, 3406, 2276, 3 ; t21a t26a
    ITX_MUL2X_PACK      0, 12, 13, 10, m2276, 3406, 3 ; t22a t25a
    psubsw              m13, m1, m9 ; t19a t28a
    paddsw              m1, m9 ; t16a t31a
    psubsw              m9, m8, m15 ; t20a t27a
    paddsw              m8, m15 ; t23a t24a
    psubsw              m15, m6, m11 ; t18 t29
    paddsw              m6, m11 ; t17 t30
    psubsw              m11, m0, m14 ; t21 t26
    paddsw              m0, m14 ; t22 t25
    ITX_MUL2X_PACK      15, 12, 14, 10, 1567, 3784, 3 ; t18a t29a
    ITX_MUL2X_PACK      13, 12, 14, 10, 1567, 3784, 3 ; t19 t28
    ITX_MUL2X_PACK      9, 12, 14, 10, m3784, 1567, 3 ; t20 t27
    ITX_MUL2X_PACK      11, 12, 14, 10, m3784, 1567, 3 ; t21a t26a
    ; m12 = deint_shuf byte-shuffle control, applied with pshufb below.
    vbroadcasti128      m12, [o(deint_shuf)]
    psubsw              m14, m1, m8 ; t23 t24
    paddsw              m1, m8 ; t16 t31
    psubsw              m8, m6, m0 ; t22a t25a
    paddsw              m6, m0 ; t17a t30a
    psubsw              m0, m15, m11 ; t21 t26
    paddsw              m15, m11 ; t18 t29
    psubsw              m11, m13, m9 ; t20a t27a
    paddsw              m13, m9 ; t19a t28a
    REPX                {pshufb x, m12}, m1, m6, m15, m13
    ITX_MUL2X_PACK      14, 9, 12, 10, 2896, 2896 ; t24a t23a
    ; The remaining rotations take their coefficient pairs from m9/m12.
    ; NOTE(review): m12 is reloaded before each later use, presumably because
    ; the preceding ITX_MUL2X_PACK overwrites it - confirm in the macro.
    vpbroadcastd        m9, [o(pw_m2896_2896)]
    ITX_MUL2X_PACK      8, 12, _, 10, 12, 9, 4 ; t22 t25
    vpbroadcastd        m12, [o(pw_2896_2896)]
    ITX_MUL2X_PACK      0, 12, _, 10, 12, 9, 4 ; t21a t26a
    vpbroadcastd        m12, [o(pw_2896_2896)]
    ITX_MUL2X_PACK      11, 9, _, 10, 9, 12, 4 ; t27 t20
    shufps              m9, m14, m8, q1032 ; t23a t22
    vpblendd            m14, m8, 0xcc ; t24a t25
    shufps              m8, m11, m0, q1032 ; t20 t21a
    vpblendd            m11, m0, 0xcc ; t27 t26a
    punpcklqdq          m0, m1, m6 ; t16 t17a
    punpckhqdq          m1, m6 ; t31 t30a
    psubsw              m10, m5, m8 ; out20 out21
    paddsw              m5, m8 ; out11 out10
    psubsw              m6, m3, m14 ; out24 out25
    paddsw              m3, m14 ; out7 out6
    psubsw              m8, m7, m0 ; out16 out17
    paddsw              m7, m0 ; out15 out14
    mova                m0, [rsp+gprsize+0*32]
    punpcklqdq          m12, m13, m15 ; t19a t18
    punpckhqdq          m13, m15 ; t28a t29
    psubsw              m15, m0, m1 ; out31 out30
    paddsw              m0, m1 ; out0 out1
    mova                m1, [rsp+gprsize+1*32]
    ; out24/out25 is handed back through stack slot 0 because m6 is about to
    ; be reloaded from slot 2.
    mova                [rsp+gprsize+0*32], m6
    mova                m6, [rsp+gprsize+2*32]
    psubsw              m14, m1, m13 ; out28 out29
    paddsw              m1, m13 ; out3 out2
    psubsw              m13, m2, m11 ; out27 out26
    paddsw              m2, m11 ; out4 out5
    psubsw              m11, m4, m9 ; out23 out22
    paddsw              m4, m9 ; out8 out9
    psubsw              m9, m6, m12 ; out19 out18
    paddsw              m6, m12 ; out12 out13
    ret

; LOAD_PACKED_16X2 dst, tmp, row1, row2
; Broadcasts the 16-byte rows at cq+16*row1 and cq+16*row2 to both lanes of
; m%1 and m%2, then shufpd 0x0c merges them so that
;   low  lane of m%1 = { row1 low  8 bytes, row2 low  8 bytes }
;   high lane of m%1 = { row1 high 8 bytes, row2 high 8 bytes }
; m%2 is clobbered.
%macro LOAD_PACKED_16X2 4 ; dst, tmp, row[1-2]
    vbroadcasti128      m%1, [cq+16*%3]
    vbroadcasti128      m%2, [cq+16*%4]
    shufpd              m%1, m%2, 0x0c
%endmacro

; NOTE(review): inv_txfm_add_dct_dct_32x8_8bpc continues past the end of this
; span; its opening statements are kept unchanged and only re-flowed to one
; statement per line.
cglobal inv_txfm_add_dct_dct_32x8_8bpc, 4, 4, 0, dst, stride, c, eob
    lea                 r6, [o_base]
    test                eobd, eobd
    jnz .normal
    movd                xm1, [o(pw_2896x8)]
    pmulhrsw            xm0, xm1, [cq]
    movd                xm2, [o(pw_8192)]
    mov                 [cq], eobd
    or                  r3d, 8
.dconly:
    pmulhrsw            xm0, xm2
    movd                xm2, [pw_2048] ; intentionally rip-relative
    pmulhrsw            xm0, xm1
    pmulhrsw            xm0, xm2
    vpbroadcastw        m0, xm0
    pxor                m3, m3
.dconly_loop:
    mova                m1, [dstq]
    punpckhbw           m2, m1, m3
    punpcklbw           m1, m3
Worker paddw m2, m0 3292*c0909341SAndroid Build Coastguard Worker paddw m1, m0 3293*c0909341SAndroid Build Coastguard Worker packuswb m1, m2 3294*c0909341SAndroid Build Coastguard Worker mova [dstq], m1 3295*c0909341SAndroid Build Coastguard Worker add dstq, strideq 3296*c0909341SAndroid Build Coastguard Worker dec r3d 3297*c0909341SAndroid Build Coastguard Worker jg .dconly_loop 3298*c0909341SAndroid Build Coastguard Worker RET 3299*c0909341SAndroid Build Coastguard Worker.normal: 3300*c0909341SAndroid Build Coastguard Worker PROLOGUE 0, 4, 16, 32*3, dst, stride, c, eob 3301*c0909341SAndroid Build Coastguard Worker %undef cmp 3302*c0909341SAndroid Build Coastguard Worker LOAD_PACKED_16X2 0, 7, 0, 2 ; in0 in2 3303*c0909341SAndroid Build Coastguard Worker LOAD_PACKED_16X2 4, 7, 1, 3 ; in1 in3 3304*c0909341SAndroid Build Coastguard Worker LOAD_PACKED_16X2 1, 7, 4, 6 ; in4 in6 3305*c0909341SAndroid Build Coastguard Worker LOAD_PACKED_16X2 5, 7, 5, 7 ; in5 in7 3306*c0909341SAndroid Build Coastguard Worker pxor m8, m8 3307*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+32*x], m8}, 0, 1, 2, 3 3308*c0909341SAndroid Build Coastguard Worker add cq, 16*16 3309*c0909341SAndroid Build Coastguard Worker LOAD_PACKED_16X2 2, 7, -8, -6 ; in8 in10 3310*c0909341SAndroid Build Coastguard Worker LOAD_PACKED_16X2 6, 7, -7, -5 ; in9 in11 3311*c0909341SAndroid Build Coastguard Worker LOAD_PACKED_16X2 3, 7, -4, -2 ; in12 in14 3312*c0909341SAndroid Build Coastguard Worker LOAD_PACKED_16X2 11, 7, -3, -1 ; in13 in15 3313*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+32*x], m8}, -4, -3, -2, -1 3314*c0909341SAndroid Build Coastguard Worker mova [rsp+32*0], m4 3315*c0909341SAndroid Build Coastguard Worker mova [rsp+32*1], m5 3316*c0909341SAndroid Build Coastguard Worker mova [rsp+32*2], m6 3317*c0909341SAndroid Build Coastguard Worker cmp eobd, 106 3318*c0909341SAndroid Build Coastguard Worker jg .full 3319*c0909341SAndroid Build Coastguard Worker pxor m4, m4 
3320*c0909341SAndroid Build Coastguard Worker REPX {mova x, m4}, m5, m6, m7 3321*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_8x32_8bpc).main_fast 3322*c0909341SAndroid Build Coastguard Worker jmp .pass2 3323*c0909341SAndroid Build Coastguard Worker.full: 3324*c0909341SAndroid Build Coastguard Worker LOAD_PACKED_16X2 4, 7, 0, 2 ; in16 in18 3325*c0909341SAndroid Build Coastguard Worker LOAD_PACKED_16X2 12, 7, 3, 1 ; in19 in17 3326*c0909341SAndroid Build Coastguard Worker LOAD_PACKED_16X2 5, 7, 4, 6 ; in20 in22 3327*c0909341SAndroid Build Coastguard Worker LOAD_PACKED_16X2 13, 7, 7, 5 ; in23 in21 3328*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+32*x], m8}, 0, 1, 2, 3 3329*c0909341SAndroid Build Coastguard Worker add cq, 16*8 3330*c0909341SAndroid Build Coastguard Worker LOAD_PACKED_16X2 6, 7, 0, 2 ; in24 in26 3331*c0909341SAndroid Build Coastguard Worker LOAD_PACKED_16X2 14, 7, 3, 1 ; in27 in25 3332*c0909341SAndroid Build Coastguard Worker LOAD_PACKED_16X2 7, 8, 4, 6 ; in28 in30 3333*c0909341SAndroid Build Coastguard Worker LOAD_PACKED_16X2 15, 8, 7, 5 ; in31 in29 3334*c0909341SAndroid Build Coastguard Worker pxor m8, m8 3335*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+32*x], m8}, 0, 1, 2, 3 3336*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_8x32_8bpc).main 3337*c0909341SAndroid Build Coastguard Worker.pass2: 3338*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [o(pw_8192)] 3339*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m12}, m8, m9, m10, m11, m13, m14, m15 3340*c0909341SAndroid Build Coastguard Worker mova [rsp+32*1], m9 3341*c0909341SAndroid Build Coastguard Worker mova [rsp+32*2], m10 3342*c0909341SAndroid Build Coastguard Worker punpckhwd m9, m0, m2 3343*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m2 3344*c0909341SAndroid Build Coastguard Worker punpckhwd m2, m1, m3 3345*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m3 3346*c0909341SAndroid Build 
Coastguard Worker punpcklwd m10, m4, m6 3347*c0909341SAndroid Build Coastguard Worker punpckhwd m4, m6 3348*c0909341SAndroid Build Coastguard Worker punpcklwd m6, m5, m7 3349*c0909341SAndroid Build Coastguard Worker punpckhwd m5, m7 3350*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m0, m9 3351*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m9 3352*c0909341SAndroid Build Coastguard Worker punpckhwd m9, m2, m1 3353*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m1 3354*c0909341SAndroid Build Coastguard Worker punpcklwd m7, m10, m4 3355*c0909341SAndroid Build Coastguard Worker punpckhwd m10, m4 3356*c0909341SAndroid Build Coastguard Worker punpcklwd m4, m5, m6 3357*c0909341SAndroid Build Coastguard Worker punpckhwd m5, m6 3358*c0909341SAndroid Build Coastguard Worker punpckhdq m1, m0, m2 3359*c0909341SAndroid Build Coastguard Worker punpckldq m0, m2 3360*c0909341SAndroid Build Coastguard Worker punpckldq m2, m3, m9 3361*c0909341SAndroid Build Coastguard Worker punpckhdq m3, m9 3362*c0909341SAndroid Build Coastguard Worker punpckldq m6, m7, m4 3363*c0909341SAndroid Build Coastguard Worker punpckhdq m7, m4 3364*c0909341SAndroid Build Coastguard Worker punpckldq m9, m10, m5 3365*c0909341SAndroid Build Coastguard Worker punpckhdq m10, m5 3366*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m12}, m0, m1, m2, m3, m6, m7, m9, m10 3367*c0909341SAndroid Build Coastguard Worker pmulhrsw m12, [rsp+32*0] 3368*c0909341SAndroid Build Coastguard Worker mova [rsp+32*0], m8 3369*c0909341SAndroid Build Coastguard Worker vperm2i128 m4, m0, m6, 0x31 3370*c0909341SAndroid Build Coastguard Worker vinserti128 m0, xm6, 1 3371*c0909341SAndroid Build Coastguard Worker vperm2i128 m5, m1, m7, 0x31 3372*c0909341SAndroid Build Coastguard Worker vinserti128 m1, xm7, 1 3373*c0909341SAndroid Build Coastguard Worker vperm2i128 m6, m2, m9, 0x31 3374*c0909341SAndroid Build Coastguard Worker vinserti128 m2, xm9, 1 3375*c0909341SAndroid Build Coastguard Worker 
; (continuation of the routine that starts above this point: finish the lane
; shuffle, run the first 16x8 column pass and write eight output rows)
    vperm2i128      m7, m3, m10, 0x31
    vinserti128     m3, xm10, 1
    call            m(idct_16x8_internal_8bpc).main
    vpbroadcastd    m8, [o(pw_2048)]
    REPX            {pmulhrsw x, m8}, m0, m1, m2, m3, m4, m5, m6, m7
    lea             r2, [strideq*3]
    WRITE_16X2      0, 1, 8, 0, strideq*0, strideq*1
    WRITE_16X2      2, 3, 0, 1, strideq*2, r2
    lea             r3, [dstq+strideq*4]          ; r3 = dst + 4 rows
    ; Preprocessor alias only: lines below that mention dstq now assemble
    ; against r3 (presumably WRITE_16X2 addresses through dstq - confirm).
    %define dstq r3
    WRITE_16X2      4, 5, 0, 1, strideq*0, strideq*1
    WRITE_16X2      6, 7, 0, 1, strideq*2, r2
    ; Second half: reload the three rows parked on the stack and combine them
    ; with m11-m15 using the same unpack/permute sequence as before.
    mova            m0, [rsp+32*0]
    mova            m1, [rsp+32*1]
    mova            m2, [rsp+32*2]
    punpckhwd       m7, m0, m2
    punpcklwd       m0, m2
    punpckhwd       m2, m1, m11
    punpcklwd       m1, m11
    punpckhwd       m4, m12, m14
    punpcklwd       m12, m14
    punpckhwd       m5, m13, m15
    punpcklwd       m13, m15
    punpckhwd       m3, m0, m7
    punpcklwd       m0, m7
    punpckhwd       m9, m2, m1
    punpcklwd       m2, m1
    punpcklwd       m7, m12, m4
    punpckhwd       m12, m4
    punpcklwd       m4, m5, m13
    punpckhwd       m5, m13
    punpckhdq       m1, m0, m2
    punpckldq       m0, m2
    punpckldq       m2, m3, m9
    punpckhdq       m3, m9
    punpckldq       m6, m7, m4
    punpckhdq       m7, m4
    punpckldq       m9, m12, m5
    punpckhdq       m12, m5
    vperm2i128      m4, m0, m6, 0x31
    vinserti128     m0, xm6, 1
    vperm2i128      m5, m1, m7, 0x31
    vinserti128     m1, xm7, 1
    vperm2i128      m6, m2, m9, 0x31
    vinserti128     m2, xm9, 1
    vperm2i128      m7, m3, m12, 0x31
    vinserti128     m3, xm12, 1
    call            m(idct_16x8_internal_8bpc).main2
    vpbroadcastd    m8, [o(pw_2048)]
    REPX            {pmulhrsw x, m8}, m0, m1, m2, m3, m4, m5, m6, m7
    add             r0, 16                        ; both row pointers move
    add             r3, 16                        ; 16 bytes to the right
    %define dstq r0
    WRITE_16X2      0, 1, 8, 0, strideq*0, strideq*1
    WRITE_16X2      2, 3, 0, 1, strideq*2, r2
    %define dstq r3
    WRITE_16X2      4, 5, 0, 1, strideq*0, strideq*1
    WRITE_16X2      6, 7, 0, 1, strideq*2, r2
    RET

; inv_txfm_add_identity_identity_8x32_8bpc(dst, stride, c, eob)
;
; Each loop iteration:
;   1. gathers eight ymm registers from pairs of 16-byte coefficient slots
;      at cq and clears exactly the slots it has read,
;   2. adds the pw_5 constant (signed saturating), transposes, and shifts
;      every word right arithmetically by 3,
;   3. stores through WRITE_8X4 with row offsets of 4, 8 and 12 strides,
;      stepping dst one row between the four stores.
; The loop runs once when eob < 107 and twice otherwise (see the counter
; trick at the bottom). The second iteration starts 32 bytes further into
; the coefficient buffer, i.e. on the slots the first iteration skipped.
; Uses m0-m10 and r4 (= 3*stride).
cglobal inv_txfm_add_identity_identity_8x32_8bpc, 4, 5, 11, dst, stride, c, eob
    vpbroadcastd    m9, [pw_5]
    lea             r4, [strideq*3]
    sub             eobd, 107       ; loop_iterations = 1 + (eobd >= 107)
.loop:
    mova            xm0, [cq+16* 0]
    mova            xm1, [cq+16* 4]
    vinserti128     m0, [cq+16* 1], 1
    vinserti128     m1, [cq+16* 5], 1
    pxor            m8, m8
    mova            [cq+32*0], m8                 ; clear what m0/m1 just read
    mova            [cq+32*2], m8
    add             cq, 16*16                     ; re-centre cq so the rest fits in short offsets
    mova            xm2, [cq-16* 8]
    mova            xm3, [cq-16* 4]
    vinserti128     m2, [cq-16* 7], 1
    vinserti128     m3, [cq-16* 3], 1
    mova            xm4, [cq+16* 0]
    mova            xm5, [cq+16* 4]
    vinserti128     m4, [cq+16* 1], 1
    vinserti128     m5, [cq+16* 5], 1
    mova            xm6, [cq+16* 8]
    mova            xm7, [cq+16*12]
    vinserti128     m6, [cq+16* 9], 1
    vinserti128     m7, [cq+16*13], 1
    REPX            {mova [cq+32*x], m8}, -4, -2, 0, 2, 4, 6
    REPX            {paddsw x, m9}, m0, m1, m2, m3, m4, m5, m6, m7
    call            .transpose8x8
    REPX            {psraw x, 3 }, m0, m1, m2, m3, m4, m5, m6, m7
    WRITE_8X4       0, 4, 8, 10, strideq*8, strideq*4, r4*4
    add             dstq, strideq
    WRITE_8X4       1, 5, 0, 4, strideq*8, strideq*4, r4*4
    add             dstq, strideq
    WRITE_8X4       2, 6, 0, 4, strideq*8, strideq*4, r4*4
    add             dstq, strideq
    WRITE_8X4       3, 7, 0, 4, strideq*8, strideq*4, r4*4
    add             dstq, strideq
    sub             cq, 16*16-32                  ; net effect per iteration: cq += 32
    lea             dstq, [dstq+r4*4]             ; dst += 12 rows (16 in total with the four adds)
    ; eobd went negative above iff eob < 107. Adding 0x80000000 carries out
    ; when bit 31 is set, so the loop exits after one pass in that case;
    ; otherwise the add sets bit 31 and the next pass exits.
    add             eobd, 0x80000000
    jnc             .loop
    RET
ALIGN function_align
; Transposes the 8x8 word matrix held in each 128-bit lane of m0-m7.
; In/out: m0-m7. Clobbers m8. Also called from the 32x8 identity transform.
.transpose8x8:
    punpckhwd       m8, m4, m5
    punpcklwd       m4, m5
    punpckhwd       m5, m0, m1
    punpcklwd       m0, m1
    punpckhwd       m1, m6, m7
    punpcklwd       m6, m7
    punpckhwd       m7, m2, m3
    punpcklwd       m2, m3
    punpckhdq       m3, m0, m2
    punpckldq       m0, m2
    punpckldq       m2, m4, m6
    punpckhdq       m4, m6
    punpckhdq       m6, m5, m7
    punpckldq       m5, m7
    punpckldq       m7, m8, m1
    punpckhdq       m8, m1
    punpckhqdq      m1, m0, m2
    punpcklqdq      m0, m2
    punpcklqdq      m2, m3, m4
    punpckhqdq      m3, m4
    punpcklqdq      m4, m5, m7
    punpckhqdq      m5, m7
    punpckhqdq      m7, m6, m8
    punpcklqdq      m6, m8
    ret

; inv_txfm_add_identity_identity_32x8_8bpc(dst, stride, c, eob)
;
; Each loop iteration loads 256 bytes of coefficients (low lanes from the
; 128 bytes below cq, high lanes from the 128 bytes above it), clears them,
; transposes, multiplies by the pw_4096 constant with pmulhrsw and stores
; eight rows: rows 0-3 through r0, rows 4-7 through r5 (= dst + 4*stride).
; Both pointers then advance 16 bytes. The iteration count is 1 when
; eob < 107 and 2 otherwise, using the same carry trick as the 8x32 version.
cglobal inv_txfm_add_identity_identity_32x8_8bpc, 4, 6, 10, dst, stride, c, eob
    add             cq, 16*8
    vpbroadcastd    m9, [pw_4096]
    lea             r4, [strideq*3]
    lea             r5, [dstq+strideq*4]
    sub             eobd, 107
.loop:
    mova            xm0, [cq-16*8]
    mova            xm1, [cq-16*7]
    vinserti128     m0, [cq+16*0], 1
    vinserti128     m1, [cq+16*1], 1
    mova            xm2, [cq-16*6]
    mova            xm3, [cq-16*5]
    vinserti128     m2, [cq+16*2], 1
    vinserti128     m3, [cq+16*3], 1
    mova            xm4, [cq-16*4]
    mova            xm5, [cq-16*3]
    vinserti128     m4, [cq+16*4], 1
    vinserti128     m5, [cq+16*5], 1
    mova            xm6, [cq-16*2]
    mova            xm7, [cq-16*1]
    vinserti128     m6, [cq+16*6], 1
    vinserti128     m7, [cq+16*7], 1
    pxor            m8, m8
    REPX            {mova [cq+32*x], m8}, -4, -3, -2, -1, 0, 1, 2, 3
    call            m(inv_txfm_add_identity_identity_8x32_8bpc).transpose8x8
    REPX            {pmulhrsw x, m9}, m0, m1, m2, m3, m4, m5, m6, m7
    WRITE_16X2      0, 1, 8, 0, strideq*0, strideq*1
    WRITE_16X2      2, 3, 0, 1, strideq*2, r4
    ; Textual alias: only the two stores below are assembled against r5.
    ; The two above keep using r0 on every iteration.
    %define dstq r5
    WRITE_16X2      4, 5, 0, 1, strideq*0, strideq*1
    WRITE_16X2      6, 7, 0, 1, strideq*2, r4
    add             cq, 16*16
    add             r0, 16
    add             r5, 16
    add             eobd, 0x80000000
    jnc             .loop
    RET

; Base symbol loaded into r6 by the functions below (lea r6, [o_base]).
; NOTE(review): presumably the o() macro addresses constants relative to
; this base - o() is defined outside this section, confirm there.
%define o_base pw_5 + 128

; LOAD_16ROWS src, stride [, is_rect2 = 0 [, zero_coefs = 1]]
;
; Loads 16 rows, [src + stride*i] for i = 0..15, into m0-m15.
;   is_rect2   != 0: each row is multiplied by pw_2896x8 (pmulhrsw) on load.
;   zero_coefs != 0: the 16 source rows are overwritten with zeros.
; Row 15 is always copied to [rsp]. When zero_coefs is set, m15 is then
; reused as the zero register, so [rsp] holds the only copy of row 15.
%macro LOAD_16ROWS 2-4 0, 1 ; src, stride, is_rect2, zero_coefs
%if %3
    vpbroadcastd    m15, [o(pw_2896x8)]
    pmulhrsw        m0, m15, [%1+%2* 0]
    pmulhrsw        m1, m15, [%1+%2* 1]
    pmulhrsw        m2, m15, [%1+%2* 2]
    pmulhrsw        m3, m15, [%1+%2* 3]
    pmulhrsw        m4, m15, [%1+%2* 4]
    pmulhrsw        m5, m15, [%1+%2* 5]
    pmulhrsw        m6, m15, [%1+%2* 6]
    pmulhrsw        m7, m15, [%1+%2* 7]
    pmulhrsw        m8, m15, [%1+%2* 8]
    pmulhrsw        m9, m15, [%1+%2* 9]
    pmulhrsw        m10, m15, [%1+%2*10]
    pmulhrsw        m11, m15, [%1+%2*11]
    pmulhrsw        m12, m15, [%1+%2*12]
    pmulhrsw        m13, m15, [%1+%2*13]
    pmulhrsw        m14, m15, [%1+%2*14]
    pmulhrsw        m15, [%1+%2*15]
%else
    mova            m0, [%1+%2* 0]
    mova            m1, [%1+%2* 1]
    mova            m2, [%1+%2* 2]
    mova            m3, [%1+%2* 3]
    mova            m4, [%1+%2* 4]
    mova            m5, [%1+%2* 5]
    mova            m6, [%1+%2* 6]
    mova            m7, [%1+%2* 7]
    mova            m8, [%1+%2* 8]
    mova            m9, [%1+%2* 9]
    mova            m10, [%1+%2*10]
    mova            m11, [%1+%2*11]
    mova            m12, [%1+%2*12]
    mova            m13, [%1+%2*13]
    mova            m14, [%1+%2*14]
    mova            m15, [%1+%2*15]
%endif
    mova            [rsp], m15
%if %4
    pxor            m15, m15
    REPX            {mova [%1+%2*x], m15}, 0, 1, 2, 3, 4, 5, 6, 7, \
                    8, 9, 10, 11, 12, 13, 14, 15
%endif
%endmacro

; IDCT32_PASS2_END coef1, coef2, tmp1, tmp2, rnd, offset1, offset2
;
;   %1        register number holding the first coefficient row (clobbered)
;   %2        memory address of the second coefficient row (read as [%2])
;   %3, %4    scratch register numbers
;   %5        register number holding the pmulhrsw multiplier
;   %6, %7    byte offsets applied to dstq and r2 respectively
;
; Forms the saturating sum and difference of the two rows, scales both by
; m%5, adds the zero-extended bytes at [dstq+%6] and [r2+%7], packs the
; result with unsigned saturation and stores 16 bytes to each location.
%macro IDCT32_PASS2_END 7 ; coefs[1-2], tmp[1-2], rnd, offset[1-2]
    mova            m%4, [%2]
    paddsw          m%3, m%1, m%4
    psubsw          m%1, m%4
    pmovzxbw        m%4, [dstq+%6]
    pmulhrsw        m%3, m%5
    pmulhrsw        m%1, m%5
    paddw           m%3, m%4
    pmovzxbw        m%4, [r2+%7]
    paddw           m%1, m%4
    packuswb        m%3, m%1
    vpermq          m%3, m%3, q3120               ; regroup so each 128-bit half is one row
    mova            [dstq+%6], xm%3
    vextracti128    [r2+%7], m%3, 1
%endmacro

; inv_txfm_add_dct_dct_16x32_8bpc(dst, stride, c, eob)
;
; eob == 0 takes the .dconly shortcut before any stack frame is set up.
; Otherwise a frame of 32*35 bytes is allocated and the first 16 rows are
; transformed. eob > 150 selects the .full path; smaller values use
; .main_oddhalf_fast and treat the remaining inputs as zero.
cglobal inv_txfm_add_dct_dct_16x32_8bpc, 4, 4, 0, dst, stride, c, eob
    lea             r6, [o_base]
    test            eobd, eobd
    jz              .dconly
    PROLOGUE        0, 8, 16, 32*35, dst, stride, c, eob, tmp1, tmp2, \
                    base, tmp3
    ; NOTE(review): presumably drops a macro-level redefinition of `cmp`
    ; so the plain instruction is assembled below - confirm against x86inc.
    %undef cmp
    LOAD_16ROWS     cq, 64, 1
    call            m(idct_16x16_internal_8bpc).main
    lea             tmp1q, [rsp+32*7]
    lea             tmp2q, [tmp1q+32*8]
    lea             tmp3q, [tmp1q+32*16]
    mova            m1, [rsp+32*1]
    mova            [rsp+32*0], m6
    mova            [rsp+32*1], m7
    vpbroadcastd    m7, [o(pw_16384)]
    call            .transpose_2x8x8_round
    mova            m15, [rsp+32*0]
    ; Park the even-numbered registers in the tmp3 area, split into
    ; 128-bit halves (low half below tmp3q, high half at/above it).
    mova            [tmp3q-32*4+ 0], xm0
    vextracti128    [tmp3q+32*0+ 0], m0, 1
    mova            [tmp3q-32*3+ 0], xm2
    vextracti128    [tmp3q+32*1+ 0], m2, 1
    mova            [tmp3q-32*2+ 0], xm4
    vextracti128    [tmp3q+32*2+ 0], m4, 1
    mova            [tmp3q-32*1+ 0], xm6
    vextracti128    [tmp3q+32*3+ 0], m6, 1
    mova            [tmp3q-32*4+16], xm8
    vextracti128    [tmp3q+32*0+16], m8, 1
    mova            [tmp3q-32*3+16], xm10
    vextracti128    [tmp3q+32*1+16], m10, 1
    mova            [tmp3q-32*2+16], xm12
    vextracti128    [tmp3q+32*2+16], m12, 1
    mova            [tmp3q-32*1+16], xm14
    vextracti128    [tmp3q+32*3+16], m14, 1
    cmp             eobd, 150
    jg              .full
    ; Reduced path: regroup the odd-numbered registers lane-wise.
    vinserti128     m0, m1, xm9, 1
    vperm2i128      m4, m1, m9, 0x31
    vinserti128     m2, m5, xm13, 1
    vperm2i128      m6, m5, m13, 0x31
    vinserti128     m1, m3, xm11, 1
    vperm2i128      m5, m3, m11, 0x31
    vinserti128     m3, m7, xm15, 1
    vperm2i128      m7, m7, m15, 0x31
    call            .main_oddhalf_fast
    pxor            m8, m8
    REPX            {mova x, m8}, m9, m10, m11, m12, m13, m14, m15
    jmp             .idct16
.dconly:
    movd            xm1, [o(pw_2896x8)]
    pmulhrsw        xm0, xm1, [cq]
    movd            xm2, [o(pw_16384)]
; (continuation of inv_txfm_add_dct_dct_16x32_8bpc, inside .dconly)
    mov             [cq], eobd                    ; eobd is 0 on this path (jz after test), so this clears [cq]
    pmulhrsw        xm0, xm1
    or              r3d, 32                       ; r3d is the eob argument (0 here), so r3d becomes 32
    jmp             m(inv_txfm_add_dct_dct_16x4_8bpc).dconly
.full:
    ; Keep the odd-numbered registers of the first half in the tmp1 area
    ; while the second set of 16 input rows is transformed.
    mova            [tmp1q-32*4], m1
    mova            [tmp1q-32*3], m3
    mova            [tmp1q-32*2], m5
    mova            [tmp1q-32*1], m7
    mova            [tmp1q+32*0], m9
    mova            [tmp1q+32*1], m11
    mova            [tmp1q+32*2], m13
    mova            [tmp1q+32*3], m15
    LOAD_16ROWS     cq+32, 64, 1
    call            m(idct_16x16_internal_8bpc).main
    lea             r2, [tmp3q+32*8]
    mova            m1, [rsp+32*1]
    mova            [rsp+32*0], m6
    mova            [rsp+32*1], m7
    vpbroadcastd    m7, [o(pw_16384)]
    call            .transpose_2x8x8_round
    mova            m15, [rsp+32*0]
    ; Same half-register layout as the tmp3 area, 32*8 bytes further on.
    mova            [r2-32*4+ 0], xm0
    vextracti128    [r2+32*0+ 0], m0, 1
    mova            [r2-32*3+ 0], xm2
    vextracti128    [r2+32*1+ 0], m2, 1
    mova            [r2-32*2+ 0], xm4
    vextracti128    [r2+32*2+ 0], m4, 1
    mova            [r2-32*1+ 0], xm6
    vextracti128    [r2+32*3+ 0], m6, 1
    mova            [r2-32*4+16], xm8
    vextracti128    [r2+32*0+16], m8, 1
    mova            [r2-32*3+16], xm10
    vextracti128    [r2+32*1+16], m10, 1
    mova            [r2-32*2+16], xm12
    vextracti128    [r2+32*2+16], m12, 1
    mova            [r2-32*1+16], xm14
    vextracti128    [r2+32*3+16], m14, 1
    ; Build the 16 inputs of .main_oddhalf: m8-m15 from the registers just
    ; produced, m0-m7 from the halves saved in the tmp1 area above.
    vinserti128     m8, m1, xm9, 1
    vperm2i128      m12, m1, m9, 0x31
    mova            xm0, [tmp1q-32*4]
    mova            xm1, [tmp1q-32*3]
    vinserti128     m0, [tmp1q+32*0], 1
    vinserti128     m1, [tmp1q+32*1], 1
    vinserti128     m10, m5, xm13, 1
    vperm2i128      m14, m5, m13, 0x31
    mova            xm4, [tmp1q-32*4+16]
    mova            xm5, [tmp1q-32*3+16]
    vinserti128     m4, [tmp1q+32*0+16], 1
    vinserti128     m5, [tmp1q+32*1+16], 1
    vinserti128     m9, m3, xm11, 1
    vperm2i128      m13, m3, m11, 0x31
    mova            xm2, [tmp1q-32*2]
    mova            xm3, [tmp1q-32*1]
    vinserti128     m2, [tmp1q+32*2], 1
    vinserti128     m3, [tmp1q+32*3], 1
    vinserti128     m11, m7, xm15, 1
    vperm2i128      m15, m7, m15, 0x31
    mova            xm6, [tmp1q-32*2+16]
    mova            xm7, [tmp1q-32*1+16]
    vinserti128     m6, [tmp1q+32*2+16], 1
    vinserti128     m7, [tmp1q+32*3+16], 1
    call            .main_oddhalf
    LOAD_8ROWS_H    r2-32*4, 32
.idct16:
    LOAD_8ROWS      tmp3q-32*4, 32
    mova            [rsp], m15
    call            m(idct_16x16_internal_8bpc).main
    imul            r2, strideq, 19
    lea             r3, [strideq*3]
    add             r2, dstq                      ; r2 = dst + 19*stride
    call            .pass2_end
    RET
ALIGN function_align
; .main_oddhalf_fast - reduced entry into the odd-half butterfly network.
; Reads m0-m7 only. The three stack slots used by .main2 are seeded with
; m7 (slot 1) and zeros (slots 0 and 2). Each remaining input is turned
; into its output pair with two pmulhrsw by broadcast constants, instead
; of the full ITX_MULSUB_2W rotation, before joining .main2.
cglobal_label .main_oddhalf_fast ; lower half is zero
    mova            [rsp+gprsize+32*1], m7
    pxor            m7, m7
    mova            [rsp+gprsize+32*0], m7
    mova            [rsp+gprsize+32*2], m7
    vpbroadcastd    m11, [o(pw_3703x8)]
    vpbroadcastd    m7, [o(pw_1751x8)]
    vpbroadcastd    m12, [o(pw_m1380x8)]
    vpbroadcastd    m8, [o(pw_3857x8)]
    vpbroadcastd    m13, [o(pw_3973x8)]
    vpbroadcastd    m15, [o(pw_995x8)]
    pmulhrsw        m11, m4 ; t29a
    pmulhrsw        m4, m7 ; t18a
    pmulhrsw        m12, m3 ; t19a
    pmulhrsw        m3, m8 ; t28a
    pmulhrsw        m13, m2 ; t27a
    pmulhrsw        m2, m15 ; t20a
    vpbroadcastd    m10, [o(pw_m2106x8)]
    vpbroadcastd    m7, [o(pw_3513x8)]
    vpbroadcastd    m9, [o(pw_3290x8)]
    vpbroadcastd    m8, [o(pw_2440x8)]
    vpbroadcastd    m14, [o(pw_m601x8)]
    vpbroadcastd    m15, [o(pw_4052x8)]
    pmulhrsw        m10, m5 ; t21a
    pmulhrsw        m5, m7 ; t26a
    pmulhrsw        m9, m6 ; t25a
    pmulhrsw        m6, m8 ; t22a
    pmulhrsw        m14, m1 ; t23a
    pmulhrsw        m1, m15 ; t24a
    vpbroadcastd    m15, [o(pd_2048)]
    jmp             .main2
ALIGN function_align
; .main_oddhalf - full entry. Reads m0-m15. m15, m7 and m8 are spilled to
; the three stack slots at [rsp+gprsize+32*{0,1,2}] so the registers can
; serve as rounding constant and temporaries. Results are stored to
; [tmp1q-32*4 .. tmp1q+32*3] and [tmp2q-32*4 .. tmp2q+32*3].
; The trailing tNN comments name the intermediate held after each line.
cglobal_label .main_oddhalf
    mova            [rsp+gprsize+32*0], m15
    mova            [rsp+gprsize+32*1], m7
    mova            [rsp+gprsize+32*2], m8
    vpbroadcastd    m15, [o(pd_2048)]
    ITX_MULSUB_2W   4, 11, 7, 8, 15, 1751, 3703 ; t18a, t29a
    ITX_MULSUB_2W   12, 3, 7, 8, 15, 3857, 1380 ; t19a, t28a
    ITX_MULSUB_2W   2, 13, 7, 8, 15, 995, 3973 ; t20a, t27a
    ITX_MULSUB_2W   10, 5, 7, 8, 15, 3513, 2106 ; t21a, t26a
    ITX_MULSUB_2W   6, 9, 7, 8, 15, 2440, 3290 ; t22a, t25a
    ITX_MULSUB_2W   14, 1, 7, 8, 15, 4052, 601 ; t23a, t24a
.main2:
    psubsw          m7, m12, m4 ; t18
    paddsw          m12, m4 ; t19
    psubsw          m4, m2, m10 ; t21
    paddsw          m2, m10 ; t20
    psubsw          m10, m14, m6 ; t22
    paddsw          m14, m6 ; t23
    psubsw          m6, m1, m9 ; t25
    paddsw          m1, m9 ; t24
    psubsw          m9, m13, m5 ; t26
    paddsw          m13, m5 ; t27
    psubsw          m5, m3, m11 ; t29
    paddsw          m3, m11 ; t28
    ITX_MULSUB_2W   5, 7, 8, 11, 15, m4017, 799 ; t18a, t29a
    ITX_MULSUB_2W   9, 4, 8, 11, 15, 3406, 2276 ; t21a, t26a
    ITX_MULSUB_2W   6, 10, 8, 11, 15, m2276, 3406 ; t22a, t25a
    psubsw          m8, m14, m2 ; t20a
    paddsw          m14, m2 ; t23a
    psubsw          m2, m1, m13 ; t27a
    paddsw          m1, m13 ; t24a
    psubsw          m13, m6, m9 ; t21
    paddsw          m6, m9 ; t22
    psubsw          m9, m10, m4 ; t26
    paddsw          m10, m4 ; t25
    ITX_MULSUB_2W   2, 8, 4, 11, 15, m3784, 1567 ; t20, t27
    ITX_MULSUB_2W   9, 13, 4, 11, 15, m3784, 1567 ; t21a, t26a
    ; Swap through the stack slots: pull in the three inputs spilled at
    ; entry and park three intermediates in their place.
    mova            m4, [rsp+gprsize+32*0] ; in31
    mova            [rsp+gprsize+32*0], m6 ; t22
    mova            m6, [rsp+gprsize+32*1] ; in15
    mova            [rsp+gprsize+32*1], m14 ; t23a
    mova            m14, [rsp+gprsize+32*2] ; in17
    mova            [rsp+gprsize+32*2], m1 ; t24a
    ITX_MULSUB_2W   0, 4, 1, 11, 15, 201, 4091 ; t16a, t31a
    ITX_MULSUB_2W   14, 6, 1, 11, 15, 3035, 2751 ; t17a, t30a
    psubsw          m1, m0, m14 ; t17
    paddsw          m0, m14 ; t16
    psubsw          m14, m4, m6 ; t30
    paddsw          m4, m6 ; t31
    ITX_MULSUB_2W   14, 1, 6, 11, 15, 799, 4017 ; t17a, t30a
    psubsw          m6, m0, m12 ; t19a
    paddsw          m0, m12 ; t16a
    psubsw          m12, m4, m3 ; t28a
    paddsw          m4, m3 ; t31a
    psubsw          m3, m14, m5 ; t18
    paddsw          m14, m5 ; t17
    psubsw          m5, m1, m7 ; t29
    paddsw          m1, m7 ; t30
    ITX_MULSUB_2W   5, 3, 7, 11, 15, 1567, 3784 ; t18a, t29a
    ITX_MULSUB_2W   12, 6, 7, 11, 15, 1567, 3784 ; t19, t28
    psubsw          m7, m1, m10 ; t25a
    paddsw          m1, m10 ; t30a
    psubsw          m10, m5, m9 ; t21
    paddsw          m5, m9 ; t18
    psubsw          m9, m12, m2 ; t20a
    paddsw          m12, m2 ; t19a
    psubsw          m2, m3, m13 ; t26
    paddsw          m3, m13 ; t29
    psubsw          m13, m6, m8 ; t27a
    paddsw          m6, m8 ; t28a
    mova            [tmp1q-32*2], m5
    mova            [tmp1q-32*1], m12
    mova            [tmp2q+32*0], m6
    mova            [tmp2q+32*1], m3
    mova            [tmp2q+32*2], m1
    mova            m5, [rsp+gprsize+32*0] ; t22
    mova            m6, [rsp+gprsize+32*1] ; t23a
    mova            m3, [rsp+gprsize+32*2] ; t24a
    psubsw          m1, m14, m5 ; t22a
    paddsw          m14, m5 ; t17a
    psubsw          m5, m0, m6 ; t23
    paddsw          m0, m6 ; t16
    psubsw          m6, m4, m3 ; t24
    paddsw          m4, m3 ; t31
    vpbroadcastd    m8, [o(pw_m2896_2896)]
    vpbroadcastd    m3, [o(pw_2896_2896)]
    mova            [tmp1q-32*4], m0
    mova            [tmp1q-32*3], m14
    mova            [tmp2q+32*3], m4
    ; The last two numeric arguments below are register numbers (m3, m8)
    ; holding the broadcast constants loaded above, not immediates.
    ITX_MULSUB_2W   13, 9, 0, 4, 15, 3, 8 ; t20, t27
    ITX_MULSUB_2W   2, 10, 0, 4, 15, 3, 8 ; t21a, t26a
    ITX_MULSUB_2W   7, 1, 0, 4, 15, 3, 8 ; t22, t25
    ITX_MULSUB_2W   6, 5, 0, 4, 15, 3, 8 ; t23a, t24a
    mova            [tmp1q+32*0], m13
    mova            [tmp1q+32*1], m2
    mova            [tmp1q+32*2], m7
    mova            [tmp1q+32*3], m6
    mova            [tmp2q-32*4], m5
    mova            [tmp2q-32*3], m1
    mova            [tmp2q-32*2], m10
    mova            [tmp2q-32*1], m9
    ret
ALIGN function_align
.transpose_2x8x8_round:
    punpckhwd       m6, m12, m13
    punpcklwd       m12, m13
    punpckhwd       m13, m8, m9
    punpcklwd       m8, m9
    punpckhwd       m9, m14, m15
    punpcklwd       m14, m15
3875*c0909341SAndroid Build Coastguard Worker punpckhwd m15, m10, m11 3876*c0909341SAndroid Build Coastguard Worker punpcklwd m10, m11 3877*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m7}, m0, m1, m2, m3, m4, m5 3878*c0909341SAndroid Build Coastguard Worker punpckhdq m11, m8, m10 3879*c0909341SAndroid Build Coastguard Worker punpckldq m8, m10 3880*c0909341SAndroid Build Coastguard Worker punpckldq m10, m12, m14 3881*c0909341SAndroid Build Coastguard Worker punpckhdq m12, m14 3882*c0909341SAndroid Build Coastguard Worker punpckhdq m14, m13, m15 3883*c0909341SAndroid Build Coastguard Worker punpckldq m13, m15 3884*c0909341SAndroid Build Coastguard Worker punpckldq m15, m6, m9 3885*c0909341SAndroid Build Coastguard Worker punpckhdq m6, m9 3886*c0909341SAndroid Build Coastguard Worker punpckhqdq m9, m8, m10 3887*c0909341SAndroid Build Coastguard Worker punpcklqdq m8, m10 3888*c0909341SAndroid Build Coastguard Worker punpcklqdq m10, m11, m12 3889*c0909341SAndroid Build Coastguard Worker punpckhqdq m11, m12 3890*c0909341SAndroid Build Coastguard Worker punpcklqdq m12, m13, m15 3891*c0909341SAndroid Build Coastguard Worker punpckhqdq m13, m15 3892*c0909341SAndroid Build Coastguard Worker punpckhqdq m15, m14, m6 3893*c0909341SAndroid Build Coastguard Worker punpcklqdq m14, m6 3894*c0909341SAndroid Build Coastguard Worker pmulhrsw m6, m7, [rsp+gprsize+32*0] 3895*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m7}, m8, m9, m10, m11, m12, m13, m14, m15 3896*c0909341SAndroid Build Coastguard Worker pmulhrsw m7, [rsp+gprsize+32*1] 3897*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+32*0], m15 3898*c0909341SAndroid Build Coastguard Worker punpckhwd m15, m4, m5 3899*c0909341SAndroid Build Coastguard Worker punpcklwd m4, m5 3900*c0909341SAndroid Build Coastguard Worker punpckhwd m5, m0, m1 3901*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m1 3902*c0909341SAndroid Build Coastguard Worker punpckhwd m1, m6, m7 3903*c0909341SAndroid 
Build Coastguard Worker punpcklwd m6, m7 3904*c0909341SAndroid Build Coastguard Worker punpckhwd m7, m2, m3 3905*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m3 3906*c0909341SAndroid Build Coastguard Worker punpckhdq m3, m0, m2 3907*c0909341SAndroid Build Coastguard Worker punpckldq m0, m2 3908*c0909341SAndroid Build Coastguard Worker punpckldq m2, m4, m6 3909*c0909341SAndroid Build Coastguard Worker punpckhdq m4, m6 3910*c0909341SAndroid Build Coastguard Worker punpckhdq m6, m5, m7 3911*c0909341SAndroid Build Coastguard Worker punpckldq m5, m7 3912*c0909341SAndroid Build Coastguard Worker punpckldq m7, m15, m1 3913*c0909341SAndroid Build Coastguard Worker punpckhdq m15, m1 3914*c0909341SAndroid Build Coastguard Worker punpckhqdq m1, m0, m2 3915*c0909341SAndroid Build Coastguard Worker punpcklqdq m0, m2 3916*c0909341SAndroid Build Coastguard Worker punpcklqdq m2, m3, m4 3917*c0909341SAndroid Build Coastguard Worker punpckhqdq m3, m4 3918*c0909341SAndroid Build Coastguard Worker punpcklqdq m4, m5, m7 3919*c0909341SAndroid Build Coastguard Worker punpckhqdq m5, m7 3920*c0909341SAndroid Build Coastguard Worker punpckhqdq m7, m6, m15 3921*c0909341SAndroid Build Coastguard Worker punpcklqdq m6, m15 3922*c0909341SAndroid Build Coastguard Worker ret 3923*c0909341SAndroid Build Coastguard WorkerALIGN function_align 3924*c0909341SAndroid Build Coastguard Worker.pass2_end: 3925*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+32*0], m7 3926*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+32*2], m15 3927*c0909341SAndroid Build Coastguard Worker vpbroadcastd m15, [o(pw_2048)] 3928*c0909341SAndroid Build Coastguard Worker IDCT32_PASS2_END 0, tmp2q+32*3, 1, 7, 15, strideq*0, r3*4 3929*c0909341SAndroid Build Coastguard Worker IDCT32_PASS2_END 4, tmp2q-32*1, 0, 7, 15, strideq*4, strideq*8 3930*c0909341SAndroid Build Coastguard Worker IDCT32_PASS2_END 8, tmp1q+32*3, 0, 4, 15, strideq*8, strideq*4 3931*c0909341SAndroid Build Coastguard Worker 
IDCT32_PASS2_END 12, tmp1q-32*1, 0, 4, 15, r3*4, strideq*0 3932*c0909341SAndroid Build Coastguard Worker add dstq, strideq 3933*c0909341SAndroid Build Coastguard Worker sub r2, strideq 3934*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize+32*1] 3935*c0909341SAndroid Build Coastguard Worker IDCT32_PASS2_END 1, tmp2q+32*2, 0, 4, 15, strideq*0, r3*4 3936*c0909341SAndroid Build Coastguard Worker IDCT32_PASS2_END 5, tmp2q-32*2, 0, 4, 15, strideq*4, strideq*8 3937*c0909341SAndroid Build Coastguard Worker IDCT32_PASS2_END 9, tmp1q+32*2, 0, 4, 15, strideq*8, strideq*4 3938*c0909341SAndroid Build Coastguard Worker IDCT32_PASS2_END 13, tmp1q-32*2, 0, 4, 15, r3*4, strideq*0 3939*c0909341SAndroid Build Coastguard Worker add dstq, strideq 3940*c0909341SAndroid Build Coastguard Worker sub r2, strideq 3941*c0909341SAndroid Build Coastguard Worker IDCT32_PASS2_END 2, tmp2q+32*1, 0, 4, 15, strideq*0, r3*4 3942*c0909341SAndroid Build Coastguard Worker IDCT32_PASS2_END 6, tmp2q-32*3, 0, 4, 15, strideq*4, strideq*8 3943*c0909341SAndroid Build Coastguard Worker IDCT32_PASS2_END 10, tmp1q+32*1, 0, 4, 15, strideq*8, strideq*4 3944*c0909341SAndroid Build Coastguard Worker IDCT32_PASS2_END 14, tmp1q-32*3, 0, 4, 15, r3*4, strideq*0 3945*c0909341SAndroid Build Coastguard Worker add dstq, strideq 3946*c0909341SAndroid Build Coastguard Worker sub r2, strideq 3947*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+gprsize+32*0] 3948*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+gprsize+32*2] 3949*c0909341SAndroid Build Coastguard Worker IDCT32_PASS2_END 3, tmp2q+32*0, 0, 4, 15, strideq*0, r3*4 3950*c0909341SAndroid Build Coastguard Worker IDCT32_PASS2_END 7, tmp2q-32*4, 0, 4, 15, strideq*4, strideq*8 3951*c0909341SAndroid Build Coastguard Worker IDCT32_PASS2_END 11, tmp1q+32*0, 0, 4, 15, strideq*8, strideq*4 3952*c0909341SAndroid Build Coastguard Worker IDCT32_PASS2_END 1, tmp1q-32*4, 0, 4, 15, r3*4, strideq*0 3953*c0909341SAndroid Build Coastguard Worker ret 
; Perform the final sumsub step and YMM lane shuffling
;
;   %1 = register index of a row in the first group  (memory slot tmp2q+32*(3-%1))
;   %2 = register index of a row in the second group (memory slot tmp1q+32*(11-%2))
;   %3, %4 = scratch register indices (clobbered)
;
; For each of the two rows, the register is combined with the value stored in
; its memory slot using saturating add/sub. The two differences are written
; back to memory split by 128-bit lane (low lanes to the tmp1q slot, high lanes
; to the tmp2q slot); the two sums stay in registers, regrouped by lane:
;   m%1 = { low lane of (m%1 + slot1),  low lane of (m%2 + slot2) }
;   m%2 = { high lane of (m%1 + slot1), high lane of (m%2 + slot2) }
%macro IDCT32_PASS1_END 4 ; row[1-2], tmp[1-2]
    mova                m%3, [tmp2q+32*( 3-%1)]
    psubsw              m%4, m%1, m%3
    paddsw              m%1, m%3
    ; The tmp1q slot must be read in full before its upper half is overwritten.
    mova                m%3, [tmp1q+32*(11-%2)]
    mova         [tmp1q+32*(11-%2)+16], xm%4
    vextracti128 [tmp2q+32*( 3-%1)+16], m%4, 1
    paddsw              m%4, m%2, m%3
    psubsw              m%2, m%3
    mova         [tmp1q+32*(11-%2)], xm%2
    vextracti128 [tmp2q+32*( 3-%1)], m%2, 1
    vperm2i128          m%2, m%1, m%4, 0x31 ; high lanes of both sums
    vinserti128         m%1, xm%4, 1        ; low lanes of both sums
%endmacro

; 32x16 inverse DCT/DCT with add-to-destination, 8 bpc.
;   r0 = dst, r1 = stride, r2 = c (16-bit coefficients, cleared on exit),
;   r3 = eob
;
; eob == 0 takes the DC-only shortcut and tail-jumps into the 32x8 function's
; .dconly label. Otherwise pass 1 runs once over the whole block and pass 2
; runs twice, each time writing a 16-pixel-wide, 16-row half of the output.
cglobal inv_txfm_add_dct_dct_32x16_8bpc, 4, 4, 0, dst, stride, c, eob
    lea                  r6, [o_base]
    test               eobd, eobd
    jnz .normal
    ; DC-only path. The DC coefficient is scaled by pw_2896x8 twice.
    movd                xm1, [o(pw_2896x8)]
    pmulhrsw            xm0, xm1, [cq]
    movd                xm2, [o(pw_16384)]
    mov                [cq], eobd           ; eob is 0 here: clears the DC coefficient
    pmulhrsw            xm0, xm1
    ; r3 (eob) is known to be zero, so this sets r3d = 16.
    ; NOTE(review): presumably the row count expected by .dconly - confirm there.
    or                  r3d, 16
    jmp m(inv_txfm_add_dct_dct_32x8_8bpc).dconly
.normal:
    ; Re-declare the frame: 6 GPRs, 16 vector registers, 32*19 bytes of stack.
    PROLOGUE              0, 6, 16, 32*19, dst, stride, c, eob, tmp1, tmp2
    ; Odd-numbered 32-byte coefficient rows, each pre-scaled by pw_2896x8.
    vpbroadcastd        m15, [o(pw_2896x8)]
    pmulhrsw             m0, m15, [cq+32* 1]
    pmulhrsw             m1, m15, [cq+32* 3]
    pmulhrsw             m2, m15, [cq+32* 5]
    pmulhrsw             m3, m15, [cq+32* 7]
    pmulhrsw             m4, m15, [cq+32* 9]
    pmulhrsw             m5, m15, [cq+32*11]
    pmulhrsw             m6, m15, [cq+32*13]
    pmulhrsw             m7, m15, [cq+32*15]
    pmulhrsw             m8, m15, [cq+32*17]
    pmulhrsw             m9, m15, [cq+32*19]
    pmulhrsw            m10, m15, [cq+32*21]
    pmulhrsw            m11, m15, [cq+32*23]
    pmulhrsw            m12, m15, [cq+32*25]
    pmulhrsw            m13, m15, [cq+32*27]
    pmulhrsw            m14, m15, [cq+32*29]
    pmulhrsw            m15,      [cq+32*31]
    ; tmp1q/tmp2q address two 8-slot scratch areas on the stack; slots are
    ; referenced as tmpNq-32*4 .. tmpNq+32*3.
    lea               tmp1q, [rsp+32*7]
    lea               tmp2q, [tmp1q+32*8]
    call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf
    ; NOTE(review): presumably loads the even rows with the same pre-scaling;
    ; LOAD_16ROWS is defined elsewhere - confirm its last two arguments.
    LOAD_16ROWS     cq+32*0, 32*2, 1, 0
    pxor                m15, m15
    mov                 r3d, 8              ; 8 iterations * 128 bytes = 1024 bytes
.zero_loop:
    mova          [cq+32*0], m15
    mova          [cq+32*1], m15
    mova          [cq+32*2], m15
    mova          [cq+32*3], m15
    add                  cq, 32*4
    dec                 r3d
    jg .zero_loop
    call m(idct_16x16_internal_8bpc).main
    call .pass1_end
    lea                  r2, [strideq*3]    ; cq is no longer needed: r2 = stride*3
    mov                  r3, dstq           ; r3 = original dst; non-zero also means
                                            ; "right half still pending"
.pass2:
    vpbroadcastd         m7, [o(pw_16384)]
    call m(inv_txfm_add_dct_dct_16x32_8bpc).transpose_2x8x8_round
    call m(idct_16x16_internal_8bpc).main
    mova         [rsp+32*2], m15            ; spill row 15: m15 is needed for pw_2048
    vpbroadcastd        m15, [o(pw_2048)]
    REPX  {pmulhrsw x, m15}, m2, m3, m0
    WRITE_16X2            2,  3,  1,  2, strideq*2, r2
    ; Row 1 is taken from stack slot 1 rather than from m1.
    ; NOTE(review): presumably parked there by idct16 .main - confirm.
    pmulhrsw             m1, m15, [rsp+32*1]
    WRITE_16X2            0,  1,  2,  3, strideq*0, strideq*1
    lea                dstq, [dstq+strideq*4]
    REPX  {pmulhrsw x, m15}, m4, m5, m6, m7
    WRITE_16X2            4,  5,  2,  3, strideq*0, strideq*1
    WRITE_16X2            6,  7,  2,  3, strideq*2, r2
    lea                dstq, [dstq+strideq*4]
    REPX  {pmulhrsw x, m15}, m8, m9, m10, m11
    WRITE_16X2            8,  9,  2,  3, strideq*0, strideq*1
    WRITE_16X2           10, 11,  2,  3, strideq*2, r2
    lea                dstq, [dstq+strideq*4]
    ; Only rows 12-14 are scaled in registers; row 15 is scaled straight from
    ; its spill slot because m15 itself holds the pw_2048 constant.
    REPX  {pmulhrsw x, m15}, m12, m13, m14
    pmulhrsw            m15, [rsp+32*2]
    WRITE_16X2           12, 13,  2,  3, strideq*0, strideq*1
    WRITE_16X2           14, 15,  2,  3, strideq*2, r2
    test                 r3, r3
    jnz .right_half
    RET
.right_half:
    ; Reload the values .pass1_end stored in the tmp1q/tmp2q scratch slots and
    ; run pass 2 again, 16 bytes (= 16 pixels at 8 bpc) to the right.
    LOAD_8ROWS   tmp1q-32*4, 32
    LOAD_8ROWS_H tmp2q-32*4, 32
    lea                dstq, [r3+16]
    xor                 r3d, r3d            ; mark the right half as the final pass
    ; .transpose_2x8x8_round reads m6/m7 from stack slots 0/1.
    mova         [rsp+32*0], m6
    mova         [rsp+32*1], m7
    jmp .pass2
ALIGN function_align
; Final pass-1 butterflies for all 16 register rows (see IDCT32_PASS1_END).
; Called as a subroutine, hence the extra gprsize in every stack offset.
; On return, m6 and m7 live in stack slots 0 and 1 instead of in registers.
.pass1_end:
    mova [rsp+gprsize+32*0], m9             ; m9 doubles as a temp below
    IDCT32_PASS1_END      0,  8,  1,  9
    IDCT32_PASS1_END      2, 10,  1,  9
    IDCT32_PASS1_END      3, 11,  1,  9
    IDCT32_PASS1_END      4, 12,  1,  9
    IDCT32_PASS1_END      5, 13,  1,  9
    IDCT32_PASS1_END      6, 14,  1,  9
    IDCT32_PASS1_END      7, 15,  1,  9
    mova                 m1, [rsp+gprsize+32*1]
    mova                 m9, [rsp+gprsize+32*0]
    ; Rows 1 and 9 are processed last, using the spilled m6/m7 as temps.
    mova [rsp+gprsize+32*0], m6
    mova [rsp+gprsize+32*1], m7
    IDCT32_PASS1_END      1,  9,  6,  7
    ret

; 16x32 identity/identity inverse transform with add-to-destination, 8 bpc.
;   r0 = dst, r1 = stride, r2 = c (16-bit coefficients, cleared on exit),
;   r3 = eob
;
; Each loop iteration consumes a 16-byte-wide column strip of sixteen
; 64-byte-pitch coefficient rows and writes 8 destination rows of 16 pixels.
; The loop runs 1 + (eob > 43) + (eob > 150) + (eob > 278) times.
cglobal inv_txfm_add_identity_identity_16x32_8bpc, 4, 5, 13, dst, stride, c, eob
%undef cmp
    lea                  r6, [o_base]
    vpbroadcastd         m9, [o(pw_2896x8)]
    vpbroadcastd        m10, [o(pw_1697x16)]
    vpbroadcastd        m12, [o(pw_8192)]
    ; al aliases the low byte of r6 (rax), so o() must not be used past here.
    cmp                eobd, 43   ; if (eob > 43)
    setg                r4b       ;   iteration_count++
    cmp                eobd, 150  ; if (eob > 150)
    setg                 al       ;   iteration_count++
    add                eobd, -279 ; if (eob > 278)
    adc                 r4b, al   ;   iteration_count++
    lea                  r3, [strideq*3]
    mov                  r6, cq                 ; remember the coefficient base
    paddw               m11, m12, m12 ; pw_16384
.loop:
    ; Rows 0-7 go to the low lanes, rows 8-15 to the high lanes.
    mova                xm0, [cq+64* 0]
    mova                xm1, [cq+64* 1]
    vinserti128          m0, [cq+64* 8], 1
    vinserti128          m1, [cq+64* 9], 1
    mova                xm2, [cq+64* 2]
    mova                xm3, [cq+64* 3]
    vinserti128          m2, [cq+64*10], 1
    vinserti128          m3, [cq+64*11], 1
    mova                xm4, [cq+64* 4]
    mova                xm5, [cq+64* 5]
    vinserti128          m4, [cq+64*12], 1
    vinserti128          m5, [cq+64*13], 1
    mova                xm6, [cq+64* 6]
    mova                xm7, [cq+64* 7]
    vinserti128          m6, [cq+64*14], 1
    vinserti128          m7, [cq+64*15], 1
    REPX  {pmulhrsw x, m9 }, m0, m1, m2, m3, m4, m5, m6, m7
    ; IDTX16 is defined elsewhere; m8 is its scratch, m10/m11 its constants.
    REPX  {IDTX16 x, 8, 10, 11}, 0, 1, 2, 3, 4, 5, 6, 7
    call m(inv_txfm_add_identity_identity_8x32_8bpc).transpose8x8
    REPX  {pmulhrsw x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
    WRITE_16X2            0,  1,  8,  0, strideq*0, strideq*1
    WRITE_16X2            2,  3,  0,  1, strideq*2, r3
    lea                dstq, [dstq+strideq*4]
    WRITE_16X2            4,  5,  0,  1, strideq*0, strideq*1
    WRITE_16X2            6,  7,  0,  1, strideq*2, r3
    lea                dstq, [dstq+strideq*4]
    add                  cq, 16                 ; next 8 coefficients of every row
    dec                 r4b
    jge .loop
    ; cq advanced 16 bytes per iteration. With more than two iterations the
    ; second half of each 64-byte row was read too, so clear everything;
    ; otherwise only the first 32 bytes of each row need clearing.
    sub                  cq, 32
    pxor                 m0, m0
    mov                 r0d, 8                  ; dst is dead: r0 becomes the counter
    cmp                  cq, r6
    ja .zero_loop
.zero_loop_half:
    ; 4 rows per iteration, first 32 bytes of each; 4 iterations = 16 rows.
    mova          [r6+64*0], m0
    mova          [r6+64*1], m0
    add                  r6, 64*4
    mova          [r6-64*2], m0
    mova          [r6-64*1], m0
    sub                 r0d, 2
    jg .zero_loop_half
    RET
.zero_loop:
    ; 128 contiguous bytes per iteration; 8 iterations = 1024 bytes.
    mova          [r6+32*0], m0
    mova          [r6+32*1], m0
    mova          [r6+32*2], m0
    mova          [r6+32*3], m0
    add                  r6, 32*4
    dec                 r0d
    jg .zero_loop
    RET

; 32x16 identity/identity inverse transform with add-to-destination, 8 bpc.
;   r0 = dst, r1 = stride, r2 = c (16-bit coefficients, cleared on exit),
;   r3 = eob
;
; Each loop iteration consumes a 16-byte-wide column strip of sixteen
; 32-byte-pitch coefficient rows and writes 8 destination rows of 16 pixels.
; The loop runs 1, 2 or 4 times: eob <= 35, 35 < eob <= 150, eob > 150.
; Iterations 1-2 cover the left 16 destination columns, 3-4 the right 16.
cglobal inv_txfm_add_identity_identity_32x16_8bpc, 4, 6, 12, dst, stride, c, eob
%undef cmp
    lea                  r6, [o_base]
    vpbroadcastd         m9, [o(pw_2896x8)]
    vpbroadcastd        m10, [o(pw_1697x16)]
    vpbroadcastd        m11, [o(pw_2048)]
    cmp                eobd, 35  ; if (eob > 35)
    setg                r4b      ;   iteration_count++
    cmp                eobd, 150 ; if (eob > 150)
    setg                r3b      ;   iteration_count += 2
    ; setcc writes only the low byte, so only r4b is meaningful after this lea
    ; (the code below uses nothing but r4b).
    lea                 r4d, [r4+r3*2]
    lea                  r3, [strideq*3]
    mov                  r5, dstq               ; remember dst for the right half
    mov                  r6, cq                 ; remember the coefficient base
.loop:
    ; Rows 0-7 go to the low lanes, rows 8-15 to the high lanes.
    mova                xm0, [cq+32* 0]
    mova                xm1, [cq+32* 1]
    vinserti128          m0, [cq+32* 8], 1
    vinserti128          m1, [cq+32* 9], 1
    mova                xm2, [cq+32* 2]
    mova                xm3, [cq+32* 3]
    vinserti128          m2, [cq+32*10], 1
    vinserti128          m3, [cq+32*11], 1
    mova                xm4, [cq+32* 4]
    mova                xm5, [cq+32* 5]
    vinserti128          m4, [cq+32*12], 1
    vinserti128          m5, [cq+32*13], 1
    mova                xm6, [cq+32* 6]
    mova                xm7, [cq+32* 7]
    vinserti128          m6, [cq+32*14], 1
    vinserti128          m7, [cq+32*15], 1
    REPX  {pmulhrsw x, m9 }, m0, m1, m2, m3, m4, m5, m6, m7
    REPX  {paddsw   x, x  }, m0, m1, m2, m3, m4, m5, m6, m7
    call m(inv_txfm_add_identity_identity_8x32_8bpc).transpose8x8
    ; IDTX16 is defined elsewhere; m8 is its scratch, m10 its constant.
    REPX  {IDTX16 x, 8, 10}, 0, 1, 2, 3, 4, 5, 6, 7
    REPX  {pmulhrsw x, m11}, m0, m1, m2, m3, m4, m5, m6, m7
    WRITE_16X2            0,  1,  8,  0, strideq*0, strideq*1
    WRITE_16X2            2,  3,  0,  1, strideq*2, r3
    lea                dstq, [dstq+strideq*4]
    WRITE_16X2            4,  5,  0,  1, strideq*0, strideq*1
    WRITE_16X2            6,  7,  0,  1, strideq*2, r3
    lea                dstq, [dstq+strideq*4]
    add                  cq, 16                 ; second column strip of the same rows
    dec                 r4b
    jl .ret
    test                r4b, 1
    jz .loop
    ; An odd remaining count means the left half is done: skip to coefficient
    ; row 16 (16 + 16 + 32*15 = 32*16 bytes from the previous row group) and
    ; restart the destination 16 pixels to the right.
    add                  cq, 32*15
    lea                dstq, [r5+16]
    jmp .loop
.ret:
    ; eax aliases r6d, so cd becomes the number of bytes cq advanced: 16 or 32
    ; when only rows 0-15 were read, 544 when all 32 rows were. With the +384
    ; bias the loop below runs 4 times (512 bytes) or 8 times (1024 bytes).
    sub                  cd, eax
    pxor                 m0, m0
    add                  cd, 384
.zero_loop:
    mova          [r6+32*0], m0
    mova          [r6+32*1], m0
    mova          [r6+32*2], m0
    mova          [r6+32*3], m0
    add                  r6, 32*4
    sub                  cd, 128
    jge .zero_loop
    RET

cglobal inv_txfm_add_dct_dct_32x32_8bpc, 4, 4, 0, dst, stride, c, eob
    lea                  r6, [o_base]
Coastguard Worker test eobd, eobd 4206*c0909341SAndroid Build Coastguard Worker jnz .normal 4207*c0909341SAndroid Build Coastguard Worker movd xm1, [o(pw_2896x8)] 4208*c0909341SAndroid Build Coastguard Worker pmulhrsw xm0, xm1, [cq] 4209*c0909341SAndroid Build Coastguard Worker movd xm2, [o(pw_8192)] 4210*c0909341SAndroid Build Coastguard Worker mov [cq], eobd 4211*c0909341SAndroid Build Coastguard Worker or r3d, 32 4212*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_32x8_8bpc).dconly 4213*c0909341SAndroid Build Coastguard Worker.normal: 4214*c0909341SAndroid Build Coastguard Worker PROLOGUE 0, 9, 16, 32*67, dst, stride, c, eob, tmp1, tmp2, \ 4215*c0909341SAndroid Build Coastguard Worker base, tmp3, tmp4 4216*c0909341SAndroid Build Coastguard Worker %undef cmp 4217*c0909341SAndroid Build Coastguard Worker lea tmp1q, [rsp+32*7] 4218*c0909341SAndroid Build Coastguard Worker lea tmp2q, [tmp1q+32*8] 4219*c0909341SAndroid Build Coastguard Worker sub eobd, 136 4220*c0909341SAndroid Build Coastguard Worker mov tmp4d, eobd 4221*c0909341SAndroid Build Coastguard Worker.pass1_loop: 4222*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS cq+64*1, 64*2 4223*c0909341SAndroid Build Coastguard Worker pxor m8, m8 4224*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+64*x], m8}, 1, 3, 5, 7, 9, 11, 13, 15 4225*c0909341SAndroid Build Coastguard Worker test tmp4d, tmp4d 4226*c0909341SAndroid Build Coastguard Worker jl .fast 4227*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS_H cq+64*17, 64*2 4228*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf 4229*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS_H cq+64*16, 64*2 4230*c0909341SAndroid Build Coastguard Worker pxor m0, m0 4231*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+64*x], m0}, 16, 17, 18, 19, 20, 21, 22, 23, \ 4232*c0909341SAndroid Build Coastguard Worker 24, 25, 26, 27, 28, 29, 30, 31 4233*c0909341SAndroid Build Coastguard Worker mova 
[rsp], m15 4234*c0909341SAndroid Build Coastguard Worker jmp .idct16 4235*c0909341SAndroid Build Coastguard Worker.fast: 4236*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast 4237*c0909341SAndroid Build Coastguard Worker pxor m8, m8 4238*c0909341SAndroid Build Coastguard Worker REPX {mova x, m8}, m9, m10, m11, m12, m13, m14 4239*c0909341SAndroid Build Coastguard Worker mova [rsp], m8 4240*c0909341SAndroid Build Coastguard Worker.idct16: 4241*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS cq+64*0, 64*2 4242*c0909341SAndroid Build Coastguard Worker pxor m15, m15 4243*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+64*x], m15}, 0, 2, 4, 6, 8, 10, 12, 14 4244*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_8bpc).main 4245*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_8bpc).pass1_end 4246*c0909341SAndroid Build Coastguard Worker vpbroadcastd m7, [o(pw_8192)] 4247*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x32_8bpc).transpose_2x8x8_round 4248*c0909341SAndroid Build Coastguard Worker lea tmp3q, [tmp1q+32*32] 4249*c0909341SAndroid Build Coastguard Worker mova m15, [rsp] 4250*c0909341SAndroid Build Coastguard Worker mova [tmp3q-32*4], m0 4251*c0909341SAndroid Build Coastguard Worker mova [tmp3q-32*3], m2 4252*c0909341SAndroid Build Coastguard Worker mova [tmp3q-32*2], m4 4253*c0909341SAndroid Build Coastguard Worker mova [tmp3q-32*1], m6 4254*c0909341SAndroid Build Coastguard Worker mova [tmp3q+32*0], m8 4255*c0909341SAndroid Build Coastguard Worker mova [tmp3q+32*1], m10 4256*c0909341SAndroid Build Coastguard Worker mova [tmp3q+32*2], m12 4257*c0909341SAndroid Build Coastguard Worker mova [tmp3q+32*3], m14 4258*c0909341SAndroid Build Coastguard Worker add tmp3q, 32*8 4259*c0909341SAndroid Build Coastguard Worker mova [tmp3q-32*4], m1 4260*c0909341SAndroid Build Coastguard Worker mova [tmp3q-32*3], m3 4261*c0909341SAndroid Build 
Coastguard Worker mova [tmp3q-32*2], m5 4262*c0909341SAndroid Build Coastguard Worker mova [tmp3q-32*1], m7 4263*c0909341SAndroid Build Coastguard Worker mova [tmp3q+32*0], m9 4264*c0909341SAndroid Build Coastguard Worker mova [tmp3q+32*1], m11 4265*c0909341SAndroid Build Coastguard Worker mova [tmp3q+32*2], m13 4266*c0909341SAndroid Build Coastguard Worker mova [tmp3q+32*3], m15 4267*c0909341SAndroid Build Coastguard Worker vpbroadcastd m9, [o(pw_8192)] 4268*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m9, [tmp1q-32*4] 4269*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m9, [tmp1q-32*3] 4270*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m9, [tmp1q-32*2] 4271*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m9, [tmp1q-32*1] 4272*c0909341SAndroid Build Coastguard Worker pmulhrsw m4, m9, [tmp1q+32*0] 4273*c0909341SAndroid Build Coastguard Worker pmulhrsw m5, m9, [tmp1q+32*1] 4274*c0909341SAndroid Build Coastguard Worker pmulhrsw m6, m9, [tmp1q+32*2] 4275*c0909341SAndroid Build Coastguard Worker pmulhrsw m7, m9, [tmp1q+32*3] 4276*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_identity_identity_8x32_8bpc).transpose8x8 4277*c0909341SAndroid Build Coastguard Worker mova [tmp1q-32*4], m0 4278*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m9, [tmp2q-32*4] 4279*c0909341SAndroid Build Coastguard Worker mova [tmp2q-32*4], m1 4280*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m9, [tmp2q-32*3] 4281*c0909341SAndroid Build Coastguard Worker mova [tmp1q-32*3], m2 4282*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m9, [tmp2q-32*2] 4283*c0909341SAndroid Build Coastguard Worker mova [tmp2q-32*3], m3 4284*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m9, [tmp2q-32*1] 4285*c0909341SAndroid Build Coastguard Worker mova [tmp1q-32*2], m4 4286*c0909341SAndroid Build Coastguard Worker pmulhrsw m4, m9, [tmp2q+32*0] 4287*c0909341SAndroid Build Coastguard Worker mova [tmp2q-32*2], m5 4288*c0909341SAndroid Build 
Coastguard Worker pmulhrsw m5, m9, [tmp2q+32*1] 4289*c0909341SAndroid Build Coastguard Worker mova [tmp1q-32*1], m6 4290*c0909341SAndroid Build Coastguard Worker pmulhrsw m6, m9, [tmp2q+32*2] 4291*c0909341SAndroid Build Coastguard Worker mova [tmp2q-32*1], m7 4292*c0909341SAndroid Build Coastguard Worker pmulhrsw m7, m9, [tmp2q+32*3] 4293*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_identity_identity_8x32_8bpc).transpose8x8 4294*c0909341SAndroid Build Coastguard Worker mova [tmp1q+32*0], m0 4295*c0909341SAndroid Build Coastguard Worker mova [tmp2q+32*0], m1 4296*c0909341SAndroid Build Coastguard Worker mova [tmp1q+32*1], m2 4297*c0909341SAndroid Build Coastguard Worker mova [tmp2q+32*1], m3 4298*c0909341SAndroid Build Coastguard Worker mova [tmp1q+32*2], m4 4299*c0909341SAndroid Build Coastguard Worker mova [tmp2q+32*2], m5 4300*c0909341SAndroid Build Coastguard Worker mova [tmp1q+32*3], m6 4301*c0909341SAndroid Build Coastguard Worker mova [tmp2q+32*3], m7 4302*c0909341SAndroid Build Coastguard Worker add cq, 32 4303*c0909341SAndroid Build Coastguard Worker add tmp1q, 32*16 4304*c0909341SAndroid Build Coastguard Worker add tmp2q, 32*16 4305*c0909341SAndroid Build Coastguard Worker add eobd, 0x80000000 4306*c0909341SAndroid Build Coastguard Worker jnc .pass1_loop 4307*c0909341SAndroid Build Coastguard Worker add tmp1q, 32*24 4308*c0909341SAndroid Build Coastguard Worker imul r2, strideq, 19 4309*c0909341SAndroid Build Coastguard Worker lea r3, [strideq*3] 4310*c0909341SAndroid Build Coastguard Worker add r2, dstq 4311*c0909341SAndroid Build Coastguard Worker test tmp4d, tmp4d 4312*c0909341SAndroid Build Coastguard Worker jge .pass2_loop 4313*c0909341SAndroid Build Coastguard Worker add tmp1q, 32*16 4314*c0909341SAndroid Build Coastguard Worker add tmp2q, 32*16 4315*c0909341SAndroid Build Coastguard Worker add tmp3q, 32*16 4316*c0909341SAndroid Build Coastguard Worker.pass2_loop: 4317*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS tmp2q-32*4, 32 
4318*c0909341SAndroid Build Coastguard Worker test tmp4d, tmp4d 4319*c0909341SAndroid Build Coastguard Worker jl .fast2 4320*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS_H tmp3q-32*4, 32 4321*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf 4322*c0909341SAndroid Build Coastguard Worker sub tmp3q, 32*8 4323*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS_H tmp3q-32*4, 32 4324*c0909341SAndroid Build Coastguard Worker sub tmp3q, 32*16 4325*c0909341SAndroid Build Coastguard Worker jmp .pass2_loop_end 4326*c0909341SAndroid Build Coastguard Worker.fast2: 4327*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast 4328*c0909341SAndroid Build Coastguard Worker sub tmp3q, 32*24 4329*c0909341SAndroid Build Coastguard Worker pxor m8, m8 4330*c0909341SAndroid Build Coastguard Worker REPX {mova x, m8}, m9, m10, m11, m12, m13, m14, m15 4331*c0909341SAndroid Build Coastguard Worker.pass2_loop_end: 4332*c0909341SAndroid Build Coastguard Worker LOAD_8ROWS tmp3q-32*4, 32 4333*c0909341SAndroid Build Coastguard Worker mova [rsp], m15 4334*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_8bpc).main 4335*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x32_8bpc).pass2_end 4336*c0909341SAndroid Build Coastguard Worker lea tmp3q, [tmp1q-32*32] 4337*c0909341SAndroid Build Coastguard Worker cmp tmp2q, tmp3q 4338*c0909341SAndroid Build Coastguard Worker jb .ret 4339*c0909341SAndroid Build Coastguard Worker sub tmp2q, 32*32 4340*c0909341SAndroid Build Coastguard Worker sub dstq, r3 4341*c0909341SAndroid Build Coastguard Worker lea r2, [r2+r3+16] 4342*c0909341SAndroid Build Coastguard Worker add dstq, 16 4343*c0909341SAndroid Build Coastguard Worker jmp .pass2_loop 4344*c0909341SAndroid Build Coastguard Worker.ret: 4345*c0909341SAndroid Build Coastguard Worker RET 4346*c0909341SAndroid Build Coastguard Worker 4347*c0909341SAndroid Build Coastguard 
; inv_txfm_add_identity_identity_32x32_8bpc(dst, stride, c, eob)
;
; Each .loop iteration gathers a tile of 16 rows x 8 16-bit coefficients
; (rows are 64 bytes apart; rows 0-7 go to the low lanes, rows 8-15 to the
; high lanes), transposes it with the shared 8x32 helper, scales by pw_8192
; with pmulhrsw and passes the result to WRITE_16X2 (defined elsewhere;
; presumably adds the residual to the dst pixels), advancing dst by 8 rows.
; Two tiles are processed when eob < 136, eight otherwise. The coefficients
; that were read are cleared before returning.
cglobal inv_txfm_add_identity_identity_32x32_8bpc, 4, 6, 10, dst, stride, c, eob
    %undef cmp
    vpbroadcastd         m9, [pw_8192]
    sub                eobd, 136 ; if (eob < 136)
    shr                eobd, 30  ;     topleft 16x16 only
    lea                eobd, [eobq*2-8] ; -2 (eob < 136) or -8: counts up to 0
    lea                  r4, [strideq*3]
    mov                  r5, dstq       ; keep the original dst for the 2nd half
    lea                  r6, [cq+32]    ; compared against cq at .ret, then
                                        ; used as the base for clearing c
.loop:
    mova                xm0, [cq+64* 0]
    mova                xm1, [cq+64* 1]
    vinserti128          m0, [cq+64* 8], 1
    vinserti128          m1, [cq+64* 9], 1
    mova                xm2, [cq+64* 2]
    mova                xm3, [cq+64* 3]
    vinserti128          m2, [cq+64*10], 1
    vinserti128          m3, [cq+64*11], 1
    mova                xm4, [cq+64* 4]
    mova                xm5, [cq+64* 5]
    vinserti128          m4, [cq+64*12], 1
    vinserti128          m5, [cq+64*13], 1
    mova                xm6, [cq+64* 6]
    mova                xm7, [cq+64* 7]
    vinserti128          m6, [cq+64*14], 1
    vinserti128          m7, [cq+64*15], 1
    call m(inv_txfm_add_identity_identity_8x32_8bpc).transpose8x8
    REPX  {pmulhrsw x, m9}, m0, m1, m2, m3, m4, m5, m6, m7
    WRITE_16X2            0,  1,  8,  0, strideq*0, strideq*1
    WRITE_16X2            2,  3,  0,  1, strideq*2, r4
    lea                dstq, [dstq+strideq*4]
    WRITE_16X2            4,  5,  0,  1, strideq*0, strideq*1
    WRITE_16X2            6,  7,  0,  1, strideq*2, r4
    lea                dstq, [dstq+strideq*4]
    add                  cq, 16
    inc                eobd
    jz .ret
    test               eobd, 3
    jnz .loop
    ; counter reached a multiple of 4: together with the four "add cq, 16"
    ; above this moves c forward by 16 rows, and dst restarts 16 bytes to
    ; the right of its original position
    add                  cq, 64*15
    lea                dstq, [r5+16]
    jmp .loop
.ret:
    pxor                 m0, m0
    mov                 r0d, 16
    cmp                  cq, r6         ; equal only after exactly two tiles
    jne .zero_loop
.zero_loop_topleft:
    ; clear the first 32 bytes of each of the first 16 rows (4 rows per pass)
    mova          [r6-32*1], m0
    mova          [r6+32*1], m0
    mova          [r6+32*3], m0
    mova          [r6+32*5], m0
    add                  r6, 64*4
    sub                 r0d, 4
    jg .zero_loop_topleft
    RET
.zero_loop:
    ; clear 16 * 128 contiguous bytes starting at the original c pointer
    mova          [r6-32*1], m0
    mova          [r6+32*0], m0
    mova          [r6+32*1], m0
    mova          [r6+32*2], m0
    add                  r6, 32*4
    dec                 r0d
    jg .zero_loop
    RET

; IDCT64_PART2_END: final add/sub stage producing four idct64 outputs
; (out n, 31-n, 32+n, 63-n) from the stored idct16/idct32 values and the
; two source registers.
;   %1      output index n (its parity selects which of tmp1q/tmp2q is read)
;   %2-%3   source registers
;   %4-%6   temporary registers
;   %7-%10  optional dst offsets; their presence selects pass 2
; With 6 arguments (pass 1) the results are stored back to the tmp1q/tmp2q
; buffers. With 10 arguments (pass 2) the results are scaled by m14 with
; pmulhrsw, added to zero-extended dst bytes, packed with unsigned
; saturation and stored as 16 bytes per row through dstq and r2.
%macro IDCT64_PART2_END 6-10 ; out, src[1-2], tmp[1-3], (offset[1-4])
%if %1 & 1
    mova                m%5, [tmp2q-32*(51-%1)] ; idct16 out 0+n
    mova                m%4, [tmp1q-32*(14+%1)] ; idct32 out31-n
%else
    mova                m%5, [tmp1q-32*(45-%1)]
    mova                m%4, [tmp2q-32*(20+%1)]
%endif
    psubsw              m%6, m%5, m%4 ; idct32 out31-n
    paddsw              m%5, m%4      ; idct32 out 0+n
    psubsw              m%4, m%6, m%3 ; out32+n
    paddsw              m%6, m%3      ; out31-n
    psubsw              m%3, m%5, m%2 ; out63-n
    paddsw              m%5, m%2      ; out 0+n
%if %0 == 6 ; pass 1
%if %1 & 1
    mova [tmp2q-32*(19-%1)], m%4
    mova [tmp1q-32*(14+%1)], m%6
    mova [tmp1q+32*(18-%1)], m%3
    mova [tmp2q-32*(51-%1)], m%5
%else
    mova [tmp1q-32*(13-%1)], m%4
    mova [tmp2q-32*(20+%1)], m%6
    mova [tmp2q+32*(12-%1)], m%3
    mova [tmp1q-32*(45-%1)], m%5
%endif
%else ; pass 2
    REPX  {pmulhrsw x, m14}, m%4, m%6, m%3, m%5
%if %1 & 1
    %define %%d0 r2
    %define %%d1 dstq
%else
    %define %%d0 dstq
    %define %%d1 r2
%endif
    pmovzxbw            m%2, [%%d0+%9 ]
    paddw               m%2, m%4
    pmovzxbw            m%4, [%%d1+%8 ]
    paddw               m%4, m%6
    pmovzxbw            m%6, [%%d1+%10]
    paddw               m%3, m%6
    pmovzxbw            m%6, [%%d0+%7 ]
    paddw               m%5, m%6
    packuswb            m%2, m%4
    packuswb            m%3, m%5
    vpermq              m%2, m%2, q3120
    vpermq              m%3, m%3, q3120
    mova         [%%d0+%9 ], xm%2
    vextracti128 [%%d1+%8 ], m%2, 1
    mova         [%%d1+%10], xm%3
    vextracti128 [%%d0+%7 ], m%3, 1
%endif
%endmacro

; inv_txfm_add_dct_dct_16x64_8bpc(dst, stride, c, eob)
;
; eob == 0: only c[0] is used; it is scaled, cleared, and control is passed
; to the 16x4 .dconly code with r3d = 64 (presumably the row count - confirm
; against .dconly).
; Otherwise: pass 1 runs the 16x16 idct once (eob < 151) or twice, storing
; transposed, rounded rows into the stack buffer. Pass 2 then feeds those
; rows to the idct16 / idct32-odd-half / idct64 stages; when eob < 151 the
; loads through r3 are skipped and the corresponding registers stay zero.
cglobal inv_txfm_add_dct_dct_16x64_8bpc, 4, 4, 0, dst, stride, c, eob
    lea                  r6, [o_base]
    test               eobd, eobd
    jnz .normal
    movd                xm1, [o(pw_2896x8)]
    pmulhrsw            xm0, xm1, [cq]
    movd                xm2, [o(pw_8192)]
    mov                [cq], eobd       ; eobd is zero here: clears c[0]
    or                  r3d, 64
    jmp m(inv_txfm_add_dct_dct_16x4_8bpc).dconly
.normal:
    PROLOGUE              0, 10, 16, 32*67, dst, stride, c, eob, tmp1, tmp2
    %undef cmp
    lea               tmp1q, [rsp+32*23]
    lea               tmp2q, [tmp1q+32*24]
    sub                eobd, 151
    mov                 r7d, eobd       ; sign of (eob - 151) picks .fast paths
.pass1_loop:
    LOAD_16ROWS          cq, 64
    call m(idct_16x16_internal_8bpc).main
    mova                 m1, [rsp+32*1]
    mova         [rsp+32*0], m6
    mova         [rsp+32*1], m7
    vpbroadcastd         m7, [o(pw_8192)]
    call m(inv_txfm_add_dct_dct_16x32_8bpc).transpose_2x8x8_round
    mova                m15, [rsp+32*0]
    mova       [tmp1q-32*4], m0
    mova       [tmp1q-32*3], m2
    mova       [tmp1q-32*2], m4
    mova       [tmp1q-32*1], m6
    mova       [tmp1q+32*0], m8
    mova       [tmp1q+32*1], m10
    mova       [tmp1q+32*2], m12
    mova       [tmp1q+32*3], m14
    mova       [tmp2q-32*4], m1
    mova       [tmp2q-32*3], m3
    mova       [tmp2q-32*2], m5
    mova       [tmp2q-32*1], m7
    mova       [tmp2q+32*0], m9
    mova       [tmp2q+32*1], m11
    mova       [tmp2q+32*2], m13
    mova       [tmp2q+32*3], m15
    add                  cq, 32
    add               tmp1q, 32*8
    add               tmp2q, 32*8
    ; carry is set iff bit 31 of eobd was already set: a negative (eob - 151)
    ; leaves after one iteration, otherwise the first add sets bit 31 and
    ; the loop leaves after the second iteration
    add                eobd, 0x80000000
    jnc .pass1_loop
    lea                  r2, [rsp+32*23]
    mova                xm0, [r2-32*4+ 0]
    mova                xm1, [r2-32*2+ 0]
    vinserti128          m0, [r2+32*0+ 0], 1
    vinserti128          m1, [r2+32*2+ 0], 1
    mova                xm2, [r2-32*4+16]
    mova                xm3, [r2-32*2+16]
    vinserti128          m2, [r2+32*0+16], 1
    vinserti128          m3, [r2+32*2+16], 1
    pxor                 m4, m4
    REPX       {mova x, m4}, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14
    test                r7d, r7d
    jl .fast                            ; eob < 151: m4-m7 stay zero
    lea                  r3, [r2+32*8]
    mova                xm4, [r3-32*4+ 0]
    mova                xm5, [r3-32*2+ 0]
    vinserti128          m4, [r3+32*0+ 0], 1
    vinserti128          m5, [r3+32*2+ 0], 1
    mova                xm6, [r3-32*4+16]
    mova                xm7, [r3-32*2+16]
    vinserti128          m6, [r3+32*0+16], 1
    vinserti128          m7, [r3+32*2+16], 1
.fast:
    mova              [rsp], m8
    lea               tmp1q, [rsp+32*7]
    call m(idct_16x16_internal_8bpc).main
    mova                 m1, [rsp+32*1]
    mova       [tmp1q-32*4], m0
    mova       [tmp1q-32*3], m1
    mova       [tmp1q-32*2], m2
    mova       [tmp1q-32*1], m3
    mova       [tmp1q+32*0], m4
    mova       [tmp1q+32*1], m5
    mova       [tmp1q+32*2], m6
    mova       [tmp1q+32*3], m7
    add               tmp1q, 32*8
    mova       [tmp1q-32*4], m8
    mova       [tmp1q-32*3], m9
    mova       [tmp1q-32*2], m10
    mova       [tmp1q-32*1], m11
    mova       [tmp1q+32*0], m12
    mova       [tmp1q+32*1], m13
    mova       [tmp1q+32*2], m14
    mova       [tmp1q+32*3], m15
    mova                xm0, [r2-32*3+ 0]
    mova                xm1, [r2-32*1+ 0]
    vinserti128          m0, [r2+32*1+ 0], 1
    vinserti128          m1, [r2+32*3+ 0], 1
    mova                xm2, [r2-32*3+16]
    mova                xm3, [r2-32*1+16]
    vinserti128          m2, [r2+32*1+16], 1
    vinserti128          m3, [r2+32*3+16], 1
    pxor                 m4, m4
    REPX       {mova x, m4}, m5, m6, m7
    test                r7d, r7d
    jl .fast2                           ; eob < 151: m4-m7 stay zero
    mova                xm4, [r3-32*3+ 0]
    mova                xm5, [r3-32*1+ 0]
    vinserti128          m4, [r3+32*1+ 0], 1
    vinserti128          m5, [r3+32*3+ 0], 1
    mova                xm6, [r3-32*3+16]
    mova                xm7, [r3-32*1+16]
    vinserti128          m6, [r3+32*1+16], 1
    vinserti128          m7, [r3+32*3+16], 1
.fast2:
    add               tmp1q, 32*8
    lea               tmp2q, [tmp1q+32*8]
    call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast
    add                  r2, 32*24
    vpbroadcastd        m15, [o(pd_2048)]
    add               tmp1q, 32*16
    add               tmp2q, 32*32
    mova                xm0, [r2-32*4+ 0]
    mova                xm3, [r2-32*1+16]
    vinserti128          m0, [r2+32*0+ 0], 1
    vinserti128          m3, [r2+32*3+16], 1
    mova                xm4, [r2-32*4+16]
    mova                xm7, [r2-32*1+ 0]
    vinserti128          m4, [r2+32*0+16], 1
    vinserti128          m7, [r2+32*3+ 0], 1
    pxor                 m1, m1
    REPX       {mova x, m1}, m2, m5, m6
    test                r7d, r7d
    jl .fast3                           ; eob < 151: m1/m2/m5/m6 stay zero
    add                  r3, 32*24
    mova                xm1, [r3-32*1+16]
    mova                xm2, [r3-32*4+ 0]
    vinserti128          m1, [r3+32*3+16], 1
    vinserti128          m2, [r3+32*0+ 0], 1
    mova                xm5, [r3-32*1+ 0]
    mova                xm6, [r3-32*4+16]
    vinserti128          m5, [r3+32*3+ 0], 1
    vinserti128          m6, [r3+32*0+16], 1
.fast3:
    add                  r6, o_idct64_offset
    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1
    add                  r6, 8
    add               tmp1q, 32*8
    sub               tmp2q, 32*8
    mova                xm0, [r2-32*2+ 0]
    mova                xm3, [r2-32*3+16]
    vinserti128          m0, [r2+32*2+ 0], 1
    vinserti128          m3, [r2+32*1+16], 1
    mova                xm4, [r2-32*2+16]
    mova                xm7, [r2-32*3+ 0]
    vinserti128          m4, [r2+32*2+16], 1
    vinserti128          m7, [r2+32*1+ 0], 1
    pxor                 m1, m1
    REPX       {mova x, m1}, m2, m5, m6
    test                r7d, r7d
    jl .fast4                           ; eob < 151: m1/m2/m5/m6 stay zero
    mova                xm1, [r3-32*3+16]
    mova                xm2, [r3-32*2+ 0]
    vinserti128          m1, [r3+32*1+16], 1
    vinserti128          m2, [r3+32*2+ 0], 1
    mova                xm5, [r3-32*3+ 0]
    mova                xm6, [r3-32*2+16]
    vinserti128          m5, [r3+32*1+ 0], 1
    vinserti128          m6, [r3+32*2+16], 1
.fast4:
    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1
    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part2_pass2
    RET
ALIGN function_align
%define o_base idct64_mul - 8
cglobal_label .main_part1
    ; idct64 steps 1-5:
    ; in1/31/17/15/ 9/23/25/ 7 ->
    ;     t32a/33/34a/35/36/37a/38/39a/56a/57/58a/59/60/61a/62/63a
    ; in5/27/21/11/13/19/29/ 3 ->
    ;     t40a/41/42a/43/44/45a/46/47a/48a/49/50a/51/52/53a/54/55a
    ; Inputs are m0-m7; results are stored to 16 slots around tmp1q/tmp2q.
    vpbroadcastd        m11, [o(idct64_mul+4* 0)]
    vpbroadcastd        m13, [o(idct64_mul+4* 1)]
    vpbroadcastd        m10, [o(idct64_mul+4* 4)]
    vpbroadcastd        m12, [o(idct64_mul+4* 5)]
    pmulhrsw            m11, m0  ; t63a
    pmulhrsw             m0, m13 ; t32a
    pmulhrsw            m10, m1  ; t62a
    pmulhrsw             m1, m12 ; t33a
    vpbroadcastd         m9, [o(idct64_mul+4* 8)]
    vpbroadcastd        m13, [o(idct64_mul+4* 9)]
    vpbroadcastd         m8, [o(idct64_mul+4*12)]
    vpbroadcastd        m12, [o(idct64_mul+4*13)]
    pmulhrsw             m9, m2  ; t61a
    pmulhrsw             m2, m13 ; t34a
    pmulhrsw             m8, m3  ; t60a
    pmulhrsw             m3, m12 ; t35a
    psubsw              m12, m0, m1   ; t33
    paddsw               m0, m1       ; t32
    psubsw               m1, m3, m2   ; t34
    paddsw               m3, m2       ; t35
    psubsw               m2, m8, m9   ; t61
    paddsw               m8, m9       ; t60
    psubsw               m9, m11, m10 ; t62
    paddsw              m11, m10      ; t63
    ITX_MULSUB_2W         2,  1, 10, 13, 15, m4076, 401 ; t34a, t61a
    vpbroadcastd        m14, [o(pw_401_4076)]
    ITX_MULSUB_2W         9, 12, 10, 13, 15, 14, 13 ; t33a, t62a
    psubsw              m10, m0, m3  ; t35a
    paddsw               m0, m3      ; t32a
    psubsw               m3, m11, m8 ; t60a
    paddsw              m11, m8      ; t63a
    psubsw               m8, m9, m2  ; t34
    paddsw               m9, m2      ; t33
    psubsw               m2, m12, m1 ; t61
    paddsw              m12, m1      ; t62
    mova       [tmp1q-32*4], m0
    mova       [tmp1q-32*3], m9
    mova       [tmp2q+32*2], m12
    mova       [tmp2q+32*3], m11
    vpbroadcastd        m13, [o(pw_m4017_799)]
    vpbroadcastd        m14, [o(pw_799_4017)]
    ITX_MULSUB_2W         2,  8,  0,  1, 15, 14, 13 ; t34a, t61a
    ITX_MULSUB_2W         3, 10,  0,  1, 15, 14, 13 ; t35,  t60
    mova       [tmp1q-32*2], m2
    mova       [tmp1q-32*1], m3
    mova       [tmp2q+32*0], m10
    mova       [tmp2q+32*1], m8
    vpbroadcastd         m3, [o(idct64_mul+4*16)]
    vpbroadcastd        m11, [o(idct64_mul+4*17)]
    vpbroadcastd         m2, [o(idct64_mul+4*20)]
    vpbroadcastd        m10, [o(idct64_mul+4*21)]
    vpbroadcastd         m1, [o(idct64_mul+4*24)]
    vpbroadcastd         m9, [o(idct64_mul+4*25)]
    vpbroadcastd         m0, [o(idct64_mul+4*28)]
    vpbroadcastd         m8, [o(idct64_mul+4*29)]
    pmulhrsw             m3, m4  ; t59a
    pmulhrsw             m4, m11 ; t36a
    pmulhrsw             m2, m5  ; t58a
    pmulhrsw             m5, m10 ; t37a
    pmulhrsw             m1, m6  ; t57a
    pmulhrsw             m6, m9  ; t38a
    pmulhrsw             m0, m7  ; t56a
    pmulhrsw             m7, m8  ; t39a
    psubsw               m8, m4, m5 ; t37
    paddsw               m4, m5     ; t36
    psubsw               m5, m7, m6 ; t38
    paddsw               m7, m6     ; t39
    psubsw               m6, m0, m1 ; t57
    paddsw               m0, m1     ; t56
    psubsw               m1, m3, m2 ; t58
    paddsw               m3, m2     ; t59
    ITX_MULSUB_2W         6,  5,  2,  9, 15, m2598, 3166 ; t38a, t57a
    vpbroadcastd        m10, [o(pw_3166_2598)]
    ITX_MULSUB_2W         1,  8,  2,  9, 15, 10,  9 ; t37a, t58a
    psubsw               m2, m7, m4 ; t36a
    paddsw               m7, m4     ; t39a
    psubsw               m4, m0, m3 ; t59a
    paddsw               m0, m3     ; t56a
    psubsw               m3, m6, m1 ; t37
    paddsw               m6, m1     ; t38
    psubsw               m1, m5, m8 ; t58
    paddsw               m5, m8     ; t57
    mova       [tmp1q+32*2], m6
    mova       [tmp1q+32*3], m7
    mova       [tmp2q-32*4], m0
    mova       [tmp2q-32*3], m5
    vpbroadcastd         m6, [o(pw_m799_m4017)]
    vpbroadcastd         m7, [o(pw_m4017_799)]
    ITX_MULSUB_2W         4,  2,  0,  5, 15,  7,  6 ; t36,  t59
    ITX_MULSUB_2W         1,  3,  0,  5, 15,  7,  6 ; t37a, t58a
    mova       [tmp1q+32*0], m4
    mova       [tmp1q+32*1], m1
    mova       [tmp2q-32*2], m3
    mova       [tmp2q-32*1], m2
    ret
%define o_base pw_5 + 128
.main_part2_pass1: ; idct64 steps 6-9 + idct16/32/64 sumsub
    sub                  r6, o_idct64_offset + 8 ; undo the r6 adjustments
                                                 ; made around main_part1
    vpbroadcastd        m11, [o(pw_1567_3784)]
    vpbroadcastd        m12, [o(pw_m3784_1567)]
    vpbroadcastd        m13, [o(pw_2896_2896)]
    vpbroadcastd        m14, [o(pw_m2896_2896)]
.main_part2_pass1_loop:
    call .main_part2_internal
    IDCT64_PART2_END      0,  7,  0,  6,  9, 10
    IDCT64_PART2_END      7,  8,  5,  0,  6,  7
    IDCT64_PART2_END      8,  2,  1,  0,  6,  7
    IDCT64_PART2_END     15,  3,  4,  0,  6,  7
    cmp               tmp1q, tmp2q      ; the two pointers walk toward each
    jne .main_part2_pass1_loop          ; other, 32 bytes per iteration
    ret
cglobal_label .main_part2_internal
    mova                 m0, [tmp1q-32*12] ; t32a
    mova                 m6, [tmp2q-32*13] ; t39a
    mova                 m1, [tmp1q-32* 4] ; t40a
    mova                 m5, [tmp2q+32* 3] ; t55a
    add               tmp1q, 32
    sub               tmp2q, 32
    mova                 m2, [tmp1q+32* 3] ; t48a
    mova                 m4, [tmp2q-32* 4] ; t47a
    mova                 m3, [tmp1q+32*11] ; t56a
    mova                 m7, [tmp2q+32*12] ; t63a
    psubsw               m8, m0, m6 ; t39
    paddsw               m0, m6     ; t32
    psubsw               m6, m4, m1 ; t40
    paddsw               m4, m1     ; t47
    psubsw               m1, m2, m5 ; t55
    paddsw               m2, m5     ; t48
    psubsw               m5, m7, m3 ; t56
    paddsw               m7, m3     ; t63
    ITX_MULSUB_2W         5,  8,  3,  9, 15, 11, 12 ; t39a, t56a
    vpbroadcastd         m9, [o(pw_m1567_m3784)]
    ITX_MULSUB_2W         1,  6,  3,  9, 15, 12,  9 ; t40a, t55a
    psubsw               m3, m0, m4 ; t47a
    paddsw               m0, m4     ; t32a
    psubsw               m4, m7, m2 ; t48a
    paddsw               m7, m2     ; t63a
    psubsw               m2, m5, m1 ; t40
    paddsw               m5, m1     ; t39
    psubsw               m1, m8, m6 ; t55
    paddsw               m8, m6     ; t56
    ITX_MULSUB_2W         4,  3,  6,  9, 15, 13, 14 ; t47,  t48
    ITX_MULSUB_2W         1,  2,  6,  9, 15, 13, 14 ; t40a, t55a
    ret
.main_part2_pass2:
    sub                  r6, o_idct64_offset + 8
    vpbroadcastd        m11, [o(pw_1567_3784)]
    vpbroadcastd        m12, [o(pw_m3784_1567)]
    vpbroadcastd        m13, [o(pw_2896_2896)]
    lea                  r9, [strideq*5]    ; stride*5
    lea                  r3, [r9+strideq*1] ; stride*6
    lea                  r7, [r9+strideq*2] ; stride*7
    lea                  r8, [r3+strideq*2] ; stride*8
    lea                  r2, [dstq+r7]
.main_part2_pass2_loop:
    ; m14 is shared: rotation constant for .main_part2_internal, then the
    ; pmulhrsw scale used by IDCT64_PART2_END, so it is reloaded each pass
    vpbroadcastd        m14, [o(pw_m2896_2896)]
    call .main_part2_internal
    vpbroadcastd        m14, [o(pw_2048)]
    IDCT64_PART2_END      0,  7,  0,  6,  9, 10, strideq*0, r3*4, r8*4, r7*8
    IDCT64_PART2_END      7,  8,  5,  0,  6,  7, strideq*0, r3*4, r8*4, r7*8
    IDCT64_PART2_END      8,  2,  1,  0,  6,  7, strideq*8, r8*2, r9*8, r3*8
    IDCT64_PART2_END     15,  3,  4,  0,  6,  7, strideq*8, r8*2, r9*8, r3*8
    add                dstq, strideq
    sub                  r2, strideq
    cmp               tmp1q, tmp2q
    jne .main_part2_pass2_loop
    ret

cglobal inv_txfm_add_dct_dct_64x16_8bpc, 4, 4, 0, dst, stride, c, eob
    lea                  r6, [o_base]
    test               eobd, eobd
    jnz .normal
    movd                xm1, [o(pw_2896x8)]
    pmulhrsw            xm0, xm1, [cq]
    movd                xm2, [o(pw_8192)]
    mov                [cq], eobd
    or                  r3d, 16
; ---------------------------------------------------------------------------
; .dconly (part of inv_txfm_add_dct_dct_64x16_8bpc; also jumped to from
; inv_txfm_add_dct_dct_64x32_8bpc)
;   In:  xm0 = scaled DC value, xm1 = pw_2896x8, xm2 = first rounding
;        multiplier, r3d = number of rows, dstq/strideq
;   Applies three more pmulhrsw steps, broadcasts the low word of xm0 and adds
;   it to 64 pixels per row.
; ---------------------------------------------------------------------------
.dconly:
    pmulhrsw          xm0, xm2
    movd              xm2, [o(pw_2048)]
    pmulhrsw          xm0, xm1
    pmulhrsw          xm0, xm2
    vpbroadcastw      m0, xm0                         ; m0 = DC offset in every word
    pxor              m1, m1                          ; m1 = 0, used to widen bytes to words
.dconly_loop:
    ; One row per iteration: 2 x 32 bytes are widened to words, offset by m0,
    ; packed back with unsigned saturation and stored in place.
    mova              m2, [dstq+32*0]
    mova              m3, [dstq+32*1]
    punpckhbw         m4, m2, m1
    punpcklbw         m2, m1
    punpckhbw         m5, m3, m1
    punpcklbw         m3, m1
    paddw             m4, m0
    paddw             m2, m0
    paddw             m5, m0
    paddw             m3, m0
    packuswb          m2, m4
    packuswb          m3, m5
    mova              [dstq+32*0], m2
    mova              [dstq+32*1], m3
    add               dstq, strideq
    dec               r3d
    jg .dconly_loop
    RET
; ---------------------------------------------------------------------------
; .normal: full transform path (eob != 0).
;   PROLOGUE re-declares the frame with 7 GPRs, 16 vector registers and
;   32*67 bytes of stack, and adds the names tmp1/tmp2 (argument order as
;   defined by x86inc's PROLOGUE).
;   Every group of coefficient rows is zeroed in c right after it is loaded.
;   NOTE(review): LOAD_8ROWS is defined elsewhere; it presumably fills m0-m7
;   from the given base with the given byte step - confirm.
; ---------------------------------------------------------------------------
.normal:
    PROLOGUE          0, 7, 16, 32*67, dst, stride, c, eob, tmp1, tmp2
    LOAD_8ROWS        cq+32*0, 32*4
    pxor              m8, m8
    REPX {mova [cq+32*x], m8}, 0, 4, 8, 12, 16, 20, 24, 28
    REPX {mova x, m8}, m9, m10, m11, m12, m13, m14    ; upper idct16 inputs are zero
    mova              [rsp], m8
    lea               tmp1q, [rsp+32*7]
    call m(idct_16x16_internal_8bpc).main
    ; m1 is re-read from the stack slot before the 16 outputs are stored.
    ; NOTE(review): presumably .main leaves that output at [rsp+32*1] - confirm.
    mova              m1, [rsp+32*1]
    mova              [tmp1q-32*4], m0
    mova              [tmp1q-32*3], m1
    mova              [tmp1q-32*2], m2
    mova              [tmp1q-32*1], m3
    mova              [tmp1q+32*0], m4
    mova              [tmp1q+32*1], m5
    mova              [tmp1q+32*2], m6
    mova              [tmp1q+32*3], m7
    add               tmp1q, 32*8
    mova              [tmp1q-32*4], m8
    mova              [tmp1q-32*3], m9
    mova              [tmp1q-32*2], m10
    mova              [tmp1q-32*1], m11
    mova              [tmp1q+32*0], m12
    mova              [tmp1q+32*1], m13
    mova              [tmp1q+32*2], m14
    mova              [tmp1q+32*3], m15
    ; Rows 2, 6, ..., 30 feed the 16x32 odd-half helper.
    LOAD_8ROWS        cq+32*2, 32*4
    pxor              m8, m8
    REPX {mova [cq+32*x], m8}, 2, 6, 10, 14, 18, 22, 26, 30
    add               tmp1q, 32*8
    lea               tmp2q, [tmp1q+32*8]
    call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast
    vpbroadcastd      m15, [o(pd_2048)]
    add               tmp1q, 32*16
    add               tmp2q, 32*32
    ; First group of odd rows for the 16x64 part-1 helper.
    mova              m0, [cq+32* 1]
    mova              m1, [cq+32*31]
    mova              m2, [cq+32*17]
    mova              m3, [cq+32*15]
    mova              m4, [cq+32* 9]
    mova              m5, [cq+32*23]
    mova              m6, [cq+32*25]
    mova              m7, [cq+32* 7]
    pxor              m8, m8
    REPX {mova [cq+32*x], m8}, 1, 31, 17, 15, 9, 23, 25, 7
    ; r6 is biased by o_idct64_offset here and by a further 8 below;
    ; .main_part2_pass1 subtracts o_idct64_offset + 8 again.
    add               r6, o_idct64_offset
    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1
    add               r6, 8
    add               tmp1q, 32*8
    sub               tmp2q, 32*8
    ; Second group of odd rows.
    mova              m0, [cq+32* 5]
    mova              m1, [cq+32*27]
    mova              m2, [cq+32*21]
    mova              m3, [cq+32*11]
    mova              m4, [cq+32*13]
    mova              m5, [cq+32*19]
    mova              m6, [cq+32*29]
    mova              m7, [cq+32* 3]
    pxor              m8, m8
    REPX {mova [cq+32*x], m8}, 5, 27, 21, 11, 13, 19, 29, 3
    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1
    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part2_pass1
    sub               tmp1q, 32*36
    lea               r2, [strideq*3]                 ; r2 = stride*3
    mov               tmp2d, 4                        ; tmp2d = pass-2 iteration counter
; ---------------------------------------------------------------------------
; .pass2_loop: each iteration assembles sixteen ymm registers from 16-byte
; halves. The low lane comes from a row at r3 (= tmp1q - 32*8), the high lane
; from the matching row at tmp1q. m0-m7 take bytes 0-15 of each row, m8-m15
; take bytes 16-31.
; ---------------------------------------------------------------------------
.pass2_loop:
    lea               r3, [tmp1q-32*8]
    mova              xm0, [r3 -32*4]
    mova              xm1, [r3 -32*3]
    vinserti128       m0, [tmp1q-32*4], 1
    vinserti128       m1, [tmp1q-32*3], 1
    mova              xm2, [r3 -32*2]
    mova              xm3, [r3 -32*1]
    vinserti128       m2, [tmp1q-32*2], 1
    vinserti128       m3, [tmp1q-32*1], 1
    mova              xm4, [r3 +32*0]
    mova              xm5, [r3 +32*1]
    vinserti128       m4, [tmp1q+32*0], 1
    vinserti128       m5, [tmp1q+32*1], 1
    mova              xm6, [r3 +32*2]
    mova              xm7, [r3 +32*3]
    vinserti128       m6, [tmp1q+32*2], 1
    vinserti128       m7, [tmp1q+32*3], 1
    mova              xm8, [r3 -32*4+16]
    mova              xm9, [r3 -32*3+16]
    vinserti128       m8, [tmp1q-32*4+16], 1
    vinserti128       m9, [tmp1q-32*3+16], 1
    mova              xm10, [r3 -32*2+16]
    mova              xm11, [r3 -32*1+16]
    vinserti128       m10, [tmp1q-32*2+16], 1
    vinserti128       m11, [tmp1q-32*1+16], 1
    mova              xm12, [r3 +32*0+16]
    mova              xm13, [r3 +32*1+16]
    vinserti128       m12, [tmp1q+32*0+16], 1
    vinserti128       m13, [tmp1q+32*1+16], 1
    mova              xm14, [r3 +32*2+16]
    mova              xm15, [r3 +32*3+16]
    vinserti128       m14, [tmp1q+32*2+16], 1
    vinserti128       m15, [tmp1q+32*3+16], 1
    ; m6/m7 are spilled so that m7 can carry the pw_8192 constant into the
    ; transpose helper.
    ; NOTE(review): presumably the helper uses m7 as its rounding multiplier
    ; and picks the spilled rows up from the stack - confirm.
    mova              [rsp+32*0], m6
    mova              [rsp+32*1], m7
    vpbroadcastd      m7, [o(pw_8192)]
    call m(inv_txfm_add_dct_dct_16x32_8bpc).transpose_2x8x8_round
    call m(idct_16x16_internal_8bpc).main
    mova              [rsp+32*0], m15                 ; park m15 so the register can hold pw_2048
    vpbroadcastd      m15, [o(pw_2048)]
    ; m1 is not in this list: it is loaded from [rsp+32*1] and scaled two
    ; lines below.
    REPX {pmulhrsw x, m15}, m0, m2, m3, m4, m5, m6, m7
    WRITE_16X2        2, 3, 1, 2, strideq*2, r2
    pmulhrsw          m1, m15, [rsp+32*1]
    WRITE_16X2        0, 1, 2, 3, strideq*0, strideq*1
    lea               r3, [dstq+strideq*4]
    ; From here on the preprocessor name dstq expands to r3, so later macro
    ; expansions that reference dstq address through r3.
    %define dstq r3
    WRITE_16X2        4, 5, 2, 3, strideq*0, strideq*1
; ---------------------------------------------------------------------------
; Continuation of .pass2_loop of inv_txfm_add_dct_dct_64x16_8bpc.
; dstq has been %define'd to r3 just above this point, so r3 is stepped by
; four strides between row groups and the real destination pointer is advanced
; through r0.
; ---------------------------------------------------------------------------
    WRITE_16X2        6, 7, 2, 3, strideq*2, r2
    REPX {pmulhrsw x, m15}, m8, m9, m10, m11, m12, m13, m14
    lea               r3, [r3+strideq*4]
    WRITE_16X2        8, 9, 2, 3, strideq*0, strideq*1
    WRITE_16X2        10, 11, 2, 3, strideq*2, r2
    pmulhrsw          m15, [rsp+32*0]                 ; m15 = parked value * pw_2048
    lea               r3, [r3+strideq*4]
    WRITE_16X2        12, 13, 2, 3, strideq*0, strideq*1
    WRITE_16X2        14, 15, 2, 3, strideq*2, r2
    add               tmp1q, 32*16
    ; NOTE(review): r0 is presumably the register behind dst (first named
    ; argument); it is written directly because dstq currently means r3.
    add               r0, 16
    dec               tmp2d
    jg .pass2_loop
    RET

; ---------------------------------------------------------------------------
; inv_txfm_add_dct_dct_32x64_8bpc(dst, stride, c, eob)
;   eob == 0: scales the leading coefficient and tail-jumps to the 32x8
;   .dconly code with r3d = 64. Otherwise runs .normal.
; ---------------------------------------------------------------------------
cglobal inv_txfm_add_dct_dct_32x64_8bpc, 4, 4, 0, dst, stride, c, eob
    lea               r6, [o_base]                    ; r6 = base for o() addressing
    test              eobd, eobd
    jnz .normal
    movd              xm1, [o(pw_2896x8)]
    pmulhrsw          xm0, xm1, [cq]
    movd              xm2, [o(pw_16384)]
    mov               [cq], eobd                      ; eobd is 0 here: clears the first dword of c
    pmulhrsw          xm0, xm1
    ; NOTE(review): relies on r3 being the (zero) eob register - confirm.
    or                r3d, 64
    jmp m(inv_txfm_add_dct_dct_32x8_8bpc).dconly
.normal:
    PROLOGUE          0, 11, 16, 32*99, dst, stride, c, eob, tmp1, tmp2
    lea               tmp1q, [rsp+32*7]
    ; r10d = (eob - 136) >> 31 (arithmetic): all ones when eob < 136, zero
    ; otherwise. The low byte r10b is the "fast" flag tested below; bit 31
    ; doubles as the loop counter (see the add/jnc at the end of pass 1).
    lea               r10d, [eobq-136]
    sar               r10d, 31
; ---------------------------------------------------------------------------
; .pass1_loop: one iteration per 32-byte column group of c (cq += 32 per
; iteration). Coefficient rows are zeroed in c after being loaded.
; NOTE(review): LOAD_8ROWS / LOAD_8ROWS_H are defined elsewhere; the meaning
; of their third argument is not visible here.
; ---------------------------------------------------------------------------
.pass1_loop:
    lea               tmp2q, [tmp1q+32*16]
    LOAD_8ROWS        cq+64*1, 64*2, 1
    pxor              m8, m8
    REPX {mova [cq+64*x], m8}, 1, 3, 5, 7, 9, 11, 13, 15
    test              r10b, r10b
    jnz .fast
    ; Slow path: rows 16-31 are also read, then all of them are cleared.
    LOAD_8ROWS_H      cq+64*17, 64*2, 2
    call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf
    LOAD_8ROWS_H      cq+64*16, 64*2, 1
    mova              [rsp], m15
    pxor              m15, m15
    REPX {mova [cq+64*x], m15}, 16, 17, 18, 19, 20, 21, 22, 23, \
                                24, 25, 26, 27, 28, 29, 30, 31
    jmp .idct16
.fast:
    ; Fast path: rows 16-31 are not read; the upper idct16 inputs (m8-m14 and
    ; the [rsp] slot) are zero instead.
    call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast
    pxor              m8, m8
    REPX {mova x, m8}, m9, m10, m11, m12, m13, m14
    mova              [rsp], m8
.idct16:
    LOAD_8ROWS        cq+64*0, 64*2, 1
    pxor              m15, m15
    REPX {mova [cq+64*x], m15}, 0, 2, 4, 6, 8, 10, 12, 14
    call m(idct_16x16_internal_8bpc).main
    call m(inv_txfm_add_dct_dct_32x16_8bpc).pass1_end
    vpbroadcastd      m7, [o(pw_16384)]
    call m(inv_txfm_add_dct_dct_16x32_8bpc).transpose_2x8x8_round
    ; Even-numbered registers go to the 8 rows around tmp1q + 32*48, the
    ; odd-numbered ones 32*24 bytes further on; m15 is first restored from
    ; [rsp].
    lea               r3, [tmp1q+32*48]
    mova              m15, [rsp]
    mova              [r3-32*4], m0
    mova              [r3-32*3], m2
    mova              [r3-32*2], m4
    mova              [r3-32*1], m6
    mova              [r3+32*0], m8
    mova              [r3+32*1], m10
    mova              [r3+32*2], m12
    mova              [r3+32*3], m14
    add               r3, 32*24
    mova              [r3-32*4], m1
    mova              [r3-32*3], m3
    mova              [r3-32*2], m5
    mova              [r3-32*1], m7
    mova              [r3+32*0], m9
    mova              [r3+32*1], m11
    mova              [r3+32*2], m13
    mova              [r3+32*3], m15
    ; The 8 rows at tmp1q are scaled by pw_16384, transposed, and written back
    ; interleaved with the scaling of the 8 rows at tmp2q, which are then
    ; transposed in turn.
    vpbroadcastd      m9, [o(pw_16384)]
    pmulhrsw          m0, m9, [tmp1q-32*4]
    pmulhrsw          m1, m9, [tmp1q-32*3]
    pmulhrsw          m2, m9, [tmp1q-32*2]
    pmulhrsw          m3, m9, [tmp1q-32*1]
    pmulhrsw          m4, m9, [tmp1q+32*0]
    pmulhrsw          m5, m9, [tmp1q+32*1]
    pmulhrsw          m6, m9, [tmp1q+32*2]
    pmulhrsw          m7, m9, [tmp1q+32*3]
    call m(inv_txfm_add_identity_identity_8x32_8bpc).transpose8x8
    mova              [tmp1q-32*4], m0
    pmulhrsw          m0, m9, [tmp2q-32*4]
    mova              [tmp2q-32*4], m1
    pmulhrsw          m1, m9, [tmp2q-32*3]
    mova              [tmp1q-32*3], m2
    pmulhrsw          m2, m9, [tmp2q-32*2]
    mova              [tmp2q-32*3], m3
    pmulhrsw          m3, m9, [tmp2q-32*1]
    mova              [tmp1q-32*2], m4
    pmulhrsw          m4, m9, [tmp2q+32*0]
    mova              [tmp2q-32*2], m5
    pmulhrsw          m5, m9, [tmp2q+32*1]
    mova              [tmp1q-32*1], m6
    pmulhrsw          m6, m9, [tmp2q+32*2]
    mova              [tmp2q-32*1], m7
    pmulhrsw          m7, m9, [tmp2q+32*3]
    call m(inv_txfm_add_identity_identity_8x32_8bpc).transpose8x8
    mova              [tmp1q+32*0], m0
    mova              [tmp2q+32*0], m1
    mova              [tmp1q+32*1], m2
    mova              [tmp2q+32*1], m3
    mova              [tmp1q+32*2], m4
    mova              [tmp2q+32*2], m5
    mova              [tmp1q+32*3], m6
    mova              [tmp2q+32*3], m7
    add               cq, 32
    add               tmp1q, 32*8
    ; Flips bit 31 without touching r10b. The add carries on the first pass
    ; when r10d was all ones (body runs once) and on the second pass when it
    ; was zero (body runs twice).
    add               r10d, 0x80000000
    jnc .pass1_loop
    lea               r2, [rsp+32*55]
    lea               r7, [r2+32*24]
; ---------------------------------------------------------------------------
; .pass2_loop: r2/r3 (= r2 + 32*8) and r7/r8 (= r7 + 32*8) are the row
; pointers for this iteration. Whenever the fast flag in r10b is set, the
; registers that would be loaded through r3/r8 stay zero.
; ---------------------------------------------------------------------------
.pass2_loop:
    lea               r3, [r2+32*8]
    lea               r8, [r7+32*8]
    mova              m0, [r2-32*4]
    mova              m1, [r2-32*2]
    mova              m2, [r2+32*0]
    mova              m3, [r2+32*2]
    pxor              m4, m4
    REPX {mova x, m4}, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14
    test              r10b, r10b
    jnz .fast2
    mova              m4, [r3-32*4]
    mova              m5, [r3-32*2]
    mova              m6, [r3+32*0]
    mova              m7, [r3+32*2]
.fast2:
    mova              [rsp], m8                       ; m8 is zero here
    lea               tmp1q, [rsp+32*39]
    call m(idct_16x16_internal_8bpc).main
    mova              m1, [rsp+32*1]
    mova              [tmp1q-32*4], m0
    mova              [tmp1q-32*3], m1
    mova              [tmp1q-32*2], m2
    mova              [tmp1q-32*1], m3
    mova              [tmp1q+32*0], m4
    mova              [tmp1q+32*1], m5
    mova              [tmp1q+32*2], m6
    mova              [tmp1q+32*3], m7
    add               tmp1q, 32*8
    mova              [tmp1q-32*4], m8
    mova              [tmp1q-32*3], m9
    mova              [tmp1q-32*2], m10
    mova              [tmp1q-32*1], m11
    mova              [tmp1q+32*0], m12
    mova              [tmp1q+32*1], m13
    mova              [tmp1q+32*2], m14
    mova              [tmp1q+32*3], m15
    ; Inputs for the 16x32 odd-half helper.
    mova              m0, [r2-32*3]
    mova              m1, [r2-32*1]
    mova              m2, [r2+32*1]
    mova              m3, [r2+32*3]
    pxor              m4, m4
    REPX {mova x, m4}, m5, m6, m7
    test              r10b, r10b
    jnz .fast3
    mova              m4, [r3-32*3]
    mova              m5, [r3-32*1]
    mova              m6, [r3+32*1]
    mova              m7, [r3+32*3]
.fast3:
    add               tmp1q, 32*8
    lea               tmp2q, [tmp1q+32*8]
    call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast
    vpbroadcastd      m15, [o(pd_2048)]
    add               tmp1q, 32*16
    add               tmp2q, 32*32
    ; First input group for the 16x64 part-1 helper.
    mova              m0, [r7-32*4]
    mova              m3, [r7+32*3]
    mova              m4, [r7+32*0]
    mova              m7, [r7-32*1]
    pxor              m1, m1
    REPX {mova x, m1}, m2, m5, m6
    test              r10b, r10b
    jnz .fast4
    mova              m1, [r8+32*3]
    mova              m2, [r8-32*4]
    mova              m5, [r8-32*1]
    mova              m6, [r8+32*0]
.fast4:
    ; r6 is biased by o_idct64_offset here and by a further 8 below;
    ; .main_part2_pass2 subtracts o_idct64_offset + 8 again.
    add               r6, o_idct64_offset
    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1
    add               r6, 8
    add               tmp1q, 32*8
    sub               tmp2q, 32*8
    ; Second input group for the part-1 helper.
    mova              m0, [r7-32*2]
    mova              m3, [r7+32*1]
    mova              m4, [r7+32*2]
    mova              m7, [r7-32*3]
    pxor              m1, m1
    REPX {mova x, m1}, m2, m5, m6
    test              r10b, r10b
    jnz .fast5
; ---------------------------------------------------------------------------
; Continuation of .pass2_loop of inv_txfm_add_dct_dct_32x64_8bpc: these four
; loads are skipped when the fast flag (r10b) is set, leaving m1/m2/m5/m6 zero.
; ---------------------------------------------------------------------------
    mova              m1, [r8+32*1]
    mova              m2, [r8-32*2]
    mova              m5, [r8-32*3]
    mova              m6, [r8+32*2]
.fast5:
    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1
    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part2_pass2
    ; Bit 31 of r10d is clear when pass 2 starts (r10d is 0 or 0x7fffffff after
    ; pass 1), so this add does not carry the first time and carries the
    ; second time: pass 2 runs exactly twice.
    add               r10d, 0x80000000
    jc .ret
    lea               r2, [rsp+32*7]
    lea               r7, [r2+32*16]
    ; .main_part2_pass2 leaves stride*8 in r8. dst is moved back by that
    ; amount, then forward by stride*4 + 16 bytes for the second iteration.
    sub               dstq, r8
    lea               dstq, [dstq+strideq*4+16]
    jmp .pass2_loop
.ret:
    RET

; ---------------------------------------------------------------------------
; inv_txfm_add_dct_dct_64x32_8bpc(dst, stride, c, eob)
;   eob == 0: scales the leading coefficient and tail-jumps to the 64x16
;   .dconly code with r3d = 32. Otherwise runs .normal.
; ---------------------------------------------------------------------------
cglobal inv_txfm_add_dct_dct_64x32_8bpc, 4, 4, 0, dst, stride, c, eob
    lea               r6, [o_base]                    ; r6 = base for o() addressing
    test              eobd, eobd
    jnz .normal
    movd              xm1, [o(pw_2896x8)]
    pmulhrsw          xm0, xm1, [cq]
    movd              xm2, [o(pw_16384)]
    mov               [cq], eobd                      ; eobd is 0 here: clears the first dword of c
    pmulhrsw          xm0, xm1
    ; NOTE(review): relies on r3 being the (zero) eob register - confirm.
    or                r3d, 32
    jmp m(inv_txfm_add_dct_dct_64x16_8bpc).dconly
.normal:
    PROLOGUE          0, 9, 16, 32*131, dst, stride, c, eob, tmp1, tmp2, \
                                        base, tmp3, tmp4
    lea               tmp1q, [rsp+32*7]
    ; tmp4d = eob - 136. Its top bit decides how often pass 1 runs (see the
    ; add/jnc at the end of the loop); bit 30 is tested as the "fast" flag in
    ; pass 2.
    lea               tmp4d, [eobq-136]
; ---------------------------------------------------------------------------
; .pass1_loop: one iteration per 32-byte column group of c (cq += 32 per
; iteration). Coefficient rows are zeroed in c after being read.
; NOTE(review): LOAD_8ROWS is defined elsewhere; the meaning of its third
; argument is not visible here.
; ---------------------------------------------------------------------------
.pass1_loop:
    LOAD_8ROWS        cq+64*0, 64*4, 1
    pxor              m8, m8
    REPX {mova [cq+64*x], m8}, 0, 4, 8, 12, 16, 20, 24, 28
    REPX {mova x, m8}, m9, m10, m11, m12, m13, m14    ; upper idct16 inputs are zero
    mova              [rsp], m8
    call m(idct_16x16_internal_8bpc).main
    mova              m1, [rsp+32*1]
    mova              [tmp1q-32*4], m0
    mova              [tmp1q-32*3], m1
    mova              [tmp1q-32*2], m2
    mova              [tmp1q-32*1], m3
    mova              [tmp1q+32*0], m4
    mova              [tmp1q+32*1], m5
    mova              [tmp1q+32*2], m6
    mova              [tmp1q+32*3], m7
    add               tmp1q, 32*8
    mova              [tmp1q-32*4], m8
    mova              [tmp1q-32*3], m9
    mova              [tmp1q-32*2], m10
    mova              [tmp1q-32*1], m11
    mova              [tmp1q+32*0], m12
    mova              [tmp1q+32*1], m13
    mova              [tmp1q+32*2], m14
    mova              [tmp1q+32*3], m15
    ; Rows 2, 6, ..., 30 feed the 16x32 odd-half helper.
    LOAD_8ROWS        cq+64*2, 64*4, 1
    pxor              m8, m8
    REPX {mova [cq+64*x], m8}, 2, 6, 10, 14, 18, 22, 26, 30
    add               tmp1q, 32*8
    lea               tmp2q, [tmp1q+32*8]
    call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast
    vpbroadcastd      m15, [o(pd_2048)]
    add               tmp1q, 32*16
    add               tmp2q, 32*32
    ; First group of odd rows, each multiplied by pw_2896x8 while loading.
    ; m7 holds the constant and is overwritten last.
    vpbroadcastd      m7, [o(pw_2896x8)]
    pmulhrsw          m0, m7, [cq+64* 1]
    pmulhrsw          m1, m7, [cq+64*31]
    pmulhrsw          m2, m7, [cq+64*17]
    pmulhrsw          m3, m7, [cq+64*15]
    pmulhrsw          m4, m7, [cq+64* 9]
    pmulhrsw          m5, m7, [cq+64*23]
    pmulhrsw          m6, m7, [cq+64*25]
    pmulhrsw          m7, [cq+64* 7]
    pxor              m8, m8
    REPX {mova [cq+64*x], m8}, 1, 31, 17, 15, 9, 23, 25, 7
    add               r6, o_idct64_offset
    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1
    ; r6 currently holds base + o_idct64_offset, so the symbol expression
    ; subtracts that offset again.
    ; NOTE(review): assumes o() addresses relative to r6 - confirm against the
    ; macro definition.
    vpbroadcastd      m7, [o(pw_2896x8-(o_idct64_offset))]
    add               r6, 8
    add               tmp1q, 32*8
    sub               tmp2q, 32*8
    ; Second group of odd rows, scaled the same way.
    pmulhrsw          m0, m7, [cq+64* 5]
    pmulhrsw          m1, m7, [cq+64*27]
    pmulhrsw          m2, m7, [cq+64*21]
    pmulhrsw          m3, m7, [cq+64*11]
    pmulhrsw          m4, m7, [cq+64*13]
    pmulhrsw          m5, m7, [cq+64*19]
    pmulhrsw          m6, m7, [cq+64*29]
    pmulhrsw          m7, [cq+64* 3]
    pxor              m8, m8
    REPX {mova [cq+64*x], m8}, 5, 27, 21, 11, 13, 19, 29, 3
    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1
    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part2_pass1
    sub               tmp1q, 32*44
    vpbroadcastd      m10, [o(pw_16384)]
    call m(inv_txfm_add_dct_dct_64x32_8bpc).transpose_round_interleave
    add               cq, 32
    ; When eob < 136, tmp4d is negative, so this add carries at once: the body
    ; runs once and bit 30 of tmp4d ends up set. Otherwise the add carries on
    ; the second pass: the body runs twice and tmp4d is back at eob - 136.
    add               tmp4d, 0x80000000
    jnc .pass1_loop
    lea               tmp1q, [rsp+32*15]
    imul              r2, strideq, 19
    lea               r3, [strideq*3]                 ; r3 = stride*3
    add               r2, dstq                        ; r2 = dst + stride*19
    mov               tmp4b, 4                        ; low byte = pass-2 counter; upper bits kept
; ---------------------------------------------------------------------------
; .pass2_loop: four iterations, counted in tmp4b. Bit 30 of tmp4d selects the
; fast odd-half path.
; NOTE(review): assumes eob - 136 < 2^30 so that bit 30 is only ever set by
; the eob < 136 case described above.
; ---------------------------------------------------------------------------
.pass2_loop:
    lea               tmp2q, [tmp1q+32*64]
    LOAD_8ROWS        tmp1q-32*4, 32
    test              tmp4d, 0x40000000
    jnz .fast
    LOAD_8ROWS_H      tmp2q-32*4, 32
    call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf
    lea               tmp3q, [tmp2q-32*8]
    LOAD_8ROWS_H      tmp3q-32*4, 32
    mova              [rsp], m15
    jmp .idct16
.fast:
    ; Fast path: the rows at tmp2q are not read; the upper idct16 inputs
    ; (m8-m14 and the [rsp] slot) are zero instead.
    call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast
    pxor              m8, m8
    REPX {mova x, m8}, m9, m10, m11, m12, m13, m14
    mova              [rsp], m8
.idct16:
    lea               tmp3q, [tmp1q-32*8]
    LOAD_8ROWS        tmp3q-32*4, 32
    call m(idct_16x16_internal_8bpc).main
    call m(inv_txfm_add_dct_dct_16x32_8bpc).pass2_end
    add               tmp1q, 32*16
    ; dst moves back by stride*3 and right by 16 bytes; r2 moves forward by
    ; stride*3 + 16.
    ; NOTE(review): presumably this undoes pointer movement done inside
    ; .pass2_end - confirm.
    sub               dstq, r3
    lea               r2, [r2+r3+16]
    add               dstq, 16
    dec               tmp4b
    jg .pass2_loop
    RET
ALIGN function_align
; NOTE(review): .transpose_round_interleave continues past this point; only
; its first instructions are reproduced here, unchanged.
.transpose_round_interleave:
    mov               tmp3d, 4
.loop:
    lea               tmp2q, [tmp1q+32*8]
    mova              xm0, [tmp1q-32*4]
5304*c0909341SAndroid Build Coastguard Worker mova xm1, [tmp1q-32*3] 5305*c0909341SAndroid Build Coastguard Worker vinserti128 m0, [tmp2q-32*4], 1 5306*c0909341SAndroid Build Coastguard Worker vinserti128 m1, [tmp2q-32*3], 1 5307*c0909341SAndroid Build Coastguard Worker mova xm2, [tmp1q-32*2] 5308*c0909341SAndroid Build Coastguard Worker mova xm3, [tmp1q-32*1] 5309*c0909341SAndroid Build Coastguard Worker vinserti128 m2, [tmp2q-32*2], 1 5310*c0909341SAndroid Build Coastguard Worker vinserti128 m3, [tmp2q-32*1], 1 5311*c0909341SAndroid Build Coastguard Worker mova xm4, [tmp1q+32*0] 5312*c0909341SAndroid Build Coastguard Worker mova xm5, [tmp1q+32*1] 5313*c0909341SAndroid Build Coastguard Worker vinserti128 m4, [tmp2q+32*0], 1 5314*c0909341SAndroid Build Coastguard Worker vinserti128 m5, [tmp2q+32*1], 1 5315*c0909341SAndroid Build Coastguard Worker mova xm6, [tmp1q+32*2] 5316*c0909341SAndroid Build Coastguard Worker mova xm7, [tmp1q+32*3] 5317*c0909341SAndroid Build Coastguard Worker vinserti128 m6, [tmp2q+32*2], 1 5318*c0909341SAndroid Build Coastguard Worker vinserti128 m7, [tmp2q+32*3], 1 5319*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m10}, m0, m1, m2, m3, m4, m5, m6, m7 5320*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_identity_identity_8x32_8bpc).transpose8x8 5321*c0909341SAndroid Build Coastguard Worker mova xm8, [tmp1q-32*4+16] 5322*c0909341SAndroid Build Coastguard Worker mova xm9, [tmp1q-32*3+16] 5323*c0909341SAndroid Build Coastguard Worker vinserti128 m8, [tmp2q-32*4+16], 1 5324*c0909341SAndroid Build Coastguard Worker vinserti128 m9, [tmp2q-32*3+16], 1 5325*c0909341SAndroid Build Coastguard Worker mova [tmp1q-32*4], m0 5326*c0909341SAndroid Build Coastguard Worker mova [tmp2q-32*4], m1 5327*c0909341SAndroid Build Coastguard Worker mova [tmp1q-32*3], m2 5328*c0909341SAndroid Build Coastguard Worker mova [tmp2q-32*3], m3 5329*c0909341SAndroid Build Coastguard Worker mova xm2, [tmp1q-32*2+16] 5330*c0909341SAndroid Build 
Coastguard Worker mova xm3, [tmp1q-32*1+16] 5331*c0909341SAndroid Build Coastguard Worker vinserti128 m2, [tmp2q-32*2+16], 1 5332*c0909341SAndroid Build Coastguard Worker vinserti128 m3, [tmp2q-32*1+16], 1 5333*c0909341SAndroid Build Coastguard Worker mova [tmp1q-32*2], m4 5334*c0909341SAndroid Build Coastguard Worker mova [tmp2q-32*2], m5 5335*c0909341SAndroid Build Coastguard Worker mova [tmp1q-32*1], m6 5336*c0909341SAndroid Build Coastguard Worker mova [tmp2q-32*1], m7 5337*c0909341SAndroid Build Coastguard Worker mova xm4, [tmp1q+32*0+16] 5338*c0909341SAndroid Build Coastguard Worker mova xm5, [tmp1q+32*1+16] 5339*c0909341SAndroid Build Coastguard Worker vinserti128 m4, [tmp2q+32*0+16], 1 5340*c0909341SAndroid Build Coastguard Worker vinserti128 m5, [tmp2q+32*1+16], 1 5341*c0909341SAndroid Build Coastguard Worker mova xm6, [tmp1q+32*2+16] 5342*c0909341SAndroid Build Coastguard Worker mova xm7, [tmp1q+32*3+16] 5343*c0909341SAndroid Build Coastguard Worker vinserti128 m6, [tmp2q+32*2+16], 1 5344*c0909341SAndroid Build Coastguard Worker vinserti128 m7, [tmp2q+32*3+16], 1 5345*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m8, m10 5346*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m9, m10 5347*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m10}, m2, m3, m4, m5, m6, m7 5348*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_identity_identity_8x32_8bpc).transpose8x8 5349*c0909341SAndroid Build Coastguard Worker mova [tmp1q+32*0], m0 5350*c0909341SAndroid Build Coastguard Worker mova [tmp2q+32*0], m1 5351*c0909341SAndroid Build Coastguard Worker mova [tmp1q+32*1], m2 5352*c0909341SAndroid Build Coastguard Worker mova [tmp2q+32*1], m3 5353*c0909341SAndroid Build Coastguard Worker mova [tmp1q+32*2], m4 5354*c0909341SAndroid Build Coastguard Worker mova [tmp2q+32*2], m5 5355*c0909341SAndroid Build Coastguard Worker mova [tmp1q+32*3], m6 5356*c0909341SAndroid Build Coastguard Worker mova [tmp2q+32*3], m7 5357*c0909341SAndroid Build 
; Closing instructions of the `.loop` body under
; .transpose_round_interleave (its label and the `mov tmp3d, 4` counter
; setup appear earlier in the file): step tmp1q by 32*16 bytes and
; repeat until tmp3d has counted down to zero.
    add             tmp1q, 32*16
    dec             tmp3d
    jg .loop
    ret

;-----------------------------------------------------------------------
; inv_txfm_add_dct_dct_64x64_8bpc(dst, stride, c, eob)
;
; In:    dst, stride, c, eob - the four named cglobal arguments
;        (r0-r3 under x86inc's register naming).
; Flow:  eob == 0 -> scale the first coefficient and tail-jump to the
;                    64x16 .dconly code with r3d = 64.
;        eob != 0 -> .normal: two loops (.pass1_loop / .pass2_loop) that
;                    communicate through a 32*199-byte stack buffer.
;                    Pass 1 reads and zeroes the rows at c; pass 2 reads
;                    the stack buffer and advances dst.
; Regs:  r6   = constant base used by the o() macro
;        r10d = bit-packed loop state (explained at .normal)
;        r2/r3 are reused as scratch row pointers in pass 2
;        r8   = subtracted from dst once per pass-2 iteration; it is not
;               written anywhere in this body, so presumably one of the
;               called helpers sets it - TODO confirm.
;-----------------------------------------------------------------------
cglobal inv_txfm_add_dct_dct_64x64_8bpc, 4, 4, 0, dst, stride, c, eob
    lea             r6, [o_base]
    test            eobd, eobd
    jnz .normal
    ; eob == 0: only the coefficient at [cq] is used. xm0 = that value
    ; scaled by pw_2896x8; xm2 = pw_8192 is handed to .dconly (how it is
    ; consumed there is not visible from here).
    movd            xm1, [o(pw_2896x8)]
    pmulhrsw        xm0, xm1, [cq]
    movd            xm2, [o(pw_8192)]
    mov             [cq], eobd          ; eobd is 0 here: clears the coefficient
    or              r3d, 64             ; r3 (= eob) is 0, so r3d becomes 64
    jmp m(inv_txfm_add_dct_dct_64x16_8bpc).dconly
.normal:
    ; Re-declare the frame: 0 arguments to (re)load, 11 GPRs, 16 vector
    ; registers, 32*199 bytes of stack, plus the tmp1/tmp2 names.
    PROLOGUE        0, 11, 16, 32*199, dst, stride, c, eob, tmp1, tmp2
    lea             tmp1q, [rsp+32*71]
    ; r10d = eob - 136. Bit 31 doubles as the pass-1 trip counter: the
    ; `add r10d, 0x80000000` at the bottom of the loop flips it and
    ; carries out only when it was set, so (for the small non-negative
    ; eob values assumed here) pass 1 runs once when eob < 136 and twice
    ; otherwise.
    lea             r10d, [eobq-136]
.pass1_loop:
    ; Rows 0, 4, ..., 28 (row stride 64 bytes). The rows are zeroed in
    ; memory right after being fetched; LOAD_8ROWS is defined elsewhere
    ; and presumably fills m0-m7.
    LOAD_8ROWS      cq+64*0, 64*4
    pxor            m8, m8
    REPX            {mova [cq+64*x], m8}, 0, 4, 8, 12, 16, 20, 24, 28
    ; Remaining idct16 inputs are zero: m9-m14 here, and the [rsp] slot
    ; (presumably the 16th input, which has no register - TODO confirm).
    REPX            {mova x, m8}, m9, m10, m11, m12, m13, m14
    mova            [rsp], m8
    call m(idct_16x16_internal_8bpc).main
    mova            m1, [rsp+32*1]      ; presumably output 1, spilled by .main
    ; Store the 16 result registers to consecutive 32-byte slots.
    mova            [tmp1q-32*4], m0
    mova            [tmp1q-32*3], m1
    mova            [tmp1q-32*2], m2
    mova            [tmp1q-32*1], m3
    mova            [tmp1q+32*0], m4
    mova            [tmp1q+32*1], m5
    mova            [tmp1q+32*2], m6
    mova            [tmp1q+32*3], m7
    add             tmp1q, 32*8
    mova            [tmp1q-32*4], m8
    mova            [tmp1q-32*3], m9
    mova            [tmp1q-32*2], m10
    mova            [tmp1q-32*1], m11
    mova            [tmp1q+32*0], m12
    mova            [tmp1q+32*1], m13
    mova            [tmp1q+32*2], m14
    mova            [tmp1q+32*3], m15
    ; Rows 2, 6, ..., 30 feed the 16x32 odd-half helper.
    LOAD_8ROWS      cq+64*2, 64*4
    pxor            m8, m8
    REPX            {mova [cq+64*x], m8}, 2, 6, 10, 14, 18, 22, 26, 30
    add             tmp1q, 32*8
    lea             tmp2q, [tmp1q+32*8]
    call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast
    vpbroadcastd    m15, [o(pd_2048)]
    add             tmp1q, 32*16
    add             tmp2q, 32*32
    ; Odd rows, first group of eight, in the order main_part1 is given
    ; them: 1, 31, 17, 15, 9, 23, 25, 7.
    mova            m0, [cq+64* 1]
    mova            m1, [cq+64*31]
    mova            m2, [cq+64*17]
    mova            m3, [cq+64*15]
    mova            m4, [cq+64* 9]
    mova            m5, [cq+64*23]
    mova            m6, [cq+64*25]
    mova            m7, [cq+64* 7]
    pxor            m8, m8
    REPX            {mova [cq+64*x], m8}, 1, 31, 17, 15, 9, 23, 25, 7
    ; NOTE(review): r6 is biased by o_idct64_offset (and then by 8) for
    ; the main_part1 calls, yet plain o() is used again after
    ; main_part2_pass1, so the helpers presumably undo the bias - confirm.
    add             r6, o_idct64_offset
    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1
    add             r6, 8
    add             tmp1q, 32*8
    sub             tmp2q, 32*8
    ; Odd rows, second group: 5, 27, 21, 11, 13, 19, 29, 3.
    mova            m0, [cq+64* 5]
    mova            m1, [cq+64*27]
    mova            m2, [cq+64*21]
    mova            m3, [cq+64*11]
    mova            m4, [cq+64*13]
    mova            m5, [cq+64*19]
    mova            m6, [cq+64*29]
    mova            m7, [cq+64* 3]
    pxor            m8, m8
    REPX            {mova [cq+64*x], m8}, 5, 27, 21, 11, 13, 19, 29, 3
    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1
    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part2_pass1
    sub             tmp1q, 32*44
    vpbroadcastd    m10, [o(pw_8192)]   ; pmulhrsw factor used by the helper below
    call m(inv_txfm_add_dct_dct_64x32_8bpc).transpose_round_interleave
    add             cq, 32              ; continue at byte offset 32 of each 64-byte row
    add             r10d, 0x80000000    ; flip bit 31; CF set iff it was set
    jnc .pass1_loop
    lea             tmp1q, [rsp+32*7]
    ; Bit 30 of r10d is now set exactly when pass 1 ran once
    ; (r10d = eob - 136 + 2^31 with eob - 136 a small negative number)
    ; and clear when it ran twice (r10d is back to eob - 136 >= 0).
    ; Writing only the low byte leaves that bit alone, so r10b can serve
    ; as the pass-2 iteration counter (4 iterations).
    mov             r10b, 4
.pass2_loop:
    ; idct16 stage: slots -4, -2, 0, +2 relative to r2 go to m0-m3.
    lea             r2, [tmp1q+32*64]
    mova            m0, [r2-32*4]
    mova            m1, [r2-32*2]
    mova            m2, [r2+32*0]
    mova            m3, [r2+32*2]
    pxor            m4, m4
    REPX            {mova x, m4}, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14
    mova            [rsp], m4
    test            r10d, 0x40000000
    jnz .fast                           ; single pass 1: keep m4-m7 zero
    ; r3 is only initialised on this path; the later `add r3, 32*8` is
    ; harmless on the fast path because r3 is never dereferenced there.
    lea             r3, [r2+32*64]
    mova            m4, [r3-32*4]
    mova            m5, [r3-32*2]
    mova            m6, [r3+32*0]
    mova            m7, [r3+32*2]
.fast:
    call m(idct_16x16_internal_8bpc).main
    mova            m1, [rsp+32*1]      ; presumably output 1, spilled by .main
    mova            [tmp1q-32*4], m0
    mova            [tmp1q-32*3], m1
    mova            [tmp1q-32*2], m2
    mova            [tmp1q-32*1], m3
    mova            [tmp1q+32*0], m4
    mova            [tmp1q+32*1], m5
    mova            [tmp1q+32*2], m6
    mova            [tmp1q+32*3], m7
    add             tmp1q, 32*8
    mova            [tmp1q-32*4], m8
    mova            [tmp1q-32*3], m9
    mova            [tmp1q-32*2], m10
    mova            [tmp1q-32*1], m11
    mova            [tmp1q+32*0], m12
    mova            [tmp1q+32*1], m13
    mova            [tmp1q+32*2], m14
    mova            [tmp1q+32*3], m15
    ; Odd-half stage: slots -3, -1, +1, +3 of the same r2 / r3 groups.
    mova            m0, [r2-32*3]
    mova            m1, [r2-32*1]
    mova            m2, [r2+32*1]
    mova            m3, [r2+32*3]
    pxor            m4, m4
    REPX            {mova x, m4}, m5, m6, m7
    test            r10d, 0x40000000
    jnz .fast2
    mova            m4, [r3-32*3]
    mova            m5, [r3-32*1]
    mova            m6, [r3+32*1]
    mova            m7, [r3+32*3]
.fast2:
    add             tmp1q, 32*8
    lea             tmp2q, [tmp1q+32*8]
    call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast
    vpbroadcastd    m15, [o(pd_2048)]
    add             r2, 32*8
    add             r3, 32*8
    add             tmp1q, 32*16
    add             tmp2q, 32*32
    ; First main_part1 call. The trailing numbers are the original
    ; annotations naming the input each load supplies; the r3-based
    ; inputs (31, 17, 23, 25) stay zero on the fast path.
    mova            m0, [r2-32*4]       ; 1
    mova            m3, [r2+32*3]       ; 15
    mova            m4, [r2+32*0]       ; 9
    mova            m7, [r2-32*1]       ; 7
    pxor            m1, m1
    REPX            {mova x, m1}, m2, m5, m6
    test            r10d, 0x40000000
    jnz .fast3
    mova            m1, [r3+32*3]       ; 31
    mova            m2, [r3-32*4]       ; 17
    mova            m5, [r3-32*1]       ; 23
    mova            m6, [r3+32*0]       ; 25
.fast3:
    add             r6, o_idct64_offset
    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1
    add             r6, 8
    add             tmp1q, 32*8
    sub             tmp2q, 32*8
    ; Second main_part1 call, same scheme (27, 21, 19, 29 come from r3).
    mova            m0, [r2-32*2]       ; 5
    mova            m3, [r2+32*1]       ; 11
    mova            m4, [r2+32*2]       ; 13
    mova            m7, [r2-32*3]       ; 3
    pxor            m1, m1
    REPX            {mova x, m1}, m2, m5, m6
    test            r10d, 0x40000000
    jnz .fast4
    mova            m1, [r3+32*1]       ; 27
    mova            m2, [r3-32*2]       ; 21
    mova            m5, [r3-32*3]       ; 19
    mova            m6, [r3+32*2]       ; 29
.fast4:
    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1
    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part2_pass2
    sub             tmp1q, 32*28
    ; Subtract r8 (see header note), then step dst by 4 strides + 16 bytes
    ; for the next iteration.
    sub             dstq, r8
    lea             dstq, [dstq+strideq*4+16]
    dec             r10b
    jg .pass2_loop
    RET

%endif ; ARCH_X86_64