1*9880d681SAndroid Build Coastguard Worker; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2 3*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41 4*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 5*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F 6*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW 7*9880d681SAndroid Build Coastguard Worker 8*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @mul_v16i8c(<16 x i8> %i) nounwind { 9*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: mul_v16i8c: 10*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 11*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117] 12*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm1 13*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm0, %xmm2 14*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 15*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm2 16*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm1, %xmm2 17*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] 18*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm3, %xmm2 19*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 20*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm0 21*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm1, %xmm0 22*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm3, %xmm0 23*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: packuswb %xmm2, %xmm0 24*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 25*9880d681SAndroid Build Coastguard Worker; 26*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: mul_v16i8c: 27*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 28*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm0, %xmm1 29*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw {{.*}}(%rip), %xmm2 30*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm2, %xmm1 31*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] 32*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm3, %xmm1 33*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 34*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm0, %xmm0 35*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm2, %xmm0 36*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm3, %xmm0 37*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: packuswb %xmm0, %xmm1 38*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa %xmm1, %xmm0 39*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 40*9880d681SAndroid Build Coastguard Worker; 41*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: mul_v16i8c: 42*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: # %entry 43*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0 44*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw {{.*}}(%rip), %ymm1 45*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0 46*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 47*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 48*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1 49*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 50*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 51*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vzeroupper 52*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 53*9880d681SAndroid Build Coastguard Worker; 54*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: mul_v16i8c: 55*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: # %entry 56*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm0 57*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw {{.*}}(%rip), %ymm1 58*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmullw %ymm1, %ymm0, %ymm0 59*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 60*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 61*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 62*9880d681SAndroid Build Coastguard Worker; 63*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: mul_v16i8c: 64*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: # %entry 65*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0 66*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmovsxbw {{.*}}(%rip), %ymm1 67*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmullw %ymm1, %ymm0, %ymm0 68*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 69*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 70*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 71*9880d681SAndroid Build Coastguard Workerentry: 72*9880d681SAndroid Build Coastguard Worker %A = mul <16 x i8> %i, < i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117 > 73*9880d681SAndroid Build Coastguard Worker ret <16 x i8> %A 74*9880d681SAndroid Build Coastguard Worker} 75*9880d681SAndroid Build Coastguard Worker 76*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @mul_v8i16c(<8 x i16> %i) nounwind { 77*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: mul_v8i16c: 78*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 79*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmullw {{.*}}(%rip), %xmm0 80*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 81*9880d681SAndroid Build Coastguard Worker; 82*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v8i16c: 83*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 84*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0 85*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 86*9880d681SAndroid Build Coastguard Workerentry: 87*9880d681SAndroid Build Coastguard Worker %A = mul <8 x i16> %i, < i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117 > 88*9880d681SAndroid Build Coastguard Worker ret <8 x i16> %A 89*9880d681SAndroid Build Coastguard Worker} 90*9880d681SAndroid Build Coastguard Worker 91*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @mul_v4i32c(<4 x i32> %i) nounwind { 92*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: mul_v4i32c: 93*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 94*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [117,117,117,117] 95*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 96*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmuludq %xmm1, %xmm0 97*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 98*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmuludq %xmm1, %xmm2 99*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] 100*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 101*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 102*9880d681SAndroid Build Coastguard Worker; 103*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: mul_v4i32c: 104*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 105*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0 106*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 107*9880d681SAndroid Build Coastguard Worker; 108*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v4i32c: 109*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 110*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1 111*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 112*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 113*9880d681SAndroid Build Coastguard Workerentry: 114*9880d681SAndroid Build Coastguard Worker %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 > 115*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %A 116*9880d681SAndroid Build Coastguard Worker} 117*9880d681SAndroid Build Coastguard Worker 118*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @mul_v2i64c(<2 x i64> %i) nounwind { 119*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: mul_v2i64c: 120*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 121*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movdqa {{.*#+}} xmm1 = [117,117] 122*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movdqa %xmm0, %xmm2 123*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmuludq %xmm1, %xmm2 124*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: psrlq $32, %xmm0 125*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmuludq %xmm1, %xmm0 126*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: psllq $32, %xmm0 127*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddq %xmm2, %xmm0 128*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 129*9880d681SAndroid Build Coastguard Worker; 130*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v2i64c: 131*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 132*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [117,117] 133*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 134*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0 135*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 136*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpsllq $32, %xmm0, %xmm0 137*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpaddq %xmm0, %xmm2, %xmm0 138*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 139*9880d681SAndroid Build Coastguard Workerentry: 140*9880d681SAndroid Build Coastguard Worker %A = mul <2 x i64> %i, < i64 117, i64 117 > 141*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %A 142*9880d681SAndroid Build Coastguard Worker} 143*9880d681SAndroid Build Coastguard Worker 144*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @mul_v16i8(<16 x i8> %i, <16 x i8> %j) nounwind { 145*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: mul_v16i8: 146*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 147*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm1, %xmm2 148*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 149*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm2 150*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm0, %xmm3 151*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 152*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm3 153*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm2, %xmm3 154*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] 155*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm2, %xmm3 156*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 157*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm1 158*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 159*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm0 160*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm1, %xmm0 161*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm2, %xmm0 162*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: packuswb %xmm3, %xmm0 163*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 164*9880d681SAndroid Build Coastguard Worker; 165*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: mul_v16i8: 166*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 167*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm1, %xmm3 168*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm0, %xmm2 169*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm3, %xmm2 170*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] 171*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm3, %xmm2 172*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 173*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm1, %xmm1 174*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 175*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm0, %xmm0 176*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm1, %xmm0 177*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm3, %xmm0 178*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: packuswb %xmm0, %xmm2 179*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa %xmm2, %xmm0 180*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 181*9880d681SAndroid Build Coastguard Worker; 182*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: mul_v16i8: 183*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: # %entry 184*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw %xmm1, %ymm1 185*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0 186*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0 187*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 188*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 189*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1 190*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 191*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 192*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vzeroupper 193*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 194*9880d681SAndroid Build Coastguard Worker; 195*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: mul_v16i8: 196*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: # %entry 197*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw %xmm1, %ymm1 198*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm0 199*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmullw %ymm1, %ymm0, %ymm0 200*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 201*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 202*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 203*9880d681SAndroid Build Coastguard Worker; 204*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: mul_v16i8: 205*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: # %entry 206*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmovsxbw %xmm1, %ymm1 207*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0 208*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmullw %ymm1, %ymm0, %ymm0 209*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 210*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 211*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 212*9880d681SAndroid Build Coastguard Workerentry: 213*9880d681SAndroid Build Coastguard Worker %A = mul <16 x i8> %i, %j 214*9880d681SAndroid Build Coastguard Worker ret <16 x i8> %A 215*9880d681SAndroid Build Coastguard Worker} 216*9880d681SAndroid Build Coastguard Worker 217*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @mul_v8i16(<8 x i16> %i, <8 x i16> %j) nounwind { 218*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: mul_v8i16: 219*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 220*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmullw %xmm1, %xmm0 221*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 222*9880d681SAndroid Build Coastguard Worker; 223*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v8i16: 224*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 225*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 226*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 227*9880d681SAndroid Build Coastguard Workerentry: 228*9880d681SAndroid Build Coastguard Worker %A = mul <8 x i16> %i, %j 229*9880d681SAndroid Build Coastguard Worker ret <8 x i16> %A 230*9880d681SAndroid Build Coastguard Worker} 231*9880d681SAndroid Build Coastguard Worker 232*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @mul_v4i32(<4 x i32> %i, <4 x i32> %j) nounwind { 233*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: mul_v4i32: 234*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 235*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 236*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmuludq %xmm1, %xmm0 237*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 238*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 239*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmuludq %xmm2, %xmm1 240*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 241*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 242*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 243*9880d681SAndroid Build Coastguard Worker; 244*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: mul_v4i32: 245*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 246*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmulld %xmm1, %xmm0 247*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 248*9880d681SAndroid Build Coastguard Worker; 249*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v4i32: 250*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 251*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 252*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 253*9880d681SAndroid Build Coastguard Workerentry: 254*9880d681SAndroid Build Coastguard Worker %A = mul <4 x i32> %i, %j 255*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %A 256*9880d681SAndroid Build Coastguard Worker} 257*9880d681SAndroid Build Coastguard Worker 258*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @mul_v2i64(<2 x i64> %i, <2 x i64> %j) nounwind { 259*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: mul_v2i64: 260*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 261*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movdqa %xmm0, %xmm2 262*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmuludq %xmm1, %xmm2 263*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movdqa %xmm1, %xmm3 264*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: psrlq $32, %xmm3 265*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmuludq %xmm0, %xmm3 266*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: psllq $32, %xmm3 267*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddq %xmm3, %xmm2 268*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: psrlq $32, %xmm0 269*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmuludq %xmm1, %xmm0 270*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: psllq $32, %xmm0 271*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddq %xmm2, %xmm0 272*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 273*9880d681SAndroid Build Coastguard Worker; 274*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v2i64: 275*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 276*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 277*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpsrlq $32, %xmm1, %xmm3 278*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 279*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpsllq $32, %xmm3, %xmm3 280*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpaddq %xmm3, %xmm2, %xmm2 281*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0 282*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 283*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpsllq $32, %xmm0, %xmm0 284*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpaddq %xmm0, %xmm2, %xmm0 285*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 286*9880d681SAndroid Build Coastguard Workerentry: 287*9880d681SAndroid Build Coastguard Worker %A = mul <2 x i64> %i, %j 288*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %A 289*9880d681SAndroid Build Coastguard Worker} 290*9880d681SAndroid Build Coastguard Worker 291*9880d681SAndroid Build Coastguard Workerdeclare void @foo() 292*9880d681SAndroid Build Coastguard Worker 293*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @mul_v4i32spill(<4 x i32> %i, <4 x i32> %j) nounwind { 294*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: mul_v4i32spill: 295*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 296*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: subq $40, %rsp 297*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill 298*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 299*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: callq foo 300*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload 301*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 302*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm2 # 16-byte Reload 303*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmuludq %xmm2, %xmm0 304*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 305*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 306*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmuludq %xmm1, %xmm2 307*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] 308*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 309*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: addq $40, %rsp 310*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 311*9880d681SAndroid Build Coastguard Worker; 312*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: mul_v4i32spill: 313*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 314*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: subq $40, %rsp 315*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill 316*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 317*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: callq foo 318*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload 319*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmulld {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload 320*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: addq $40, %rsp 321*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 322*9880d681SAndroid Build Coastguard Worker; 323*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v4i32spill: 324*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 325*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: subq $40, %rsp 326*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill 327*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 328*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: callq foo 329*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload 330*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpmulld {{[0-9]+}}(%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 331*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: addq $40, %rsp 332*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 333*9880d681SAndroid Build Coastguard Workerentry: 334*9880d681SAndroid Build Coastguard Worker ; Use a call to force spills. 335*9880d681SAndroid Build Coastguard Worker call void @foo() 336*9880d681SAndroid Build Coastguard Worker %A = mul <4 x i32> %i, %j 337*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %A 338*9880d681SAndroid Build Coastguard Worker} 339*9880d681SAndroid Build Coastguard Worker 340*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @mul_v2i64spill(<2 x i64> %i, <2 x i64> %j) nounwind { 341*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: mul_v2i64spill: 342*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 343*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: subq $40, %rsp 344*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill 345*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 346*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: callq foo 347*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload 348*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movdqa %xmm0, %xmm2 349*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm3 # 16-byte Reload 350*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmuludq %xmm3, %xmm2 351*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movdqa %xmm3, %xmm1 352*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: psrlq $32, %xmm1 353*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmuludq %xmm0, %xmm1 354*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: psllq $32, %xmm1 355*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddq %xmm1, %xmm2 356*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: psrlq $32, %xmm0 357*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmuludq %xmm3, %xmm0 358*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: psllq $32, %xmm0 359*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddq %xmm2, %xmm0 360*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: addq $40, %rsp 361*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 362*9880d681SAndroid Build Coastguard Worker; 363*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v2i64spill: 364*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 365*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: subq $40, %rsp 366*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill 367*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 368*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: callq foo 369*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm2 # 16-byte Reload 370*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovdqa (%rsp), %xmm3 # 16-byte Reload 371*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpmuludq %xmm2, %xmm3, %xmm0 372*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpsrlq $32, %xmm2, %xmm1 373*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 374*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpsllq $32, %xmm1, %xmm1 375*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 376*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpsrlq $32, %xmm3, %xmm1 377*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpmuludq %xmm2, %xmm1, %xmm1 378*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpsllq $32, %xmm1, %xmm1 379*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 380*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: addq $40, %rsp 381*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 382*9880d681SAndroid Build Coastguard Workerentry: 383*9880d681SAndroid Build Coastguard Worker ; Use a call to force spills. 384*9880d681SAndroid Build Coastguard Worker call void @foo() 385*9880d681SAndroid Build Coastguard Worker %A = mul <2 x i64> %i, %j 386*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %A 387*9880d681SAndroid Build Coastguard Worker} 388*9880d681SAndroid Build Coastguard Worker 389*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @mul_v32i8c(<32 x i8> %i) nounwind { 390*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: mul_v32i8c: 391*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 392*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117] 393*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm2 394*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm0, %xmm3 395*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 396*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm3 397*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm2, %xmm3 398*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] 399*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm4, %xmm3 400*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 401*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm0 402*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm2, %xmm0 403*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm4, %xmm0 404*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: packuswb %xmm3, %xmm0 405*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm1, %xmm3 406*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 407*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm3 408*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm2, %xmm3 409*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm4, %xmm3 410*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 411*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm1 412*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm2, %xmm1 413*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm4, %xmm1 414*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: packuswb %xmm3, %xmm1 415*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 416*9880d681SAndroid Build Coastguard Worker; 417*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: mul_v32i8c: 418*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 419*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm0, %xmm2 420*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw {{.*}}(%rip), %xmm4 421*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm4, %xmm2 422*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255] 423*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm5, %xmm2 424*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 425*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm0, %xmm0 426*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm4, %xmm0 427*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm5, %xmm0 428*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: packuswb %xmm0, %xmm2 429*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm1, %xmm3 430*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm4, %xmm3 431*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm5, %xmm3 432*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 433*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm0, %xmm0 434*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm4, %xmm0 435*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm5, %xmm0 436*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: packuswb %xmm0, %xmm3 437*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa %xmm2, %xmm0 438*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa %xmm3, %xmm1 439*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 440*9880d681SAndroid Build Coastguard Worker; 441*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: mul_v32i8c: 442*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: # %entry 443*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 444*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw %xmm1, %ymm1 445*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw {{.*}}(%rip), %ymm2 446*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmullw %ymm2, %ymm1, %ymm1 447*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 448*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 449*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm4, %xmm3, %xmm3 450*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm4, %xmm1, %xmm1 451*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] 452*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0 453*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0 454*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 455*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2 456*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0 457*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 458*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 459*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 460*9880d681SAndroid Build Coastguard Worker; 461*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: mul_v32i8c: 462*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: # %entry 463*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm1 464*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw {{.*}}(%rip), %ymm2 465*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmullw %ymm2, %ymm1, %ymm1 466*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1 467*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 468*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 469*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm0 470*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmullw %ymm2, %ymm0, %ymm0 471*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 472*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 473*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 474*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 475*9880d681SAndroid Build Coastguard Worker; 476*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: mul_v32i8c: 477*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: # %entry 478*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovaps {{.*#+}} ymm1 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117] 479*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmovsxbw %ymm1, %zmm1 480*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0 481*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0 482*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 483*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 484*9880d681SAndroid Build Coastguard Workerentry: 485*9880d681SAndroid Build Coastguard Worker %A = mul <32 x i8> %i, < i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117 > 486*9880d681SAndroid Build Coastguard Worker ret <32 x i8> %A 487*9880d681SAndroid Build Coastguard Worker} 488*9880d681SAndroid Build Coastguard Worker 489*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @mul_v16i16c(<16 x i16> %i) nounwind { 490*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: mul_v16i16c: 491*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 492*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movdqa {{.*#+}} xmm2 = [117,117,117,117,117,117,117,117] 493*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmullw %xmm2, %xmm0 494*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmullw %xmm2, %xmm1 495*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 496*9880d681SAndroid Build Coastguard Worker; 497*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v16i16c: 498*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 499*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0 500*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 501*9880d681SAndroid Build Coastguard Workerentry: 502*9880d681SAndroid Build Coastguard Worker %A = mul <16 x i16> %i, < i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117 > 503*9880d681SAndroid Build Coastguard Worker ret <16 x i16> %A 504*9880d681SAndroid Build Coastguard Worker} 505*9880d681SAndroid Build Coastguard Worker 506*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @mul_v8i32c(<8 x i32> %i) nounwind { 507*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: mul_v8i32c: 508*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 509*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [117,117,117,117] 510*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 511*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmuludq %xmm2, %xmm0 512*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 513*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmuludq %xmm2, %xmm3 514*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 515*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 516*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3] 517*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmuludq %xmm2, %xmm1 518*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 519*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmuludq %xmm2, %xmm3 520*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3] 521*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 522*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 523*9880d681SAndroid Build Coastguard Worker; 524*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: mul_v8i32c: 525*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 526*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [117,117,117,117] 527*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmulld %xmm2, %xmm0 528*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmulld %xmm2, %xmm1 529*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 530*9880d681SAndroid Build Coastguard Worker; 531*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v8i32c: 532*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 533*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1 534*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpmulld %ymm1, %ymm0, %ymm0 535*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 536*9880d681SAndroid Build Coastguard Workerentry: 537*9880d681SAndroid Build Coastguard Worker %A = mul <8 x i32> %i, < i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117 > 538*9880d681SAndroid Build Coastguard Worker ret <8 x i32> %A 539*9880d681SAndroid Build Coastguard Worker} 540*9880d681SAndroid Build Coastguard Worker 541*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @mul_v4i64c(<4 x i64> %i) nounwind { 542*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: mul_v4i64c: 543*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 544*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movdqa {{.*#+}} xmm2 = [117,117] 545*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movdqa %xmm0, %xmm3 546*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmuludq %xmm2, %xmm3 547*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: psrlq $32, %xmm0 548*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmuludq %xmm2, %xmm0 549*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: psllq $32, %xmm0 550*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddq %xmm3, %xmm0 551*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movdqa %xmm1, %xmm3 552*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmuludq %xmm2, %xmm3 553*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: psrlq $32, %xmm1 554*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmuludq %xmm2, %xmm1 555*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: psllq $32, %xmm1 556*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddq %xmm3, %xmm1 557*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 558*9880d681SAndroid Build Coastguard Worker; 559*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v4i64c: 560*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 561*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1 562*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpmuludq %ymm1, %ymm0, %ymm2 563*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpsrlq $32, %ymm0, %ymm0 564*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 565*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpsllq $32, %ymm0, %ymm0 566*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpaddq %ymm0, %ymm2, %ymm0 567*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 568*9880d681SAndroid Build Coastguard Workerentry: 569*9880d681SAndroid Build Coastguard Worker %A = mul <4 x i64> %i, < i64 117, i64 117, i64 117, i64 117 > 570*9880d681SAndroid Build Coastguard Worker ret <4 x i64> %A 571*9880d681SAndroid Build Coastguard Worker} 572*9880d681SAndroid Build Coastguard Worker 573*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind { 574*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: mul_v32i8: 575*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 576*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm2, %xmm4 577*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 578*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm4 579*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm0, %xmm5 580*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 581*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm5 582*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm4, %xmm5 583*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] 584*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm4, %xmm5 585*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 586*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm2 587*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 588*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm0 589*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm2, %xmm0 590*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm4, %xmm0 591*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: packuswb %xmm5, %xmm0 592*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm3, %xmm2 593*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 594*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm2 595*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm1, %xmm5 596*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 597*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm5 598*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm2, %xmm5 599*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm4, %xmm5 600*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 601*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm3 602*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 603*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm1 604*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm3, %xmm1 605*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm4, %xmm1 606*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: packuswb %xmm5, %xmm1 607*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 608*9880d681SAndroid Build Coastguard Worker; 609*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: mul_v32i8: 610*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 611*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm2, %xmm5 612*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm0, %xmm4 613*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm5, %xmm4 614*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255] 615*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm5, %xmm4 616*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] 617*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm2, %xmm2 618*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 619*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm0, %xmm0 620*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm2, %xmm0 621*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm5, %xmm0 622*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: packuswb %xmm0, %xmm4 623*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm3, %xmm0 624*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm1, %xmm2 625*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm0, %xmm2 626*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm5, %xmm2 627*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1] 628*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm0, %xmm0 629*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 630*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm1, %xmm1 631*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm0, %xmm1 632*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm5, %xmm1 633*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: packuswb %xmm1, %xmm2 634*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa %xmm4, %xmm0 635*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa %xmm2, %xmm1 636*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 637*9880d681SAndroid Build Coastguard Worker; 638*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: mul_v32i8: 639*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: # %entry 640*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 641*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw %xmm2, %ymm2 642*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3 643*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw %xmm3, %ymm3 644*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmullw %ymm2, %ymm3, %ymm2 645*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 646*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 647*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm4, %xmm3, %xmm3 648*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2 649*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] 650*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw %xmm1, %ymm1 651*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0 652*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0 653*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 654*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm4, %xmm1, %xmm1 655*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0 656*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 657*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 658*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 659*9880d681SAndroid Build Coastguard Worker; 660*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: mul_v32i8: 661*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: # %entry 662*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw %xmm1, %ymm2 663*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm3 664*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmullw %ymm2, %ymm3, %ymm2 665*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2 666*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 667*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1 668*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw %xmm1, %ymm1 669*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 670*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm0 671*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmullw %ymm1, %ymm0, %ymm0 672*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 673*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 674*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 675*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 676*9880d681SAndroid Build Coastguard Worker; 677*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: mul_v32i8: 678*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: # %entry 679*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmovsxbw %ymm1, %zmm1 680*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0 681*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0 682*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 683*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 684*9880d681SAndroid Build Coastguard Workerentry: 685*9880d681SAndroid Build Coastguard Worker %A = mul <32 x i8> %i, %j 686*9880d681SAndroid Build Coastguard Worker ret <32 x i8> %A 687*9880d681SAndroid Build Coastguard Worker} 688*9880d681SAndroid Build Coastguard Worker 689*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @mul_v16i16(<16 x i16> %i, <16 x i16> %j) nounwind { 690*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: mul_v16i16: 691*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 692*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmullw %xmm2, %xmm0 693*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmullw %xmm3, %xmm1 694*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 695*9880d681SAndroid Build Coastguard Worker; 696*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v16i16: 697*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 698*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpmullw %ymm1, %ymm0, %ymm0 699*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 700*9880d681SAndroid Build Coastguard Workerentry: 701*9880d681SAndroid Build Coastguard Worker %A = mul <16 x i16> %i, %j 702*9880d681SAndroid Build Coastguard Worker ret <16 x i16> %A 703*9880d681SAndroid Build Coastguard Worker} 704*9880d681SAndroid Build Coastguard Worker 705*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @mul_v8i32(<8 x i32> %i, <8 x i32> %j) nounwind { 706*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: mul_v8i32: 707*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 708*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] 709*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmuludq %xmm2, %xmm0 710*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 711*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 712*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmuludq %xmm4, %xmm2 713*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 714*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 715*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 716*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmuludq %xmm3, %xmm1 717*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 718*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 719*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmuludq %xmm2, %xmm3 720*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3] 721*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 722*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 723*9880d681SAndroid Build Coastguard Worker; 724*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: mul_v8i32: 725*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 726*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmulld %xmm2, %xmm0 727*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmulld %xmm3, %xmm1 728*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 729*9880d681SAndroid Build Coastguard Worker; 730*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v8i32: 731*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 732*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpmulld %ymm1, %ymm0, %ymm0 733*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 734*9880d681SAndroid Build Coastguard Workerentry: 735*9880d681SAndroid Build Coastguard Worker %A = mul <8 x i32> %i, %j 736*9880d681SAndroid Build Coastguard Worker ret <8 x i32> %A 737*9880d681SAndroid Build Coastguard Worker} 738*9880d681SAndroid Build Coastguard Worker 739*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @mul_v4i64(<4 x i64> %i, <4 x i64> %j) nounwind { 740*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: mul_v4i64: 741*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 742*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movdqa %xmm0, %xmm4 743*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmuludq %xmm2, %xmm4 744*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movdqa %xmm2, %xmm5 745*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: psrlq $32, %xmm5 746*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmuludq %xmm0, %xmm5 747*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: psllq $32, %xmm5 748*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddq %xmm5, %xmm4 749*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: psrlq $32, %xmm0 750*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmuludq %xmm2, %xmm0 751*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: psllq $32, %xmm0 752*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddq %xmm4, %xmm0 753*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movdqa %xmm1, %xmm2 754*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmuludq %xmm3, %xmm2 755*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movdqa %xmm3, %xmm4 756*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: psrlq $32, %xmm4 757*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmuludq %xmm1, %xmm4 758*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: psllq $32, %xmm4 759*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddq %xmm4, %xmm2 760*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: psrlq $32, %xmm1 761*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: pmuludq %xmm3, %xmm1 762*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: psllq $32, %xmm1 763*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddq %xmm2, %xmm1 764*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 765*9880d681SAndroid Build Coastguard Worker; 766*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v4i64: 767*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 768*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpmuludq %ymm1, %ymm0, %ymm2 769*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpsrlq $32, %ymm1, %ymm3 770*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpmuludq %ymm3, %ymm0, %ymm3 771*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpsllq $32, %ymm3, %ymm3 772*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpaddq %ymm3, %ymm2, %ymm2 773*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpsrlq $32, %ymm0, %ymm0 774*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 775*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpsllq $32, %ymm0, %ymm0 776*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpaddq %ymm0, %ymm2, %ymm0 777*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 778*9880d681SAndroid Build Coastguard Workerentry: 779*9880d681SAndroid Build Coastguard Worker %A = mul <4 x i64> %i, %j 780*9880d681SAndroid Build Coastguard Worker ret <4 x i64> %A 781*9880d681SAndroid Build Coastguard Worker} 782*9880d681SAndroid Build Coastguard Worker 783*9880d681SAndroid Build Coastguard Workerdefine <64 x i8> @mul_v64i8c(<64 x i8> %i) nounwind { 784*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: mul_v64i8c: 785*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 786*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117] 787*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm4 788*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm0, %xmm6 789*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 790*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm6 791*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm4, %xmm6 792*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255] 793*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm5, %xmm6 794*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 795*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm0 796*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm4, %xmm0 797*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm5, %xmm0 798*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: packuswb %xmm6, %xmm0 799*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm1, %xmm6 800*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 801*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm6 802*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm4, %xmm6 803*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm5, %xmm6 804*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 805*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm1 806*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm4, %xmm1 807*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm5, %xmm1 808*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: packuswb %xmm6, %xmm1 809*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm2, %xmm6 810*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 811*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm6 812*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm4, %xmm6 813*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm5, %xmm6 814*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 815*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm2 816*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm4, %xmm2 817*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm5, %xmm2 818*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: packuswb %xmm6, %xmm2 819*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm3, %xmm6 820*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 821*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm6 822*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm4, %xmm6 823*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm5, %xmm6 824*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 825*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm3 826*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm4, %xmm3 827*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm5, %xmm3 828*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: packuswb %xmm6, %xmm3 829*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 830*9880d681SAndroid Build Coastguard Worker; 831*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: mul_v64i8c: 832*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 833*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa %xmm1, %xmm4 834*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa %xmm0, %xmm1 835*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm1, %xmm0 836*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw {{.*}}(%rip), %xmm6 837*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm6, %xmm0 838*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa {{.*#+}} xmm7 = [255,255,255,255,255,255,255,255] 839*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm7, %xmm0 840*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 841*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm1, %xmm1 842*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm6, %xmm1 843*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm7, %xmm1 844*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: packuswb %xmm1, %xmm0 845*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm4, %xmm1 846*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm6, %xmm1 847*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm7, %xmm1 848*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,0,1] 849*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm4, %xmm4 850*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm6, %xmm4 851*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm7, %xmm4 852*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: packuswb %xmm4, %xmm1 853*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm2, %xmm4 854*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm6, %xmm4 855*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm7, %xmm4 856*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] 857*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm2, %xmm2 858*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm6, %xmm2 859*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm7, %xmm2 860*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: packuswb %xmm2, %xmm4 861*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm3, %xmm5 862*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm6, %xmm5 863*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm7, %xmm5 864*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1] 865*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm2, %xmm2 866*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm6, %xmm2 867*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm7, %xmm2 868*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: packuswb %xmm2, %xmm5 869*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa %xmm4, %xmm2 870*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa %xmm5, %xmm3 871*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 872*9880d681SAndroid Build Coastguard Worker; 873*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: mul_v64i8c: 874*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: # %entry 875*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 876*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw %xmm2, %ymm2 877*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw {{.*}}(%rip), %ymm3 878*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmullw %ymm3, %ymm2, %ymm2 879*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4 880*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 881*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm5, %xmm4, %xmm4 882*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm5, %xmm2, %xmm2 883*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0] 884*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0 885*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmullw %ymm3, %ymm0, %ymm0 886*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm4 887*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm5, %xmm4, %xmm4 888*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm5, %xmm0, %xmm0 889*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] 890*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 891*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 892*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw %xmm2, %ymm2 893*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmullw %ymm3, %ymm2, %ymm2 894*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4 895*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm5, %xmm4, %xmm4 896*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm5, %xmm2, %xmm2 897*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0] 898*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw %xmm1, %ymm1 899*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmullw %ymm3, %ymm1, %ymm1 900*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 901*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm5, %xmm3, %xmm3 902*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm5, %xmm1, %xmm1 903*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] 904*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 905*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 906*9880d681SAndroid Build Coastguard Worker; 907*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: mul_v64i8c: 908*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: # %entry 909*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm2 910*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw {{.*}}(%rip), %ymm3 911*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2 912*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2 913*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 914*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 915*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm0 916*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmullw %ymm3, %ymm0, %ymm0 917*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 918*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 919*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 920*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw %xmm1, %ymm2 921*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2 922*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2 923*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 924*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1 925*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw %xmm1, %ymm1 926*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmullw %ymm3, %ymm1, %ymm1 927*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1 928*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 929*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 930*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 931*9880d681SAndroid Build Coastguard Worker; 932*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: mul_v64i8c: 933*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: # %entry 934*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovaps {{.*#+}} ymm1 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117] 935*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmovsxbw %ymm1, %zmm1 936*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm2 937*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmullw %zmm1, %zmm2, %zmm2 938*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmovwb %zmm2, %ymm2 939*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm0 940*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0 941*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0 942*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 943*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 944*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 945*9880d681SAndroid Build Coastguard Workerentry: 946*9880d681SAndroid Build Coastguard Worker %A = mul <64 x i8> %i, < i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117 > 947*9880d681SAndroid Build Coastguard Worker ret <64 x i8> %A 948*9880d681SAndroid Build Coastguard Worker} 949*9880d681SAndroid Build Coastguard Worker 950*9880d681SAndroid Build Coastguard Workerdefine <64 x i8> @mul_v64i8(<64 x i8> %i, <64 x i8> %j) nounwind { 951*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: mul_v64i8: 952*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 953*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm4, %xmm8 954*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckhbw {{.*#+}} xmm8 = xmm8[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 955*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm8 956*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm0, %xmm9 957*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckhbw {{.*#+}} xmm9 = xmm9[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 958*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm9 959*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm8, %xmm9 960*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [255,255,255,255,255,255,255,255] 961*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm8, %xmm9 962*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 963*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm4 964*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 965*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm0 966*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm4, %xmm0 967*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm8, %xmm0 968*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: packuswb %xmm9, %xmm0 969*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm5, %xmm9 970*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckhbw {{.*#+}} xmm9 = xmm9[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 971*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm9 972*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm1, %xmm4 973*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 974*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm4 975*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm9, %xmm4 976*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm8, %xmm4 977*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 978*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm5 979*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 980*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm1 981*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm5, %xmm1 982*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm8, %xmm1 983*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: packuswb %xmm4, %xmm1 984*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm6, %xmm4 985*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 986*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm4 987*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm2, %xmm5 988*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 989*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm5 990*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm4, %xmm5 991*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm8, %xmm5 992*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 993*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm6 994*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 995*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm2 996*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm6, %xmm2 997*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm8, %xmm2 998*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: packuswb %xmm5, %xmm2 999*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm7, %xmm4 1000*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1001*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm4 1002*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm3, %xmm5 1003*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1004*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm5 1005*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm4, %xmm5 1006*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm8, %xmm5 1007*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1008*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm7 1009*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1010*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm3 1011*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm7, %xmm3 1012*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm8, %xmm3 1013*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: packuswb %xmm5, %xmm3 1014*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 1015*9880d681SAndroid Build Coastguard Worker; 1016*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: mul_v64i8: 1017*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 1018*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa %xmm1, %xmm8 1019*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa %xmm0, %xmm1 1020*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm4, %xmm9 1021*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm1, %xmm0 1022*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm9, %xmm0 1023*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa {{.*#+}} xmm9 = [255,255,255,255,255,255,255,255] 1024*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm9, %xmm0 1025*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,0,1] 1026*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm4, %xmm4 1027*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 1028*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm1, %xmm1 1029*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm4, %xmm1 1030*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm9, %xmm1 1031*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: packuswb %xmm1, %xmm0 1032*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm5, %xmm4 1033*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm8, %xmm1 1034*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm4, %xmm1 1035*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm9, %xmm1 1036*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm5[2,3,0,1] 1037*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm4, %xmm4 1038*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm8[2,3,0,1] 1039*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm5, %xmm5 1040*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm4, %xmm5 1041*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm9, %xmm5 1042*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: packuswb %xmm5, %xmm1 1043*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm6, %xmm5 1044*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm2, %xmm4 1045*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm5, %xmm4 1046*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm9, %xmm4 1047*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm6[2,3,0,1] 1048*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm5, %xmm5 1049*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] 1050*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm2, %xmm2 1051*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm5, %xmm2 1052*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm9, %xmm2 1053*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: packuswb %xmm2, %xmm4 1054*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm7, %xmm2 1055*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm3, %xmm5 1056*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm2, %xmm5 1057*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm9, %xmm5 1058*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm7[2,3,0,1] 1059*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm2, %xmm2 1060*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1] 1061*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm3, %xmm3 1062*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm2, %xmm3 1063*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm9, %xmm3 1064*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: packuswb %xmm3, %xmm5 1065*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa %xmm4, %xmm2 1066*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa %xmm5, %xmm3 1067*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 1068*9880d681SAndroid Build Coastguard Worker; 1069*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: mul_v64i8: 1070*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: # %entry 1071*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4 1072*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw %xmm4, %ymm4 1073*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm5 1074*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw %xmm5, %ymm5 1075*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmullw %ymm4, %ymm5, %ymm5 1076*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm6 1077*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 1078*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm4, %xmm6, %xmm6 1079*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm4, %xmm5, %xmm5 1080*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm6[0] 1081*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw %xmm2, %ymm2 1082*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0 1083*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0 1084*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 1085*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2 1086*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0 1087*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 1088*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0 1089*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm2 1090*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw %xmm2, %ymm2 1091*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm5 1092*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw %xmm5, %ymm5 1093*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmullw %ymm2, %ymm5, %ymm2 1094*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm5 1095*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm4, %xmm5, %xmm5 1096*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2 1097*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0] 1098*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw %xmm3, %ymm3 1099*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw %xmm1, %ymm1 1100*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmullw %ymm3, %ymm1, %ymm1 1101*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 1102*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm4, %xmm3, %xmm3 1103*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm4, %xmm1, %xmm1 1104*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] 1105*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 1106*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 1107*9880d681SAndroid Build Coastguard Worker; 1108*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: mul_v64i8: 1109*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: # %entry 1110*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw %xmm2, %ymm4 1111*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm5 1112*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmullw %ymm4, %ymm5, %ymm4 1113*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxwd %ymm4, %zmm4 1114*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovdb %zmm4, %xmm4 1115*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm2 1116*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw %xmm2, %ymm2 1117*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 1118*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm0 1119*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmullw %ymm2, %ymm0, %ymm0 1120*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 1121*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 1122*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0 1123*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw %xmm3, %ymm2 1124*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw %xmm1, %ymm4 1125*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmullw %ymm2, %ymm4, %ymm2 1126*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2 1127*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 1128*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm3 1129*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw %xmm3, %ymm3 1130*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1 1131*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxbw %xmm1, %ymm1 1132*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmullw %ymm3, %ymm1, %ymm1 1133*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1 1134*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 1135*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 1136*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 1137*9880d681SAndroid Build Coastguard Worker; 1138*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: mul_v64i8: 1139*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: # %entry 1140*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmovsxbw %ymm1, %zmm2 1141*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm3 1142*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmullw %zmm2, %zmm3, %zmm2 1143*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmovwb %zmm2, %ymm2 1144*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1145*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmovsxbw %ymm1, %zmm1 1146*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm0 1147*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0 1148*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0 1149*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 1150*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 1151*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 1152*9880d681SAndroid Build Coastguard Workerentry: 1153*9880d681SAndroid Build Coastguard Worker %A = mul <64 x i8> %i, %j 1154*9880d681SAndroid Build Coastguard Worker ret <64 x i8> %A 1155*9880d681SAndroid Build Coastguard Worker} 1156*9880d681SAndroid Build Coastguard Worker 1157