xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/pmul.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
4*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
5*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
6*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
7*9880d681SAndroid Build Coastguard Worker
; mul_v16i8c - multiply every lane of a <16 x i8> vector by the constant 117.
;
; The autogenerated assertions below pin the expected llc output for each of
; the configurations listed in the run lines at the top of the file. In every
; expected sequence the bytes are sign-extended to 16-bit lanes, multiplied
; with (v)pmullw, and then narrowed back to bytes:
;   - SSE2 prefix     - punpcklbw/punpckhbw + psraw $8 to widen, pand with a
;                       255 mask + packuswb to narrow.
;   - SSE41 prefix    - pmovsxbw to widen, same pand/packuswb narrowing.
;   - AVX2 prefix     - vpmovsxbw into ymm, vpshufb + vpunpcklqdq to narrow.
;   - AVX512F prefix  - vpmovsxwd to zmm followed by vpmovdb.
;   - AVX512BW prefix - vpmovwb.
8*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @mul_v16i8c(<16 x i8> %i) nounwind  {
9*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: mul_v16i8c:
10*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0: # %entry
11*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
12*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm1
13*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa %xmm0, %xmm2
14*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
15*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm2
16*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm1, %xmm2
17*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
18*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm3, %xmm2
19*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
20*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm0
21*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm1, %xmm0
22*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm3, %xmm0
23*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    packuswb %xmm2, %xmm0
24*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
25*9880d681SAndroid Build Coastguard Worker;
26*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: mul_v16i8c:
27*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0: # %entry
28*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm0, %xmm1
29*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw {{.*}}(%rip), %xmm2
30*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm2, %xmm1
31*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
32*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm3, %xmm1
33*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
34*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm0, %xmm0
35*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm2, %xmm0
36*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm3, %xmm0
37*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    packuswb %xmm0, %xmm1
38*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movdqa %xmm1, %xmm0
39*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
40*9880d681SAndroid Build Coastguard Worker;
41*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: mul_v16i8c:
42*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0: # %entry
43*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
44*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm1
45*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
46*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
47*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
48*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
49*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
50*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
51*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vzeroupper
52*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
53*9880d681SAndroid Build Coastguard Worker;
54*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: mul_v16i8c:
55*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0: # %entry
56*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw %xmm0, %ymm0
57*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm1
58*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
59*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
60*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
61*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
62*9880d681SAndroid Build Coastguard Worker;
63*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: mul_v16i8c:
64*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0: # %entry
65*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmovsxbw %xmm0, %ymm0
66*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm1
67*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
68*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
69*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
70*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
71*9880d681SAndroid Build Coastguard Workerentry:
  ; Splat constant operand - all 16 lanes are multiplied by 117.
72*9880d681SAndroid Build Coastguard Worker  %A = mul <16 x i8> %i, < i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117 >
73*9880d681SAndroid Build Coastguard Worker  ret <16 x i8> %A
74*9880d681SAndroid Build Coastguard Worker}
75*9880d681SAndroid Build Coastguard Worker
; mul_v8i16c - multiply every lane of an <8 x i16> vector by the constant 117.
;
; The expected output is a single multiply whose second operand is read
; RIP-relative from memory - pmullw under the shared SSE prefix and the
; three-operand vpmullw under the shared AVX prefix.
76*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @mul_v8i16c(<8 x i16> %i) nounwind  {
77*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: mul_v8i16c:
78*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0: # %entry
79*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmullw {{.*}}(%rip), %xmm0
80*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
81*9880d681SAndroid Build Coastguard Worker;
82*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v8i16c:
83*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0: # %entry
84*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
85*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
86*9880d681SAndroid Build Coastguard Workerentry:
87*9880d681SAndroid Build Coastguard Worker  %A = mul <8 x i16> %i, < i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117 >
88*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %A
89*9880d681SAndroid Build Coastguard Worker}
90*9880d681SAndroid Build Coastguard Worker
; mul_v4i32c - multiply every lane of a <4 x i32> vector by the constant 117.
;
; Expected output per check prefix:
;   - SSE2 prefix  - two pmuludq multiplies, one on the input as-is and one on
;                    a pshufd [1,1,3,3] copy, with the results picked out by
;                    pshufd [0,2,2,3] and interleaved by punpckldq.
;   - SSE41 prefix - one pmulld with the constant as a RIP-relative operand.
;   - AVX prefix   - vpbroadcastd of the constant followed by vpmulld.
91*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @mul_v4i32c(<4 x i32> %i) nounwind  {
92*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: mul_v4i32c:
93*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0: # %entry
94*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [117,117,117,117]
95*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
96*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmuludq %xmm1, %xmm0
97*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
98*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmuludq %xmm1, %xmm2
99*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
100*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
101*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
102*9880d681SAndroid Build Coastguard Worker;
103*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: mul_v4i32c:
104*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0: # %entry
105*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
106*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
107*9880d681SAndroid Build Coastguard Worker;
108*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v4i32c:
109*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0: # %entry
110*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
111*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
112*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
113*9880d681SAndroid Build Coastguard Workerentry:
114*9880d681SAndroid Build Coastguard Worker  %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >
115*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %A
116*9880d681SAndroid Build Coastguard Worker}
117*9880d681SAndroid Build Coastguard Worker
; mul_v2i64c - multiply both lanes of a <2 x i64> vector by the constant 117.
;
; Under both the shared SSE prefix and the shared AVX prefix the expected
; sequence uses two (v)pmuludq multiplies: one on the input as-is and one on
; the input shifted right by 32 bits, whose product is shifted left by 32 and
; added to the first product.
; NOTE(review): the variable-operand mul_v2i64 test in this file expects three
; pmuludq; presumably one partial product is dropped here because the upper
; 32 bits of the constant are zero - confirm against the X86 lowering code.
118*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @mul_v2i64c(<2 x i64> %i) nounwind  {
119*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: mul_v2i64c:
120*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0: # %entry
121*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [117,117]
122*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa %xmm0, %xmm2
123*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmuludq %xmm1, %xmm2
124*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psrlq $32, %xmm0
125*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmuludq %xmm1, %xmm0
126*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psllq $32, %xmm0
127*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq %xmm2, %xmm0
128*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
129*9880d681SAndroid Build Coastguard Worker;
130*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v2i64c:
131*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0: # %entry
132*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [117,117]
133*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
134*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpsrlq $32, %xmm0, %xmm0
135*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
136*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
137*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
138*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
139*9880d681SAndroid Build Coastguard Workerentry:
140*9880d681SAndroid Build Coastguard Worker  %A = mul <2 x i64> %i, < i64 117, i64 117 >
141*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %A
142*9880d681SAndroid Build Coastguard Worker}
143*9880d681SAndroid Build Coastguard Worker
; mul_v16i8 - lane-wise multiply of two <16 x i8> vector arguments.
;
; Variable-operand counterpart of mul_v16i8c. In every expected sequence both
; operands are sign-extended to 16-bit lanes, multiplied with (v)pmullw, and
; narrowed back to bytes:
;   - SSE2 prefix     - punpcklbw/punpckhbw + psraw $8 on each operand, then
;                       pand with a 255 mask + packuswb.
;   - SSE41 prefix    - pmovsxbw on each half (pshufd [2,3,0,1] brings the
;                       upper half down), then pand + packuswb.
;   - AVX2 prefix     - vpmovsxbw into ymm, vpshufb + vpunpcklqdq to narrow.
;   - AVX512F prefix  - vpmovsxwd to zmm followed by vpmovdb.
;   - AVX512BW prefix - vpmovwb.
144*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @mul_v16i8(<16 x i8> %i, <16 x i8> %j) nounwind  {
145*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: mul_v16i8:
146*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0: # %entry
147*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa %xmm1, %xmm2
148*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
149*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm2
150*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa %xmm0, %xmm3
151*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
152*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm3
153*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm2, %xmm3
154*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
155*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm2, %xmm3
156*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
157*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm1
158*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
159*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm0
160*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm1, %xmm0
161*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm2, %xmm0
162*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    packuswb %xmm3, %xmm0
163*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
164*9880d681SAndroid Build Coastguard Worker;
165*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: mul_v16i8:
166*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0: # %entry
167*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm1, %xmm3
168*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm0, %xmm2
169*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm3, %xmm2
170*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
171*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm3, %xmm2
172*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
173*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm1, %xmm1
174*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
175*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm0, %xmm0
176*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm1, %xmm0
177*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm3, %xmm0
178*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    packuswb %xmm0, %xmm2
179*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movdqa %xmm2, %xmm0
180*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
181*9880d681SAndroid Build Coastguard Worker;
182*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: mul_v16i8:
183*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0: # %entry
184*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm1, %ymm1
185*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
186*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
187*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
188*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
189*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
190*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
191*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
192*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vzeroupper
193*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
194*9880d681SAndroid Build Coastguard Worker;
195*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: mul_v16i8:
196*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0: # %entry
197*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw %xmm1, %ymm1
198*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw %xmm0, %ymm0
199*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
200*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
201*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
202*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
203*9880d681SAndroid Build Coastguard Worker;
204*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: mul_v16i8:
205*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0: # %entry
206*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmovsxbw %xmm1, %ymm1
207*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmovsxbw %xmm0, %ymm0
208*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
209*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
210*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
211*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
212*9880d681SAndroid Build Coastguard Workerentry:
213*9880d681SAndroid Build Coastguard Worker  %A = mul <16 x i8> %i, %j
214*9880d681SAndroid Build Coastguard Worker  ret <16 x i8> %A
215*9880d681SAndroid Build Coastguard Worker}
216*9880d681SAndroid Build Coastguard Worker
; mul_v8i16 - lane-wise multiply of two <8 x i16> vector arguments.
;
; The expected output is a single register-register multiply - pmullw under
; the shared SSE prefix and the three-operand vpmullw under the shared AVX
; prefix.
217*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @mul_v8i16(<8 x i16> %i, <8 x i16> %j) nounwind  {
218*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: mul_v8i16:
219*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0: # %entry
220*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmullw %xmm1, %xmm0
221*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
222*9880d681SAndroid Build Coastguard Worker;
223*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v8i16:
224*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0: # %entry
225*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
226*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
227*9880d681SAndroid Build Coastguard Workerentry:
228*9880d681SAndroid Build Coastguard Worker  %A = mul <8 x i16> %i, %j
229*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %A
230*9880d681SAndroid Build Coastguard Worker}
231*9880d681SAndroid Build Coastguard Worker
; mul_v4i32 - lane-wise multiply of two <4 x i32> vector arguments.
;
; Expected output per check prefix:
;   - SSE2 prefix  - two pmuludq multiplies, one on the operands as-is and one
;                    on pshufd [1,1,3,3] copies of both operands, with the
;                    results picked out by pshufd [0,2,2,3] and interleaved by
;                    punpckldq.
;   - SSE41 prefix - a single pmulld.
;   - AVX prefix   - a single vpmulld.
232*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @mul_v4i32(<4 x i32> %i, <4 x i32> %j) nounwind  {
233*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: mul_v4i32:
234*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0: # %entry
235*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
236*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmuludq %xmm1, %xmm0
237*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
238*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
239*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmuludq %xmm2, %xmm1
240*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
241*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
242*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
243*9880d681SAndroid Build Coastguard Worker;
244*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: mul_v4i32:
245*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0: # %entry
246*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmulld %xmm1, %xmm0
247*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
248*9880d681SAndroid Build Coastguard Worker;
249*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v4i32:
250*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0: # %entry
251*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
252*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
253*9880d681SAndroid Build Coastguard Workerentry:
254*9880d681SAndroid Build Coastguard Worker  %A = mul <4 x i32> %i, %j
255*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %A
256*9880d681SAndroid Build Coastguard Worker}
257*9880d681SAndroid Build Coastguard Worker
; mul_v2i64 - lane-wise multiply of two <2 x i64> vector arguments.
;
; Under both the shared SSE prefix and the shared AVX prefix the expected
; sequence builds the product from three (v)pmuludq multiplies:
;   1. %i by %j with no shifting,
;   2. %i by (%j shifted right 32), with the product shifted left 32,
;   3. (%i shifted right 32) by %j, with the product shifted left 32,
; and sums the three partial products with (v)paddq.
258*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @mul_v2i64(<2 x i64> %i, <2 x i64> %j) nounwind  {
259*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: mul_v2i64:
260*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0: # %entry
261*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa %xmm0, %xmm2
262*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmuludq %xmm1, %xmm2
263*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa %xmm1, %xmm3
264*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psrlq $32, %xmm3
265*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmuludq %xmm0, %xmm3
266*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psllq $32, %xmm3
267*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq %xmm3, %xmm2
268*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psrlq $32, %xmm0
269*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmuludq %xmm1, %xmm0
270*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psllq $32, %xmm0
271*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq %xmm2, %xmm0
272*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
273*9880d681SAndroid Build Coastguard Worker;
274*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v2i64:
275*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0: # %entry
276*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
277*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpsrlq $32, %xmm1, %xmm3
278*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpmuludq %xmm3, %xmm0, %xmm3
279*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpsllq $32, %xmm3, %xmm3
280*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
281*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpsrlq $32, %xmm0, %xmm0
282*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
283*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
284*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
285*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
286*9880d681SAndroid Build Coastguard Workerentry:
287*9880d681SAndroid Build Coastguard Worker  %A = mul <2 x i64> %i, %j
288*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %A
289*9880d681SAndroid Build Coastguard Worker}
290*9880d681SAndroid Build Coastguard Worker
; External function taking no arguments and returning void; no body is defined
; in this file. mul_v4i32spill and mul_v2i64spill call it before their multiply
; so that the vector arguments have to be spilled around the call.
291*9880d681SAndroid Build Coastguard Workerdeclare void @foo()
292*9880d681SAndroid Build Coastguard Worker
; mul_v4i32spill - lane-wise <4 x i32> multiply performed after a call to @foo.
;
; In every configuration the expected output reserves 40 bytes of stack,
; stores both vector arguments there before the call (the "16-byte Spill"
; lines) and reads them back afterwards:
;   - SSE2 prefix  - both values are reloaded into registers and multiplied
;                    with the two-pmuludq + shuffle sequence.
;   - SSE41 prefix - one value is reloaded into a register; the other is used
;                    directly as the pmulld memory operand ("Folded Reload").
;   - AVX prefix   - same shape as SSE41, using vmovaps/vmovdqa/vpmulld.
293*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @mul_v4i32spill(<4 x i32> %i, <4 x i32> %j) nounwind  {
294*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: mul_v4i32spill:
295*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0: # %entry
296*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    subq $40, %rsp
297*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
298*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
299*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    callq foo
300*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
301*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
302*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm2 # 16-byte Reload
303*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmuludq %xmm2, %xmm0
304*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
305*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
306*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmuludq %xmm1, %xmm2
307*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
308*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
309*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    addq $40, %rsp
310*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
311*9880d681SAndroid Build Coastguard Worker;
312*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: mul_v4i32spill:
313*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0: # %entry
314*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    subq $40, %rsp
315*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
316*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
317*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    callq foo
318*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
319*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmulld {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
320*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    addq $40, %rsp
321*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
322*9880d681SAndroid Build Coastguard Worker;
323*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v4i32spill:
324*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0: # %entry
325*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    subq $40, %rsp
326*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
327*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
328*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    callq foo
329*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovdqa (%rsp), %xmm0 # 16-byte Reload
330*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpmulld {{[0-9]+}}(%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
331*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    addq $40, %rsp
332*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
333*9880d681SAndroid Build Coastguard Workerentry:
334*9880d681SAndroid Build Coastguard Worker  ; Use a call to force spills.
335*9880d681SAndroid Build Coastguard Worker  call void @foo()
336*9880d681SAndroid Build Coastguard Worker  %A = mul <4 x i32> %i, %j
337*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %A
338*9880d681SAndroid Build Coastguard Worker}
339*9880d681SAndroid Build Coastguard Worker
; mul_v2i64spill - lane-wise <2 x i64> multiply performed after a call to @foo.
;
; Under both the shared SSE prefix and the shared AVX prefix the expected
; output reserves 40 bytes of stack, stores both vector arguments there before
; the call (the "16-byte Spill" lines), reloads both into registers afterwards
; and then forms the product from three (v)pmuludq partial products combined
; with shifts by 32 and (v)paddq. Unlike mul_v4i32spill, no expected line here
; is marked "Folded Reload".
340*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @mul_v2i64spill(<2 x i64> %i, <2 x i64> %j) nounwind  {
341*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: mul_v2i64spill:
342*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0: # %entry
343*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    subq $40, %rsp
344*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
345*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
346*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    callq foo
347*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
348*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa %xmm0, %xmm2
349*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm3 # 16-byte Reload
350*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmuludq %xmm3, %xmm2
351*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa %xmm3, %xmm1
352*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psrlq $32, %xmm1
353*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmuludq %xmm0, %xmm1
354*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psllq $32, %xmm1
355*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq %xmm1, %xmm2
356*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psrlq $32, %xmm0
357*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmuludq %xmm3, %xmm0
358*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psllq $32, %xmm0
359*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq %xmm2, %xmm0
360*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addq $40, %rsp
361*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
362*9880d681SAndroid Build Coastguard Worker;
363*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v2i64spill:
364*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0: # %entry
365*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    subq $40, %rsp
366*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
367*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
368*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    callq foo
369*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovdqa {{[0-9]+}}(%rsp), %xmm2 # 16-byte Reload
370*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovdqa (%rsp), %xmm3 # 16-byte Reload
371*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpmuludq %xmm2, %xmm3, %xmm0
372*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpsrlq $32, %xmm2, %xmm1
373*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
374*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpsllq $32, %xmm1, %xmm1
375*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
376*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpsrlq $32, %xmm3, %xmm1
377*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
378*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpsllq $32, %xmm1, %xmm1
379*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
380*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    addq $40, %rsp
381*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
382*9880d681SAndroid Build Coastguard Workerentry:
383*9880d681SAndroid Build Coastguard Worker  ; Use a call to force spills.
384*9880d681SAndroid Build Coastguard Worker  call void @foo()
385*9880d681SAndroid Build Coastguard Worker  %A = mul <2 x i64> %i, %j
386*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %A
387*9880d681SAndroid Build Coastguard Worker}
388*9880d681SAndroid Build Coastguard Worker
; Multiplies a <32 x i8> argument by a splat of the constant 117.
; Every expected sequence below has the same three-step shape: sign-extend the
; bytes to 16-bit lanes, multiply with (v)pmullw, then narrow the products
; back to bytes. The runs differ only in the widening and narrowing
; instructions they expect.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with that script rather than editing them by hand.
389*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @mul_v32i8c(<32 x i8> %i) nounwind  {
390*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: mul_v32i8c:
391*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0: # %entry
392*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
393*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm2
394*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa %xmm0, %xmm3
395*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
396*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm3
397*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm2, %xmm3
398*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
399*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm4, %xmm3
400*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
401*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm0
402*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm2, %xmm0
403*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm4, %xmm0
404*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    packuswb %xmm3, %xmm0
405*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa %xmm1, %xmm3
406*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
407*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm3
408*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm2, %xmm3
409*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm4, %xmm3
410*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
411*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm1
412*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm2, %xmm1
413*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm4, %xmm1
414*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    packuswb %xmm3, %xmm1
415*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
416*9880d681SAndroid Build Coastguard Worker;
417*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: mul_v32i8c:
418*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0: # %entry
419*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm0, %xmm2
420*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw {{.*}}(%rip), %xmm4
421*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm4, %xmm2
422*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
423*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm5, %xmm2
424*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
425*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm0, %xmm0
426*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm4, %xmm0
427*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm5, %xmm0
428*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    packuswb %xmm0, %xmm2
429*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm1, %xmm3
430*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm4, %xmm3
431*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm5, %xmm3
432*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
433*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm0, %xmm0
434*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm4, %xmm0
435*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm5, %xmm0
436*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    packuswb %xmm0, %xmm3
437*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movdqa %xmm2, %xmm0
438*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movdqa %xmm3, %xmm1
439*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
440*9880d681SAndroid Build Coastguard Worker;
441*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: mul_v32i8c:
442*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0: # %entry
443*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
444*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm1, %ymm1
445*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm2
446*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm2, %ymm1, %ymm1
447*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
448*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
449*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm4, %xmm3, %xmm3
450*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm4, %xmm1, %xmm1
451*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
452*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
453*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
454*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
455*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm4, %xmm2, %xmm2
456*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm4, %xmm0, %xmm0
457*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
458*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
459*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
460*9880d681SAndroid Build Coastguard Worker;
461*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: mul_v32i8c:
462*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0: # %entry
463*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw %xmm0, %ymm1
464*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm2
465*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmullw %ymm2, %ymm1, %ymm1
466*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxwd %ymm1, %zmm1
467*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
468*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
469*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw %xmm0, %ymm0
470*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
471*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
472*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
473*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
474*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
475*9880d681SAndroid Build Coastguard Worker;
476*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: mul_v32i8c:
477*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0: # %entry
478*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovaps {{.*#+}} ymm1 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
479*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmovsxbw %ymm1, %zmm1
480*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm0
481*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmullw %zmm1, %zmm0, %zmm0
482*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
483*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
484*9880d681SAndroid Build Coastguard Workerentry:
485*9880d681SAndroid Build Coastguard Worker  %A = mul <32 x i8> %i, < i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117 >
486*9880d681SAndroid Build Coastguard Worker  ret <32 x i8> %A
487*9880d681SAndroid Build Coastguard Worker}
488*9880d681SAndroid Build Coastguard Worker
; Multiplies a <16 x i16> argument by a splat of the constant 117.
; 16-bit lanes map straight onto (v)pmullw, so no widening is expected: the
; SSE runs check one pmullw per 128-bit half of the argument, and the AVX runs
; check a single 256-bit vpmullw whose constant operand is a RIP-relative
; memory reference.
489*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @mul_v16i16c(<16 x i16> %i) nounwind  {
490*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: mul_v16i16c:
491*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0: # %entry
492*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [117,117,117,117,117,117,117,117]
493*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmullw %xmm2, %xmm0
494*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmullw %xmm2, %xmm1
495*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
496*9880d681SAndroid Build Coastguard Worker;
497*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v16i16c:
498*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0: # %entry
499*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpmullw {{.*}}(%rip), %ymm0, %ymm0
500*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
501*9880d681SAndroid Build Coastguard Workerentry:
502*9880d681SAndroid Build Coastguard Worker  %A = mul <16 x i16> %i, < i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117 >
503*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %A
504*9880d681SAndroid Build Coastguard Worker}
505*9880d681SAndroid Build Coastguard Worker
; Multiplies an <8 x i32> argument by a splat of the constant 117.
; The expected output for the baseline SSE2 run contains no pmulld. It
; multiplies the even lanes and the odd lanes (moved into place with
; pshufd [1,1,3,3]) separately with pmuludq, then keeps the low 32 bits of
; each product and re-interleaves them with pshufd [0,2,2,3] and punpckldq.
; The runs with SSE4.1 or AVX enabled expect a direct (v)pmulld; the AVX
; sequence loads the constant with vpbroadcastd.
506*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @mul_v8i32c(<8 x i32> %i) nounwind  {
507*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: mul_v8i32c:
508*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0: # %entry
509*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [117,117,117,117]
510*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
511*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmuludq %xmm2, %xmm0
512*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
513*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmuludq %xmm2, %xmm3
514*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
515*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
516*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
517*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmuludq %xmm2, %xmm1
518*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
519*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmuludq %xmm2, %xmm3
520*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
521*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
522*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
523*9880d681SAndroid Build Coastguard Worker;
524*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: mul_v8i32c:
525*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0: # %entry
526*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [117,117,117,117]
527*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmulld %xmm2, %xmm0
528*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmulld %xmm2, %xmm1
529*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
530*9880d681SAndroid Build Coastguard Worker;
531*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v8i32c:
532*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0: # %entry
533*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
534*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpmulld %ymm1, %ymm0, %ymm0
535*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
536*9880d681SAndroid Build Coastguard Workerentry:
537*9880d681SAndroid Build Coastguard Worker  %A = mul <8 x i32> %i, < i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117 >
538*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %A
539*9880d681SAndroid Build Coastguard Worker}
540*9880d681SAndroid Build Coastguard Worker
; Multiplies a <4 x i64> argument by a splat of the constant 117.
; The expected code builds each 64-bit product from two 32x32 pmuludq partial
; products: (low half of the input times the constant) plus (high half of the
; input, obtained with a logical right shift by 32, times the constant) shifted
; left by 32.
; NOTE(review): only two pmuludq per vector appear here, versus three in the
; variable-operand mul_v4i64 test in this file. Presumably the third partial
; product is dropped because the upper 32 bits of 117 are zero; confirm
; against the X86 lowering code.
541*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @mul_v4i64c(<4 x i64> %i) nounwind  {
542*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: mul_v4i64c:
543*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0: # %entry
544*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [117,117]
545*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa %xmm0, %xmm3
546*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmuludq %xmm2, %xmm3
547*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psrlq $32, %xmm0
548*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmuludq %xmm2, %xmm0
549*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psllq $32, %xmm0
550*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq %xmm3, %xmm0
551*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa %xmm1, %xmm3
552*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmuludq %xmm2, %xmm3
553*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psrlq $32, %xmm1
554*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmuludq %xmm2, %xmm1
555*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psllq $32, %xmm1
556*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq %xmm3, %xmm1
557*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
558*9880d681SAndroid Build Coastguard Worker;
559*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v4i64c:
560*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0: # %entry
561*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm1
562*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpmuludq %ymm1, %ymm0, %ymm2
563*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpsrlq $32, %ymm0, %ymm0
564*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
565*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpsllq $32, %ymm0, %ymm0
566*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
567*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
568*9880d681SAndroid Build Coastguard Workerentry:
569*9880d681SAndroid Build Coastguard Worker  %A = mul <4 x i64> %i, < i64 117, i64 117, i64 117, i64 117 >
570*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %A
571*9880d681SAndroid Build Coastguard Worker}
572*9880d681SAndroid Build Coastguard Worker
; Multiplies two <32 x i8> arguments element-wise.
; Every expected sequence sign-extends both operands to 16-bit lanes
; (punpck + psraw $8 on baseline SSE2, (v)pmovsxbw elsewhere), multiplies with
; (v)pmullw, and narrows the products back to bytes. The narrowing step is
; pand 255 + packuswb on the SSE runs, vpshufb + vpunpcklqdq on the AVX2 run,
; vpmovsxwd + vpmovdb on the AVX512F run, and vpmovwb on the AVX512BW run.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with that script rather than editing them by hand.
573*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind  {
574*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: mul_v32i8:
575*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0: # %entry
576*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa %xmm2, %xmm4
577*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
578*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm4
579*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa %xmm0, %xmm5
580*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
581*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm5
582*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm4, %xmm5
583*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
584*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm4, %xmm5
585*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
586*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm2
587*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
588*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm0
589*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm2, %xmm0
590*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm4, %xmm0
591*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    packuswb %xmm5, %xmm0
592*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa %xmm3, %xmm2
593*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
594*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm2
595*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa %xmm1, %xmm5
596*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
597*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm5
598*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm2, %xmm5
599*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm4, %xmm5
600*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
601*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm3
602*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
603*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm1
604*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm3, %xmm1
605*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm4, %xmm1
606*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    packuswb %xmm5, %xmm1
607*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
608*9880d681SAndroid Build Coastguard Worker;
609*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: mul_v32i8:
610*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0: # %entry
611*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm2, %xmm5
612*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm0, %xmm4
613*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm5, %xmm4
614*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
615*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm5, %xmm4
616*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
617*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm2, %xmm2
618*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
619*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm0, %xmm0
620*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm2, %xmm0
621*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm5, %xmm0
622*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    packuswb %xmm0, %xmm4
623*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm3, %xmm0
624*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm1, %xmm2
625*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm0, %xmm2
626*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm5, %xmm2
627*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
628*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm0, %xmm0
629*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
630*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm1, %xmm1
631*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm0, %xmm1
632*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm5, %xmm1
633*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    packuswb %xmm1, %xmm2
634*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movdqa %xmm4, %xmm0
635*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movdqa %xmm2, %xmm1
636*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
637*9880d681SAndroid Build Coastguard Worker;
638*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: mul_v32i8:
639*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0: # %entry
640*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
641*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm2, %ymm2
642*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
643*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm3, %ymm3
644*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm2, %ymm3, %ymm2
645*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
646*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
647*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm4, %xmm3, %xmm3
648*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm4, %xmm2, %xmm2
649*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
650*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm1, %ymm1
651*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
652*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
653*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
654*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm4, %xmm1, %xmm1
655*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm4, %xmm0, %xmm0
656*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
657*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
658*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
659*9880d681SAndroid Build Coastguard Worker;
660*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: mul_v32i8:
661*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0: # %entry
662*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw %xmm1, %ymm2
663*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw %xmm0, %ymm3
664*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmullw %ymm2, %ymm3, %ymm2
665*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxwd %ymm2, %zmm2
666*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
667*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm1
668*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw %xmm1, %ymm1
669*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
670*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw %xmm0, %ymm0
671*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
672*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
673*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
674*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
675*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
676*9880d681SAndroid Build Coastguard Worker;
677*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: mul_v32i8:
678*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0: # %entry
679*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmovsxbw %ymm1, %zmm1
680*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm0
681*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmullw %zmm1, %zmm0, %zmm0
682*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
683*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
684*9880d681SAndroid Build Coastguard Workerentry:
685*9880d681SAndroid Build Coastguard Worker  %A = mul <32 x i8> %i, %j
686*9880d681SAndroid Build Coastguard Worker  ret <32 x i8> %A
687*9880d681SAndroid Build Coastguard Worker}
688*9880d681SAndroid Build Coastguard Worker
; Multiplies two <16 x i16> arguments element-wise.
; No widening or shuffling is expected: the SSE runs check one pmullw per
; 128-bit half (xmm2 into xmm0, xmm3 into xmm1), and the AVX runs check a
; single 256-bit vpmullw on ymm0 and ymm1.
689*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @mul_v16i16(<16 x i16> %i, <16 x i16> %j) nounwind  {
690*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: mul_v16i16:
691*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0: # %entry
692*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmullw %xmm2, %xmm0
693*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmullw %xmm3, %xmm1
694*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
695*9880d681SAndroid Build Coastguard Worker;
696*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v16i16:
697*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0: # %entry
698*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
699*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
700*9880d681SAndroid Build Coastguard Workerentry:
701*9880d681SAndroid Build Coastguard Worker  %A = mul <16 x i16> %i, %j
702*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %A
703*9880d681SAndroid Build Coastguard Worker}
704*9880d681SAndroid Build Coastguard Worker
; Multiplies two <8 x i32> arguments element-wise.
; The expected output for the baseline SSE2 run contains no pmulld. For each
; 128-bit half it multiplies the even lanes with pmuludq, moves the odd lanes
; of both operands into place with pshufd [1,1,3,3] and multiplies those too,
; then keeps the low 32 bits of each product (pshufd [0,2,2,3]) and
; re-interleaves them with punpckldq.
; The runs with SSE4.1 or AVX enabled expect a direct (v)pmulld.
705*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @mul_v8i32(<8 x i32> %i, <8 x i32> %j) nounwind  {
706*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: mul_v8i32:
707*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0: # %entry
708*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
709*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmuludq %xmm2, %xmm0
710*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
711*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
712*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmuludq %xmm4, %xmm2
713*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
714*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
715*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
716*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmuludq %xmm3, %xmm1
717*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
718*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
719*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmuludq %xmm2, %xmm3
720*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
721*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
722*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
723*9880d681SAndroid Build Coastguard Worker;
724*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: mul_v8i32:
725*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0: # %entry
726*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmulld %xmm2, %xmm0
727*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmulld %xmm3, %xmm1
728*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
729*9880d681SAndroid Build Coastguard Worker;
730*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v8i32:
731*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0: # %entry
732*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpmulld %ymm1, %ymm0, %ymm0
733*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
734*9880d681SAndroid Build Coastguard Workerentry:
735*9880d681SAndroid Build Coastguard Worker  %A = mul <8 x i32> %i, %j
736*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %A
737*9880d681SAndroid Build Coastguard Worker}
738*9880d681SAndroid Build Coastguard Worker
; Multiplies two <4 x i64> arguments element-wise.
; The expected code builds each 64-bit product from three 32x32 pmuludq
; partial products:
;   low(i)*low(j) + ((low(i)*high(j)) << 32) + ((high(i)*low(j)) << 32)
; The high halves are obtained with a logical right shift by 32 (psrlq) and
; the cross terms are moved into position with psllq before the paddq steps.
; The SSE runs do this once per 128-bit half; the AVX runs do it once on the
; full 256-bit registers.
739*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @mul_v4i64(<4 x i64> %i, <4 x i64> %j) nounwind  {
740*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: mul_v4i64:
741*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0: # %entry
742*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa %xmm0, %xmm4
743*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmuludq %xmm2, %xmm4
744*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa %xmm2, %xmm5
745*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psrlq $32, %xmm5
746*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmuludq %xmm0, %xmm5
747*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psllq $32, %xmm5
748*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq %xmm5, %xmm4
749*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psrlq $32, %xmm0
750*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmuludq %xmm2, %xmm0
751*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psllq $32, %xmm0
752*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq %xmm4, %xmm0
753*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa %xmm1, %xmm2
754*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmuludq %xmm3, %xmm2
755*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa %xmm3, %xmm4
756*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psrlq $32, %xmm4
757*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmuludq %xmm1, %xmm4
758*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psllq $32, %xmm4
759*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq %xmm4, %xmm2
760*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psrlq $32, %xmm1
761*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmuludq %xmm3, %xmm1
762*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psllq $32, %xmm1
763*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq %xmm2, %xmm1
764*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
765*9880d681SAndroid Build Coastguard Worker;
766*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: mul_v4i64:
767*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0: # %entry
768*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpmuludq %ymm1, %ymm0, %ymm2
769*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpsrlq $32, %ymm1, %ymm3
770*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpmuludq %ymm3, %ymm0, %ymm3
771*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpsllq $32, %ymm3, %ymm3
772*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpaddq %ymm3, %ymm2, %ymm2
773*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpsrlq $32, %ymm0, %ymm0
774*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
775*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpsllq $32, %ymm0, %ymm0
776*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
777*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
778*9880d681SAndroid Build Coastguard Workerentry:
779*9880d681SAndroid Build Coastguard Worker  %A = mul <4 x i64> %i, %j
780*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %A
781*9880d681SAndroid Build Coastguard Worker}
782*9880d681SAndroid Build Coastguard Worker
783*9880d681SAndroid Build Coastguard Workerdefine <64 x i8> @mul_v64i8c(<64 x i8> %i) nounwind  {
784*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: mul_v64i8c:
785*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0: # %entry
786*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
787*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm4
788*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa %xmm0, %xmm6
789*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckhbw {{.*#+}} xmm6 = xmm6[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
790*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm6
791*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm4, %xmm6
792*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
793*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm5, %xmm6
794*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
795*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm0
796*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm4, %xmm0
797*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm5, %xmm0
798*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    packuswb %xmm6, %xmm0
799*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa %xmm1, %xmm6
800*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckhbw {{.*#+}} xmm6 = xmm6[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
801*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm6
802*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm4, %xmm6
803*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm5, %xmm6
804*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
805*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm1
806*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm4, %xmm1
807*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm5, %xmm1
808*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    packuswb %xmm6, %xmm1
809*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa %xmm2, %xmm6
810*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckhbw {{.*#+}} xmm6 = xmm6[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
811*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm6
812*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm4, %xmm6
813*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm5, %xmm6
814*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
815*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm2
816*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm4, %xmm2
817*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm5, %xmm2
818*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    packuswb %xmm6, %xmm2
819*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa %xmm3, %xmm6
820*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckhbw {{.*#+}} xmm6 = xmm6[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
821*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm6
822*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm4, %xmm6
823*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm5, %xmm6
824*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
825*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm3
826*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm4, %xmm3
827*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm5, %xmm3
828*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    packuswb %xmm6, %xmm3
829*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
830*9880d681SAndroid Build Coastguard Worker;
831*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: mul_v64i8c:
832*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0: # %entry
833*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movdqa %xmm1, %xmm4
834*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movdqa %xmm0, %xmm1
835*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm1, %xmm0
836*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw {{.*}}(%rip), %xmm6
837*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm6, %xmm0
838*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movdqa {{.*#+}} xmm7 = [255,255,255,255,255,255,255,255]
839*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm7, %xmm0
840*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
841*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm1, %xmm1
842*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm6, %xmm1
843*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm7, %xmm1
844*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    packuswb %xmm1, %xmm0
845*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm4, %xmm1
846*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm6, %xmm1
847*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm7, %xmm1
848*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
849*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm4, %xmm4
850*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm6, %xmm4
851*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm7, %xmm4
852*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    packuswb %xmm4, %xmm1
853*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm2, %xmm4
854*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm6, %xmm4
855*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm7, %xmm4
856*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
857*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm2, %xmm2
858*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm6, %xmm2
859*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm7, %xmm2
860*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    packuswb %xmm2, %xmm4
861*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm3, %xmm5
862*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm6, %xmm5
863*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm7, %xmm5
864*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
865*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm2, %xmm2
866*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm6, %xmm2
867*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm7, %xmm2
868*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    packuswb %xmm2, %xmm5
869*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movdqa %xmm4, %xmm2
870*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movdqa %xmm5, %xmm3
871*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
872*9880d681SAndroid Build Coastguard Worker;
873*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: mul_v64i8c:
874*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0: # %entry
875*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
876*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm2, %ymm2
877*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm3
878*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm3, %ymm2, %ymm2
879*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm4
880*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
881*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm5, %xmm4, %xmm4
882*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm5, %xmm2, %xmm2
883*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
884*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
885*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm3, %ymm0, %ymm0
886*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm4
887*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm5, %xmm4, %xmm4
888*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm5, %xmm0, %xmm0
889*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
890*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
891*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
892*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm2, %ymm2
893*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm3, %ymm2, %ymm2
894*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm4
895*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm5, %xmm4, %xmm4
896*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm5, %xmm2, %xmm2
897*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
898*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm1, %ymm1
899*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm3, %ymm1, %ymm1
900*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
901*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm5, %xmm3, %xmm3
902*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm5, %xmm1, %xmm1
903*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
904*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
905*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
906*9880d681SAndroid Build Coastguard Worker;
907*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: mul_v64i8c:
908*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0: # %entry
909*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw %xmm0, %ymm2
910*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm3
911*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmullw %ymm3, %ymm2, %ymm2
912*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxwd %ymm2, %zmm2
913*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
914*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
915*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw %xmm0, %ymm0
916*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmullw %ymm3, %ymm0, %ymm0
917*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
918*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
919*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
920*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw %xmm1, %ymm2
921*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmullw %ymm3, %ymm2, %ymm2
922*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxwd %ymm2, %zmm2
923*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
924*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm1
925*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw %xmm1, %ymm1
926*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmullw %ymm3, %ymm1, %ymm1
927*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxwd %ymm1, %zmm1
928*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
929*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
930*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
931*9880d681SAndroid Build Coastguard Worker;
932*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: mul_v64i8c:
933*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0: # %entry
934*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovaps {{.*#+}} ymm1 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
935*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmovsxbw %ymm1, %zmm1
936*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm2
937*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmullw %zmm1, %zmm2, %zmm2
938*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmovwb %zmm2, %ymm2
939*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
940*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm0
941*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmullw %zmm1, %zmm0, %zmm0
942*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
943*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
944*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
945*9880d681SAndroid Build Coastguard Workerentry:
946*9880d681SAndroid Build Coastguard Worker  %A = mul <64 x i8> %i, < i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117 >
947*9880d681SAndroid Build Coastguard Worker  ret <64 x i8> %A
948*9880d681SAndroid Build Coastguard Worker}
949*9880d681SAndroid Build Coastguard Worker
; mul_v64i8 - lane-wise multiply of two <64 x i8> arguments (%i * %j); the
; product is returned (see the IR after the 'entry' label).
;
; The assertions in this function are autogenerated (the note at the top of
; this file names utils/update_llc_test_checks.py); regenerate them with that
; script instead of editing them by hand.
;
; Expected lowering recorded for each check prefix:
;   - SSE2 prefix     - xmm0-xmm3 are paired with xmm4-xmm7; both operands are
;                       split into high/low byte halves with
;                       punpckhbw/punpcklbw + psraw $8, multiplied with pmullw,
;                       masked with the 255 word constant and repacked with
;                       packuswb.
;   - SSE41 prefix    - same pairing, but pmovsxbw (plus pshufd to reach the
;                       upper half) performs the byte-to-word widening.
;   - AVX2 prefix     - ymm0/ymm1 are paired with ymm2/ymm3; per 128-bit half,
;                       vpmovsxbw widens, vpmullw multiplies,
;                       vpshufb/vpunpcklqdq collect the low byte of every word
;                       and vinserti128 rebuilds the ymm result.
;   - AVX512F prefix  - same ymm pairing; vpmovsxbw + vpmullw, then
;                       vpmovsxwd/vpmovdb narrow the words back to bytes.
;   - AVX512BW prefix - operands are in zmm0 and zmm1; vpmovsxbw/vpmullw/vpmovwb
;                       handle each 256-bit half and vinserti64x4 joins them.
950*9880d681SAndroid Build Coastguard Workerdefine <64 x i8> @mul_v64i8(<64 x i8> %i, <64 x i8> %j) nounwind  {
951*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: mul_v64i8:
952*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0: # %entry
953*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa %xmm4, %xmm8
954*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckhbw {{.*#+}} xmm8 = xmm8[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
955*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm8
956*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa %xmm0, %xmm9
957*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckhbw {{.*#+}} xmm9 = xmm9[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
958*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm9
959*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm8, %xmm9
960*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [255,255,255,255,255,255,255,255]
961*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm8, %xmm9
962*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
963*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm4
964*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
965*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm0
966*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm4, %xmm0
967*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm8, %xmm0
968*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    packuswb %xmm9, %xmm0
969*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa %xmm5, %xmm9
970*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckhbw {{.*#+}} xmm9 = xmm9[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
971*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm9
972*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa %xmm1, %xmm4
973*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
974*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm4
975*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm9, %xmm4
976*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm8, %xmm4
977*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
978*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm5
979*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
980*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm1
981*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm5, %xmm1
982*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm8, %xmm1
983*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    packuswb %xmm4, %xmm1
984*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa %xmm6, %xmm4
985*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
986*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm4
987*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa %xmm2, %xmm5
988*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
989*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm5
990*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm4, %xmm5
991*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm8, %xmm5
992*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
993*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm6
994*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
995*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm2
996*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm6, %xmm2
997*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm8, %xmm2
998*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    packuswb %xmm5, %xmm2
999*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa %xmm7, %xmm4
1000*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1001*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm4
1002*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqa %xmm3, %xmm5
1003*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1004*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm5
1005*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm4, %xmm5
1006*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm8, %xmm5
1007*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1008*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm7
1009*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1010*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psraw $8, %xmm3
1011*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pmullw %xmm7, %xmm3
1012*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pand %xmm8, %xmm3
1013*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    packuswb %xmm5, %xmm3
1014*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
1015*9880d681SAndroid Build Coastguard Worker;
1016*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: mul_v64i8:
1017*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0: # %entry
1018*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movdqa %xmm1, %xmm8
1019*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movdqa %xmm0, %xmm1
1020*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm4, %xmm9
1021*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm1, %xmm0
1022*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm9, %xmm0
1023*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movdqa {{.*#+}} xmm9 = [255,255,255,255,255,255,255,255]
1024*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm9, %xmm0
1025*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
1026*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm4, %xmm4
1027*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1028*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm1, %xmm1
1029*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm4, %xmm1
1030*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm9, %xmm1
1031*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    packuswb %xmm1, %xmm0
1032*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm5, %xmm4
1033*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm8, %xmm1
1034*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm4, %xmm1
1035*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm9, %xmm1
1036*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[2,3,0,1]
1037*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm4, %xmm4
1038*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm8[2,3,0,1]
1039*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm5, %xmm5
1040*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm4, %xmm5
1041*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm9, %xmm5
1042*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    packuswb %xmm5, %xmm1
1043*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm6, %xmm5
1044*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm2, %xmm4
1045*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm5, %xmm4
1046*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm9, %xmm4
1047*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm6[2,3,0,1]
1048*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm5, %xmm5
1049*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
1050*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm2, %xmm2
1051*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm5, %xmm2
1052*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm9, %xmm2
1053*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    packuswb %xmm2, %xmm4
1054*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm7, %xmm2
1055*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm3, %xmm5
1056*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm2, %xmm5
1057*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm9, %xmm5
1058*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm7[2,3,0,1]
1059*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm2, %xmm2
1060*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
1061*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmovsxbw %xmm3, %xmm3
1062*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pmullw %xmm2, %xmm3
1063*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pand %xmm9, %xmm3
1064*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    packuswb %xmm3, %xmm5
1065*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movdqa %xmm4, %xmm2
1066*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movdqa %xmm5, %xmm3
1067*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
1068*9880d681SAndroid Build Coastguard Worker;
1069*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: mul_v64i8:
1070*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0: # %entry
1071*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm4
1072*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm4, %ymm4
1073*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm5
1074*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm5, %ymm5
1075*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm4, %ymm5, %ymm5
1076*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm6
1077*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
1078*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm4, %xmm6, %xmm6
1079*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm4, %xmm5, %xmm5
1080*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm6[0]
1081*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm2, %ymm2
1082*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
1083*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
1084*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
1085*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm4, %xmm2, %xmm2
1086*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm4, %xmm0, %xmm0
1087*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
1088*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vinserti128 $1, %xmm5, %ymm0, %ymm0
1089*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm2
1090*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm2, %ymm2
1091*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm5
1092*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm5, %ymm5
1093*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm2, %ymm5, %ymm2
1094*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm5
1095*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm4, %xmm5, %xmm5
1096*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm4, %xmm2, %xmm2
1097*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0]
1098*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm3, %ymm3
1099*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm1, %ymm1
1100*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm3, %ymm1, %ymm1
1101*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
1102*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm4, %xmm3, %xmm3
1103*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm4, %xmm1, %xmm1
1104*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
1105*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
1106*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
1107*9880d681SAndroid Build Coastguard Worker;
1108*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: mul_v64i8:
1109*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0: # %entry
1110*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw %xmm2, %ymm4
1111*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw %xmm0, %ymm5
1112*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmullw %ymm4, %ymm5, %ymm4
1113*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxwd %ymm4, %zmm4
1114*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovdb %zmm4, %xmm4
1115*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm2
1116*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw %xmm2, %ymm2
1117*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
1118*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw %xmm0, %ymm0
1119*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
1120*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
1121*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
1122*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vinsertf128 $1, %xmm0, %ymm4, %ymm0
1123*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw %xmm3, %ymm2
1124*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw %xmm1, %ymm4
1125*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmullw %ymm2, %ymm4, %ymm2
1126*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxwd %ymm2, %zmm2
1127*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
1128*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vextracti128 $1, %ymm3, %xmm3
1129*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw %xmm3, %ymm3
1130*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm1
1131*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxbw %xmm1, %ymm1
1132*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmullw %ymm3, %ymm1, %ymm1
1133*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovsxwd %ymm1, %zmm1
1134*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
1135*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
1136*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
1137*9880d681SAndroid Build Coastguard Worker;
1138*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: mul_v64i8:
1139*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0: # %entry
1140*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmovsxbw %ymm1, %zmm2
1141*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm3
1142*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmullw %zmm2, %zmm3, %zmm2
1143*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmovwb %zmm2, %ymm2
1144*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
1145*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmovsxbw %ymm1, %zmm1
1146*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
1147*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm0
1148*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmullw %zmm1, %zmm0, %zmm0
1149*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
1150*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
1151*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
1152*9880d681SAndroid Build Coastguard Workerentry:
1153*9880d681SAndroid Build Coastguard Worker  %A = mul <64 x i8> %i, %j
1154*9880d681SAndroid Build Coastguard Worker  ret <64 x i8> %A
1155*9880d681SAndroid Build Coastguard Worker}
1156*9880d681SAndroid Build Coastguard Worker
1157