; xref: /aosp_15_r20/external/llvm/test/CodeGen/AArch64/arm64-vmul.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; RUN: llc -asm-verbose=false < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
2*9880d681SAndroid Build Coastguard Worker
3*9880d681SAndroid Build Coastguard Worker
; Loads two <8 x i8> operands through %A and %B and passes them to the
; llvm.aarch64.neon.smull.v8i16 intrinsic. FileCheck expects a smull.8h
; instruction in the output for this function.
define <8 x i16> @smull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: smull8h:
;CHECK: smull.8h
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i16> %tmp3
}
12*9880d681SAndroid Build Coastguard Worker
; Loads two <4 x i16> operands through %A and %B and passes them to the
; llvm.aarch64.neon.smull.v4i32 intrinsic. FileCheck expects a smull.4s
; instruction in the output for this function.
define <4 x i32> @smull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: smull4s:
;CHECK: smull.4s
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i32> %tmp3
}
21*9880d681SAndroid Build Coastguard Worker
; Loads two <2 x i32> operands through %A and %B and passes them to the
; llvm.aarch64.neon.smull.v2i64 intrinsic. FileCheck expects a smull.2d
; instruction in the output for this function.
define <2 x i64> @smull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: smull2d:
;CHECK: smull.2d
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i64> %tmp3
}
30*9880d681SAndroid Build Coastguard Worker
; Declarations of the smull intrinsic overloads called by the tests in this file.
declare <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
34*9880d681SAndroid Build Coastguard Worker
; Loads two <8 x i8> operands through %A and %B and passes them to the
; llvm.aarch64.neon.umull.v8i16 intrinsic. FileCheck expects a umull.8h
; instruction in the output for this function.
define <8 x i16> @umull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: umull8h:
;CHECK: umull.8h
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i16> %tmp3
}
43*9880d681SAndroid Build Coastguard Worker
; Loads two <4 x i16> operands through %A and %B and passes them to the
; llvm.aarch64.neon.umull.v4i32 intrinsic. FileCheck expects a umull.4s
; instruction in the output for this function.
define <4 x i32> @umull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: umull4s:
;CHECK: umull.4s
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i32> %tmp3
}
52*9880d681SAndroid Build Coastguard Worker
; Loads two <2 x i32> operands through %A and %B and passes them to the
; llvm.aarch64.neon.umull.v2i64 intrinsic. FileCheck expects a umull.2d
; instruction in the output for this function.
define <2 x i64> @umull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: umull2d:
;CHECK: umull.2d
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i64> %tmp3
}
61*9880d681SAndroid Build Coastguard Worker
; Declarations of the umull intrinsic overloads called by the tests in this file.
declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
65*9880d681SAndroid Build Coastguard Worker
; Loads two <4 x i16> operands through %A and %B and passes them to the
; llvm.aarch64.neon.sqdmull.v4i32 intrinsic. FileCheck expects a sqdmull.4s
; instruction in the output for this function.
define <4 x i32> @sqdmull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqdmull4s:
;CHECK: sqdmull.4s
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i32> %tmp3
}
74*9880d681SAndroid Build Coastguard Worker
; Loads two <2 x i32> operands through %A and %B and passes them to the
; llvm.aarch64.neon.sqdmull.v2i64 intrinsic. FileCheck expects a sqdmull.2d
; instruction in the output for this function.
define <2 x i64> @sqdmull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqdmull2d:
;CHECK: sqdmull.2d
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i64> %tmp3
}
83*9880d681SAndroid Build Coastguard Worker
; Loads two full <8 x i16> vectors, extracts elements 4..7 of each with a
; shufflevector, and feeds those halves to llvm.aarch64.neon.sqdmull.v4i32.
; FileCheck expects the combination to be emitted as a single sqdmull2.4s.
define <4 x i32> @sqdmull2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqdmull2_4s:
;CHECK: sqdmull2.4s
  %load1 = load <8 x i16>, <8 x i16>* %A
  %load2 = load <8 x i16>, <8 x i16>* %B
  ; Mask <4, 5, 6, 7> picks the last four lanes of the 8-lane source.
  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i32> %tmp3
}
94*9880d681SAndroid Build Coastguard Worker
; Loads two full <4 x i32> vectors, extracts elements 2..3 of each with a
; shufflevector, and feeds those halves to llvm.aarch64.neon.sqdmull.v2i64.
; FileCheck expects the combination to be emitted as a single sqdmull2.2d.
define <2 x i64> @sqdmull2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqdmull2_2d:
;CHECK: sqdmull2.2d
  %load1 = load <4 x i32>, <4 x i32>* %A
  %load2 = load <4 x i32>, <4 x i32>* %B
  ; Mask <2, 3> picks the last two lanes of the 4-lane source.
  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i64> %tmp3
}
105*9880d681SAndroid Build Coastguard Worker
106*9880d681SAndroid Build Coastguard Worker
; Declarations of the sqdmull intrinsic overloads called by the tests in this file.
declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
109*9880d681SAndroid Build Coastguard Worker
; Loads two <8 x i8> operands through %A and %B and passes them to the
; llvm.aarch64.neon.pmull.v8i16 intrinsic. FileCheck expects a pmull.8h
; instruction in the output for this function.
define <8 x i16> @pmull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: pmull8h:
;CHECK: pmull.8h
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i16> %tmp3
}
118*9880d681SAndroid Build Coastguard Worker
; Declaration of the pmull intrinsic called by @pmull8h.
declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
120*9880d681SAndroid Build Coastguard Worker
; Loads two <4 x i16> operands through %A and %B and passes them to the
; llvm.aarch64.neon.sqdmulh.v4i16 intrinsic. FileCheck expects a sqdmulh.4h
; instruction in the output for this function.
define <4 x i16> @sqdmulh_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqdmulh_4h:
;CHECK: sqdmulh.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}
129*9880d681SAndroid Build Coastguard Worker
; Loads two <8 x i16> operands through %A and %B and passes them to the
; llvm.aarch64.neon.sqdmulh.v8i16 intrinsic. FileCheck expects a sqdmulh.8h
; instruction in the output for this function.
define <8 x i16> @sqdmulh_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqdmulh_8h:
;CHECK: sqdmulh.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}
138*9880d681SAndroid Build Coastguard Worker
; Loads two <2 x i32> operands through %A and %B and passes them to the
; llvm.aarch64.neon.sqdmulh.v2i32 intrinsic. FileCheck expects a sqdmulh.2s
; instruction in the output for this function.
define <2 x i32> @sqdmulh_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqdmulh_2s:
;CHECK: sqdmulh.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}
147*9880d681SAndroid Build Coastguard Worker
; Loads two <4 x i32> operands through %A and %B and passes them to the
; llvm.aarch64.neon.sqdmulh.v4i32 intrinsic. FileCheck expects a sqdmulh.4s
; instruction in the output for this function.
define <4 x i32> @sqdmulh_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqdmulh_4s:
;CHECK: sqdmulh.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}
156*9880d681SAndroid Build Coastguard Worker
; Scalar variant: loads two i32 operands through %A and %B and passes them to
; the llvm.aarch64.neon.sqdmulh.i32 intrinsic. FileCheck expects a sqdmulh
; that writes s0 and reads two s-registers.
define i32 @sqdmulh_1s(i32* %A, i32* %B) nounwind {
;CHECK-LABEL: sqdmulh_1s:
;CHECK: sqdmulh s0, {{s[0-9]+}}, {{s[0-9]+}}
  %tmp1 = load i32, i32* %A
  %tmp2 = load i32, i32* %B
  %tmp3 = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %tmp1, i32 %tmp2)
  ret i32 %tmp3
}
165*9880d681SAndroid Build Coastguard Worker
; Declarations of the sqdmulh intrinsic overloads (vector and scalar i32)
; called by the tests in this file.
declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare i32 @llvm.aarch64.neon.sqdmulh.i32(i32, i32) nounwind readnone
171*9880d681SAndroid Build Coastguard Worker
; Loads two <4 x i16> operands through %A and %B and passes them to the
; llvm.aarch64.neon.sqrdmulh.v4i16 intrinsic. FileCheck expects a sqrdmulh.4h
; instruction in the output for this function.
define <4 x i16> @sqrdmulh_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqrdmulh_4h:
;CHECK: sqrdmulh.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}
180*9880d681SAndroid Build Coastguard Worker
; Loads two <8 x i16> operands through %A and %B and passes them to the
; llvm.aarch64.neon.sqrdmulh.v8i16 intrinsic. FileCheck expects a sqrdmulh.8h
; instruction in the output for this function.
define <8 x i16> @sqrdmulh_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqrdmulh_8h:
;CHECK: sqrdmulh.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}
189*9880d681SAndroid Build Coastguard Worker
; Loads two <2 x i32> operands through %A and %B and passes them to the
; llvm.aarch64.neon.sqrdmulh.v2i32 intrinsic. FileCheck expects a sqrdmulh.2s
; instruction in the output for this function.
define <2 x i32> @sqrdmulh_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqrdmulh_2s:
;CHECK: sqrdmulh.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}
198*9880d681SAndroid Build Coastguard Worker
; Loads two <4 x i32> operands through %A and %B and passes them to the
; llvm.aarch64.neon.sqrdmulh.v4i32 intrinsic. FileCheck expects a sqrdmulh.4s
; instruction in the output for this function.
define <4 x i32> @sqrdmulh_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqrdmulh_4s:
;CHECK: sqrdmulh.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}
207*9880d681SAndroid Build Coastguard Worker
; Scalar variant: loads two i32 operands through %A and %B and passes them to
; the llvm.aarch64.neon.sqrdmulh.i32 intrinsic. FileCheck expects a sqrdmulh
; that writes s0 and reads two s-registers.
define i32 @sqrdmulh_1s(i32* %A, i32* %B) nounwind {
;CHECK-LABEL: sqrdmulh_1s:
;CHECK: sqrdmulh s0, {{s[0-9]+}}, {{s[0-9]+}}
  %tmp1 = load i32, i32* %A
  %tmp2 = load i32, i32* %B
  %tmp3 = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %tmp1, i32 %tmp2)
  ret i32 %tmp3
}
216*9880d681SAndroid Build Coastguard Worker
; Declarations of the sqrdmulh intrinsic overloads (vector and scalar i32)
; called by the tests in this file.
declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare i32 @llvm.aarch64.neon.sqrdmulh.i32(i32, i32) nounwind readnone
222*9880d681SAndroid Build Coastguard Worker
; Loads two <2 x float> operands through %A and %B and passes them to the
; llvm.aarch64.neon.fmulx.v2f32 intrinsic. FileCheck expects a fmulx.2s
; instruction in the output for this function.
define <2 x float> @fmulx_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fmulx_2s:
;CHECK: fmulx.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}
231*9880d681SAndroid Build Coastguard Worker
; Loads two <4 x float> operands through %A and %B and passes them to the
; llvm.aarch64.neon.fmulx.v4f32 intrinsic. FileCheck expects a fmulx.4s
; instruction in the output for this function.
define <4 x float> @fmulx_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fmulx_4s:
;CHECK: fmulx.4s
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}
240*9880d681SAndroid Build Coastguard Worker
; Loads two <2 x double> operands through %A and %B and passes them to the
; llvm.aarch64.neon.fmulx.v2f64 intrinsic. FileCheck expects a fmulx.2d
; instruction in the output for this function.
define <2 x double> @fmulx_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fmulx_2d:
;CHECK: fmulx.2d
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}
249*9880d681SAndroid Build Coastguard Worker
; Declarations of the fmulx intrinsic overloads called by the tests in this file.
declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>) nounwind readnone
253*9880d681SAndroid Build Coastguard Worker
; Calls llvm.aarch64.neon.smull.v4i32 on the <4 x i16> values loaded from %A
; and %B, then adds the result to the <4 x i32> accumulator loaded from %C.
; FileCheck expects the pair to be emitted as a smlal.4s instruction.
define <4 x i32> @smlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: smlal4s:
;CHECK: smlal.4s
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = load <4 x i32>, <4 x i32>* %C
  %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}
264*9880d681SAndroid Build Coastguard Worker
; Calls llvm.aarch64.neon.smull.v2i64 on the <2 x i32> values loaded from %A
; and %B, then adds the result to the <2 x i64> accumulator loaded from %C.
; FileCheck expects the pair to be emitted as a smlal.2d instruction.
define <2 x i64> @smlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: smlal2d:
;CHECK: smlal.2d
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = load <2 x i64>, <2 x i64>* %C
  %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp5 = add <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}
275*9880d681SAndroid Build Coastguard Worker
; Calls llvm.aarch64.neon.smull.v4i32 on the <4 x i16> values loaded from %A
; and %B, then subtracts the result from the <4 x i32> accumulator loaded
; from %C. FileCheck expects the pair to be emitted as a smlsl.4s instruction.
define <4 x i32> @smlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: smlsl4s:
;CHECK: smlsl.4s
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = load <4 x i32>, <4 x i32>* %C
  %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp5 = sub <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}
286*9880d681SAndroid Build Coastguard Worker
; Calls llvm.aarch64.neon.smull.v2i64 on the <2 x i32> values loaded from %A
; and %B, then subtracts the result from the <2 x i64> accumulator loaded
; from %C. FileCheck expects the pair to be emitted as a smlsl.2d instruction.
define <2 x i64> @smlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: smlsl2d:
;CHECK: smlsl.2d
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = load <2 x i64>, <2 x i64>* %C
  %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp5 = sub <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}
297*9880d681SAndroid Build Coastguard Worker
; Declarations of the sqadd / sqsub intrinsic overloads that the sqdmlal and
; sqdmlsl tests in this file use to accumulate the sqdmull result.
declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
302*9880d681SAndroid Build Coastguard Worker
; Calls llvm.aarch64.neon.sqdmull.v4i32 on the <4 x i16> values loaded from
; %A and %B, then combines the result with the accumulator loaded from %C via
; llvm.aarch64.neon.sqadd.v4i32. FileCheck expects a sqdmlal.4s instruction.
define <4 x i32> @sqdmlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: sqdmlal4s:
;CHECK: sqdmlal.4s
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = load <4 x i32>, <4 x i32>* %C
  %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
  ret <4 x i32> %tmp5
}
313*9880d681SAndroid Build Coastguard Worker
; Calls llvm.aarch64.neon.sqdmull.v2i64 on the <2 x i32> values loaded from
; %A and %B, then combines the result with the accumulator loaded from %C via
; llvm.aarch64.neon.sqadd.v2i64. FileCheck expects a sqdmlal.2d instruction.
define <2 x i64> @sqdmlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: sqdmlal2d:
;CHECK: sqdmlal.2d
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = load <2 x i64>, <2 x i64>* %C
  %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
  ret <2 x i64> %tmp5
}
324*9880d681SAndroid Build Coastguard Worker
; Extracts elements 4..7 of the two <8 x i16> vectors loaded from %A and %B,
; calls llvm.aarch64.neon.sqdmull.v4i32 on those halves, and combines the
; result with the accumulator loaded from %C via llvm.aarch64.neon.sqadd.v4i32.
; FileCheck expects a sqdmlal2.4s instruction.
define <4 x i32> @sqdmlal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: sqdmlal2_4s:
;CHECK: sqdmlal2.4s
  %load1 = load <8 x i16>, <8 x i16>* %A
  %load2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = load <4 x i32>, <4 x i32>* %C
  ; Mask <4, 5, 6, 7> picks the last four lanes of the 8-lane source.
  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
  ret <4 x i32> %tmp5
}
337*9880d681SAndroid Build Coastguard Worker
; Extracts elements 2..3 of the two <4 x i32> vectors loaded from %A and %B,
; calls llvm.aarch64.neon.sqdmull.v2i64 on those halves, and combines the
; result with the accumulator loaded from %C via llvm.aarch64.neon.sqadd.v2i64.
; FileCheck expects a sqdmlal2.2d instruction.
define <2 x i64> @sqdmlal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: sqdmlal2_2d:
;CHECK: sqdmlal2.2d
  %load1 = load <4 x i32>, <4 x i32>* %A
  %load2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = load <2 x i64>, <2 x i64>* %C
  ; Mask <2, 3> picks the last two lanes of the 4-lane source.
  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
  ret <2 x i64> %tmp5
}
350*9880d681SAndroid Build Coastguard Worker
; Calls llvm.aarch64.neon.sqdmull.v4i32 on the <4 x i16> values loaded from
; %A and %B, then combines the result with the accumulator loaded from %C via
; llvm.aarch64.neon.sqsub.v4i32. FileCheck expects a sqdmlsl.4s instruction.
define <4 x i32> @sqdmlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: sqdmlsl4s:
;CHECK: sqdmlsl.4s
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = load <4 x i32>, <4 x i32>* %C
  %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
  ret <4 x i32> %tmp5
}
361*9880d681SAndroid Build Coastguard Worker
; Calls llvm.aarch64.neon.sqdmull.v2i64 on the <2 x i32> values loaded from
; %A and %B, then combines the result with the accumulator loaded from %C via
; llvm.aarch64.neon.sqsub.v2i64. FileCheck expects a sqdmlsl.2d instruction.
define <2 x i64> @sqdmlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: sqdmlsl2d:
;CHECK: sqdmlsl.2d
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = load <2 x i64>, <2 x i64>* %C
  %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
  ret <2 x i64> %tmp5
}
372*9880d681SAndroid Build Coastguard Worker
; Extracts elements 4..7 of the two <8 x i16> vectors loaded from %A and %B,
; calls llvm.aarch64.neon.sqdmull.v4i32 on those halves, and combines the
; result with the accumulator loaded from %C via llvm.aarch64.neon.sqsub.v4i32.
; FileCheck expects a sqdmlsl2.4s instruction.
define <4 x i32> @sqdmlsl2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: sqdmlsl2_4s:
;CHECK: sqdmlsl2.4s
  %load1 = load <8 x i16>, <8 x i16>* %A
  %load2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = load <4 x i32>, <4 x i32>* %C
  ; Mask <4, 5, 6, 7> picks the last four lanes of the 8-lane source.
  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
  ret <4 x i32> %tmp5
}
385*9880d681SAndroid Build Coastguard Worker
; Elements 2..3 of two loaded <4 x i32> vectors are extracted with
; shufflevector, multiplied via the sqdmull intrinsic and passed to sqsub
; with the loaded accumulator; sqdmlsl2.2d is expected.
define <2 x i64> @sqdmlsl2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: sqdmlsl2_2d:
;CHECK: sqdmlsl2.2d
  %wideA = load <4 x i32>, <4 x i32>* %A
  %wideB = load <4 x i32>, <4 x i32>* %B
  %acc = load <2 x i64>, <2 x i64>* %C
  %hiA = shufflevector <4 x i32> %wideA, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %hiB = shufflevector <4 x i32> %wideB, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %prod = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %hiA, <2 x i32> %hiB)
  %res = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %acc, <2 x i64> %prod)
  ret <2 x i64> %res
}
398*9880d681SAndroid Build Coastguard Worker
; The umull intrinsic result is added to the loaded <4 x i32> accumulator;
; umlal.4s is expected.
define <4 x i32> @umlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: umlal4s:
;CHECK: umlal.4s
  %lhs = load <4 x i16>, <4 x i16>* %A
  %rhs = load <4 x i16>, <4 x i16>* %B
  %acc = load <4 x i32>, <4 x i32>* %C
  %prod = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs)
  %sum = add <4 x i32> %acc, %prod
  ret <4 x i32> %sum
}
409*9880d681SAndroid Build Coastguard Worker
; The umull intrinsic result is added to the loaded <2 x i64> accumulator;
; umlal.2d is expected.
define <2 x i64> @umlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: umlal2d:
;CHECK: umlal.2d
  %lhs = load <2 x i32>, <2 x i32>* %A
  %rhs = load <2 x i32>, <2 x i32>* %B
  %acc = load <2 x i64>, <2 x i64>* %C
  %prod = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %lhs, <2 x i32> %rhs)
  %sum = add <2 x i64> %acc, %prod
  ret <2 x i64> %sum
}
420*9880d681SAndroid Build Coastguard Worker
; The umull intrinsic result is subtracted from the loaded <4 x i32>
; accumulator; umlsl.4s is expected.
define <4 x i32> @umlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: umlsl4s:
;CHECK: umlsl.4s
  %lhs = load <4 x i16>, <4 x i16>* %A
  %rhs = load <4 x i16>, <4 x i16>* %B
  %acc = load <4 x i32>, <4 x i32>* %C
  %prod = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs)
  %diff = sub <4 x i32> %acc, %prod
  ret <4 x i32> %diff
}
431*9880d681SAndroid Build Coastguard Worker
; The umull intrinsic result is subtracted from the loaded <2 x i64>
; accumulator; umlsl.2d is expected.
define <2 x i64> @umlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: umlsl2d:
;CHECK: umlsl.2d
  %lhs = load <2 x i32>, <2 x i32>* %A
  %rhs = load <2 x i32>, <2 x i32>* %B
  %acc = load <2 x i64>, <2 x i64>* %C
  %prod = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %lhs, <2 x i32> %rhs)
  %diff = sub <2 x i64> %acc, %prod
  ret <2 x i64> %diff
}
442*9880d681SAndroid Build Coastguard Worker
; llvm.fma on three loaded <2 x float> vectors; fmla.2s is expected.
define <2 x float> @fmla_2s(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
;CHECK-LABEL: fmla_2s:
;CHECK: fmla.2s
  %x = load <2 x float>, <2 x float>* %A
  %y = load <2 x float>, <2 x float>* %B
  %addend = load <2 x float>, <2 x float>* %C
  %fused = call <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %addend)
  ret <2 x float> %fused
}
452*9880d681SAndroid Build Coastguard Worker
; llvm.fma on three loaded <4 x float> vectors; fmla.4s is expected.
define <4 x float> @fmla_4s(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
;CHECK-LABEL: fmla_4s:
;CHECK: fmla.4s
  %x = load <4 x float>, <4 x float>* %A
  %y = load <4 x float>, <4 x float>* %B
  %addend = load <4 x float>, <4 x float>* %C
  %fused = call <4 x float> @llvm.fma.v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %addend)
  ret <4 x float> %fused
}
462*9880d681SAndroid Build Coastguard Worker
; llvm.fma on three loaded <2 x double> vectors; fmla.2d is expected.
define <2 x double> @fmla_2d(<2 x double>* %A, <2 x double>* %B, <2 x double>* %C) nounwind {
;CHECK-LABEL: fmla_2d:
;CHECK: fmla.2d
  %x = load <2 x double>, <2 x double>* %A
  %y = load <2 x double>, <2 x double>* %B
  %addend = load <2 x double>, <2 x double>* %C
  %fused = call <2 x double> @llvm.fma.v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %addend)
  ret <2 x double> %fused
}
472*9880d681SAndroid Build Coastguard Worker
473*9880d681SAndroid Build Coastguard Workerdeclare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
474*9880d681SAndroid Build Coastguard Workerdeclare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
475*9880d681SAndroid Build Coastguard Workerdeclare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
476*9880d681SAndroid Build Coastguard Worker
; llvm.fma whose second operand is the negation (fsub from -0.0) of the
; vector loaded from %B; fmls.2s is expected.
define <2 x float> @fmls_2s(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
;CHECK-LABEL: fmls_2s:
;CHECK: fmls.2s
  %x = load <2 x float>, <2 x float>* %A
  %y = load <2 x float>, <2 x float>* %B
  %addend = load <2 x float>, <2 x float>* %C
  %negY = fsub <2 x float> <float -0.0, float -0.0>, %y
  %fused = call <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %negY, <2 x float> %addend)
  ret <2 x float> %fused
}
487*9880d681SAndroid Build Coastguard Worker
; llvm.fma whose second operand is the negation (fsub from -0.0) of the
; vector loaded from %B; fmls.4s is expected.
define <4 x float> @fmls_4s(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
;CHECK-LABEL: fmls_4s:
;CHECK: fmls.4s
  %x = load <4 x float>, <4 x float>* %A
  %y = load <4 x float>, <4 x float>* %B
  %addend = load <4 x float>, <4 x float>* %C
  %negY = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %y
  %fused = call <4 x float> @llvm.fma.v4f32(<4 x float> %x, <4 x float> %negY, <4 x float> %addend)
  ret <4 x float> %fused
}
498*9880d681SAndroid Build Coastguard Worker
; llvm.fma whose second operand is the negation (fsub from -0.0) of the
; vector loaded from %B; fmls.2d is expected.
define <2 x double> @fmls_2d(<2 x double>* %A, <2 x double>* %B, <2 x double>* %C) nounwind {
;CHECK-LABEL: fmls_2d:
;CHECK: fmls.2d
  %x = load <2 x double>, <2 x double>* %A
  %y = load <2 x double>, <2 x double>* %B
  %addend = load <2 x double>, <2 x double>* %C
  %negY = fsub <2 x double> <double -0.0, double -0.0>, %y
  %fused = call <2 x double> @llvm.fma.v2f64(<2 x double> %x, <2 x double> %negY, <2 x double> %addend)
  ret <2 x double> %fused
}
509*9880d681SAndroid Build Coastguard Worker
; Same shape as fmls_2s but the negated vector is the first llvm.fma
; operand instead of the second; fmls.2s is still expected.
define <2 x float> @fmls_commuted_neg_2s(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
;CHECK-LABEL: fmls_commuted_neg_2s:
;CHECK: fmls.2s
  %x = load <2 x float>, <2 x float>* %A
  %y = load <2 x float>, <2 x float>* %B
  %addend = load <2 x float>, <2 x float>* %C
  %negY = fsub <2 x float> <float -0.0, float -0.0>, %y
  %fused = call <2 x float> @llvm.fma.v2f32(<2 x float> %negY, <2 x float> %x, <2 x float> %addend)
  ret <2 x float> %fused
}
520*9880d681SAndroid Build Coastguard Worker
; Same shape as fmls_4s but the negated vector is the first llvm.fma
; operand instead of the second; fmls.4s is still expected.
define <4 x float> @fmls_commuted_neg_4s(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
;CHECK-LABEL: fmls_commuted_neg_4s:
;CHECK: fmls.4s
  %x = load <4 x float>, <4 x float>* %A
  %y = load <4 x float>, <4 x float>* %B
  %addend = load <4 x float>, <4 x float>* %C
  %negY = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %y
  %fused = call <4 x float> @llvm.fma.v4f32(<4 x float> %negY, <4 x float> %x, <4 x float> %addend)
  ret <4 x float> %fused
}
531*9880d681SAndroid Build Coastguard Worker
; Same shape as fmls_2d but the negated vector is the first llvm.fma
; operand instead of the second; fmls.2d is still expected.
define <2 x double> @fmls_commuted_neg_2d(<2 x double>* %A, <2 x double>* %B, <2 x double>* %C) nounwind {
;CHECK-LABEL: fmls_commuted_neg_2d:
;CHECK: fmls.2d
  %x = load <2 x double>, <2 x double>* %A
  %y = load <2 x double>, <2 x double>* %B
  %addend = load <2 x double>, <2 x double>* %C
  %negY = fsub <2 x double> <double -0.0, double -0.0>, %y
  %fused = call <2 x double> @llvm.fma.v2f64(<2 x double> %negY, <2 x double> %x, <2 x double> %addend)
  ret <2 x double> %fused
}
542*9880d681SAndroid Build Coastguard Worker
; llvm.fma of negated %c with a splat of element 0 of %b, accumulated onto
; %a; fmls.2s is expected.
define <2 x float> @fmls_indexed_2s(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp {
;CHECK-LABEL: fmls_indexed_2s:
;CHECK: fmls.2s
entry:
  %negC = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %c
  %splatB = shufflevector <2 x float> %b, <2 x float> undef, <2 x i32> zeroinitializer
  %res = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %negC, <2 x float> %splatB, <2 x float> %a)
  ret <2 x float> %res
}
552*9880d681SAndroid Build Coastguard Worker
; llvm.fma of negated %c with a splat of element 0 of %b, accumulated onto
; %a; fmls.4s is expected.
define <4 x float> @fmls_indexed_4s(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone ssp {
;CHECK-LABEL: fmls_indexed_4s:
;CHECK: fmls.4s
entry:
  %negC = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %splatB = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
  %res = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %negC, <4 x float> %splatB, <4 x float> %a)
  ret <4 x float> %res
}
562*9880d681SAndroid Build Coastguard Worker
; llvm.fma of negated %c with a splat of element 0 of %b, accumulated onto
; %a; fmls.2d is expected.
define <2 x double> @fmls_indexed_2d(<2 x double> %a, <2 x double> %b, <2 x double> %c) nounwind readnone ssp {
;CHECK-LABEL: fmls_indexed_2d:
;CHECK: fmls.2d
entry:
  %negC = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
  %splatB = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer
  %res = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %negC, <2 x double> %splatB, <2 x double> %a)
  ret <2 x double> %res
}
572*9880d681SAndroid Build Coastguard Worker
; The scalar %c is inserted into both lanes of a <2 x float> and that full
; splat is the first llvm.fma operand, with %b as multiplicand and %a as
; addend. The check lines require fmla.2s immediately after the label,
; directly followed by ret.
define <2 x float> @fmla_indexed_scalar_2s(<2 x float> %a, <2 x float> %b, float %c) nounwind readnone ssp {
entry:
; CHECK-LABEL: fmla_indexed_scalar_2s:
; CHECK-NEXT: fmla.2s
; CHECK-NEXT: ret
  %v1 = insertelement <2 x float> undef, float %c, i32 0
  %v2 = insertelement <2 x float> %v1, float %c, i32 1
  ; Pass the complete splat (%v2); %v1 still has an undef lane 1. This
  ; matches how the 4s and 2d variants feed their fully-built vectors.
  %fmla1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %v2, <2 x float> %b, <2 x float> %a) nounwind
  ret <2 x float> %fmla1
}
583*9880d681SAndroid Build Coastguard Worker
; The scalar %c is inserted into all four lanes of a <4 x float> and the
; splat is the first llvm.fma operand. The check lines require fmla.4s
; immediately after the label, directly followed by ret.
define <4 x float> @fmla_indexed_scalar_4s(<4 x float> %a, <4 x float> %b, float %c) nounwind readnone ssp {
entry:
; CHECK-LABEL: fmla_indexed_scalar_4s:
; CHECK-NEXT: fmla.4s
; CHECK-NEXT: ret
  %ins0 = insertelement <4 x float> undef, float %c, i32 0
  %ins1 = insertelement <4 x float> %ins0, float %c, i32 1
  %ins2 = insertelement <4 x float> %ins1, float %c, i32 2
  %splat = insertelement <4 x float> %ins2, float %c, i32 3
  %res = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
596*9880d681SAndroid Build Coastguard Worker
; The scalar %c is inserted into both lanes of a <2 x double> and the
; splat is the first llvm.fma operand. The check lines require fmla.2d
; immediately after the label, directly followed by ret.
define <2 x double> @fmla_indexed_scalar_2d(<2 x double> %a, <2 x double> %b, double %c) nounwind readnone ssp {
; CHECK-LABEL: fmla_indexed_scalar_2d:
; CHECK-NEXT: fmla.2d
; CHECK-NEXT: ret
entry:
  %ins0 = insertelement <2 x double> undef, double %c, i32 0
  %splat = insertelement <2 x double> %ins0, double %c, i32 1
  %res = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %splat, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
607*9880d681SAndroid Build Coastguard Worker
; Multiplies a loaded <4 x i16> by a splat of element 1 of a second loaded
; vector. The check lines require mul.4h with no "dup" between the label
; and it.
define <4 x i16> @mul_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: mul_4h:
;CHECK-NOT: dup
;CHECK: mul.4h
  %lhs = load <4 x i16>, <4 x i16>* %A
  %src = load <4 x i16>, <4 x i16>* %B
  %lane1 = shufflevector <4 x i16> %src, <4 x i16> %src, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %prod = mul <4 x i16> %lhs, %lane1
  ret <4 x i16> %prod
}
618*9880d681SAndroid Build Coastguard Worker
; Multiplies a loaded <8 x i16> by a splat of element 1 of a second loaded
; vector. The check lines require mul.8h with no "dup" between the label
; and it.
define <8 x i16> @mul_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: mul_8h:
;CHECK-NOT: dup
;CHECK: mul.8h
  %lhs = load <8 x i16>, <8 x i16>* %A
  %src = load <8 x i16>, <8 x i16>* %B
  %lane1 = shufflevector <8 x i16> %src, <8 x i16> %src, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %prod = mul <8 x i16> %lhs, %lane1
  ret <8 x i16> %prod
}
629*9880d681SAndroid Build Coastguard Worker
; Multiplies a loaded <2 x i32> by a splat of element 1 of a second loaded
; vector. The check lines require mul.2s with no "dup" between the label
; and it.
define <2 x i32> @mul_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: mul_2s:
;CHECK-NOT: dup
;CHECK: mul.2s
  %lhs = load <2 x i32>, <2 x i32>* %A
  %src = load <2 x i32>, <2 x i32>* %B
  %lane1 = shufflevector <2 x i32> %src, <2 x i32> %src, <2 x i32> <i32 1, i32 1>
  %prod = mul <2 x i32> %lhs, %lane1
  ret <2 x i32> %prod
}
640*9880d681SAndroid Build Coastguard Worker
; Multiplies a loaded <4 x i32> by a splat of element 1 of a second loaded
; vector. The check lines require mul.4s with no "dup" between the label
; and it.
define <4 x i32> @mul_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: mul_4s:
;CHECK-NOT: dup
;CHECK: mul.4s
  %lhs = load <4 x i32>, <4 x i32>* %A
  %src = load <4 x i32>, <4 x i32>* %B
  %lane1 = shufflevector <4 x i32> %src, <4 x i32> %src, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %prod = mul <4 x i32> %lhs, %lane1
  ret <4 x i32> %prod
}
651*9880d681SAndroid Build Coastguard Worker
; Plain IR mul of two <2 x i64> arguments; the check lines look for "mul"
; twice after the label.
define <2 x i64> @mul_2d(<2 x i64> %A, <2 x i64> %B) nounwind {
; CHECK-LABEL: mul_2d:
; CHECK: mul
; CHECK: mul
  %prod = mul <2 x i64> %A, %B
  ret <2 x i64> %prod
}
659*9880d681SAndroid Build Coastguard Worker
; fmul of a loaded <2 x float> by a splat of element 1 of a second loaded
; vector. The check lines require fmul.2s with no "dup" between the label
; and it.
define <2 x float> @fmul_lane_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fmul_lane_2s:
;CHECK-NOT: dup
;CHECK: fmul.2s
  %lhs = load <2 x float>, <2 x float>* %A
  %src = load <2 x float>, <2 x float>* %B
  %lane1 = shufflevector <2 x float> %src, <2 x float> %src, <2 x i32> <i32 1, i32 1>
  %prod = fmul <2 x float> %lhs, %lane1
  ret <2 x float> %prod
}
670*9880d681SAndroid Build Coastguard Worker
; fmul of a loaded <4 x float> by a splat of element 1 of a second loaded
; vector. The check lines require fmul.4s with no "dup" between the label
; and it.
define <4 x float> @fmul_lane_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fmul_lane_4s:
;CHECK-NOT: dup
;CHECK: fmul.4s
  %lhs = load <4 x float>, <4 x float>* %A
  %src = load <4 x float>, <4 x float>* %B
  %lane1 = shufflevector <4 x float> %src, <4 x float> %src, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %prod = fmul <4 x float> %lhs, %lane1
  ret <4 x float> %prod
}
681*9880d681SAndroid Build Coastguard Worker
; fmul of a loaded <2 x double> by a splat of element 1 of a second loaded
; vector. The check lines require fmul.2d with no "dup" between the label
; and it.
define <2 x double> @fmul_lane_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fmul_lane_2d:
;CHECK-NOT: dup
;CHECK: fmul.2d
  %lhs = load <2 x double>, <2 x double>* %A
  %src = load <2 x double>, <2 x double>* %B
  %lane1 = shufflevector <2 x double> %src, <2 x double> %src, <2 x i32> <i32 1, i32 1>
  %prod = fmul <2 x double> %lhs, %lane1
  ret <2 x double> %prod
}
692*9880d681SAndroid Build Coastguard Worker
; Scalar fmul of %A by element 3 extracted from %vec. The check lines
; require the by-element form "fmul.s s0, s0, v1[3]" with no "dup" before
; it.
define float @fmul_lane_s(float %A, <4 x float> %vec) nounwind {
;CHECK-LABEL: fmul_lane_s:
;CHECK-NOT: dup
;CHECK: fmul.s s0, s0, v1[3]
  %elt = extractelement <4 x float> %vec, i32 3
  %prod = fmul float %A, %elt
  ret float %prod
}
701*9880d681SAndroid Build Coastguard Worker
; Scalar fmul of %A by element 1 extracted from %vec. The check lines
; require the by-element form "fmul.d d0, d0, v1[1]" with no "dup" before
; it.
define double @fmul_lane_d(double %A, <2 x double> %vec) nounwind {
;CHECK-LABEL: fmul_lane_d:
;CHECK-NOT: dup
;CHECK: fmul.d d0, d0, v1[1]
  %elt = extractelement <2 x double> %vec, i32 1
  %prod = fmul double %A, %elt
  ret double %prod
}
710*9880d681SAndroid Build Coastguard Worker
711*9880d681SAndroid Build Coastguard Worker
712*9880d681SAndroid Build Coastguard Worker
; The fmulx intrinsic applied to a loaded <2 x float> and a splat of
; element 1 of a second loaded vector. The check lines require fmulx.2s
; with no "dup" between the label and it.
define <2 x float> @fmulx_lane_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fmulx_lane_2s:
;CHECK-NOT: dup
;CHECK: fmulx.2s
  %lhs = load <2 x float>, <2 x float>* %A
  %src = load <2 x float>, <2 x float>* %B
  %lane1 = shufflevector <2 x float> %src, <2 x float> %src, <2 x i32> <i32 1, i32 1>
  %res = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %lhs, <2 x float> %lane1)
  ret <2 x float> %res
}
723*9880d681SAndroid Build Coastguard Worker
; The fmulx intrinsic applied to a loaded <4 x float> and a splat of
; element 1 of a second loaded vector. The check lines require fmulx.4s
; with no "dup" between the label and it.
define <4 x float> @fmulx_lane_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fmulx_lane_4s:
;CHECK-NOT: dup
;CHECK: fmulx.4s
  %lhs = load <4 x float>, <4 x float>* %A
  %src = load <4 x float>, <4 x float>* %B
  %lane1 = shufflevector <4 x float> %src, <4 x float> %src, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %res = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %lhs, <4 x float> %lane1)
  ret <4 x float> %res
}
734*9880d681SAndroid Build Coastguard Worker
; The fmulx intrinsic applied to a loaded <2 x double> and a splat of
; element 1 of a second loaded vector. The check lines require fmulx.2d
; with no "dup" between the label and it.
define <2 x double> @fmulx_lane_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fmulx_lane_2d:
;CHECK-NOT: dup
;CHECK: fmulx.2d
  %lhs = load <2 x double>, <2 x double>* %A
  %src = load <2 x double>, <2 x double>* %B
  %lane1 = shufflevector <2 x double> %src, <2 x double> %src, <2 x i32> <i32 1, i32 1>
  %res = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %lhs, <2 x double> %lane1)
  ret <2 x double> %res
}
745*9880d681SAndroid Build Coastguard Worker
; The sqdmulh intrinsic applied to a loaded <4 x i16> and a splat of
; element 1 of a second loaded vector. The check lines require sqdmulh.4h
; with no "dup" between the label and it.
define <4 x i16> @sqdmulh_lane_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqdmulh_lane_4h:
;CHECK-NOT: dup
;CHECK: sqdmulh.4h
  %lhs = load <4 x i16>, <4 x i16>* %A
  %src = load <4 x i16>, <4 x i16>* %B
  %lane1 = shufflevector <4 x i16> %src, <4 x i16> %src, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %res = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %lane1)
  ret <4 x i16> %res
}
756*9880d681SAndroid Build Coastguard Worker
; The sqdmulh intrinsic applied to a loaded <8 x i16> and a splat of
; element 1 of a second loaded vector. The check lines require sqdmulh.8h
; with no "dup" between the label and it.
define <8 x i16> @sqdmulh_lane_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqdmulh_lane_8h:
;CHECK-NOT: dup
;CHECK: sqdmulh.8h
  %lhs = load <8 x i16>, <8 x i16>* %A
  %src = load <8 x i16>, <8 x i16>* %B
  %lane1 = shufflevector <8 x i16> %src, <8 x i16> %src, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %res = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %lane1)
  ret <8 x i16> %res
}
767*9880d681SAndroid Build Coastguard Worker
; The sqdmulh intrinsic applied to a loaded <2 x i32> and a splat of
; element 1 of a second loaded vector. The check lines require sqdmulh.2s
; with no "dup" between the label and it.
define <2 x i32> @sqdmulh_lane_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqdmulh_lane_2s:
;CHECK-NOT: dup
;CHECK: sqdmulh.2s
  %lhs = load <2 x i32>, <2 x i32>* %A
  %src = load <2 x i32>, <2 x i32>* %B
  %lane1 = shufflevector <2 x i32> %src, <2 x i32> %src, <2 x i32> <i32 1, i32 1>
  %res = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %lane1)
  ret <2 x i32> %res
}
778*9880d681SAndroid Build Coastguard Worker
; Lane-splat test for sqdmulh on <4 x i32>: element 1 of the second loaded
; vector is broadcast to all four lanes before the sqdmulh.v4i32 intrinsic
; call. Expects `sqdmulh.4s` and no `dup` between the label and that match.
779*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @sqdmulh_lane_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
780*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmulh_lane_4s:
781*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
782*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmulh.4s
783*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <4 x i32>, <4 x i32>* %A
784*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <4 x i32>, <4 x i32>* %B
785*9880d681SAndroid Build Coastguard Worker  %tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
786*9880d681SAndroid Build Coastguard Worker  %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3)
787*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %tmp4
788*9880d681SAndroid Build Coastguard Worker}
789*9880d681SAndroid Build Coastguard Worker
; Scalar-by-lane test for sqdmulh: extracts element 1 of the vector argument
; %B and multiplies it with the scalar %A through the sqdmulh.i32 intrinsic.
; Expects the scalar `sqdmulh.s` form writing s0, with the last operand
; printed as lane [1] of a vector register, and no `dup` before it.
790*9880d681SAndroid Build Coastguard Workerdefine i32 @sqdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind {
791*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmulh_lane_1s:
792*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
793*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmulh.s s0, {{s[0-9]+}}, {{v[0-9]+}}[1]
794*9880d681SAndroid Build Coastguard Worker  %tmp1 = extractelement <4 x i32> %B, i32 1
795*9880d681SAndroid Build Coastguard Worker  %tmp2 = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %A, i32 %tmp1)
796*9880d681SAndroid Build Coastguard Worker  ret i32 %tmp2
797*9880d681SAndroid Build Coastguard Worker}
798*9880d681SAndroid Build Coastguard Worker
; Lane-splat test for sqrdmulh on <4 x i16>: element 1 of the second loaded
; vector is broadcast to all four lanes before the sqrdmulh.v4i16 intrinsic
; call. Expects `sqrdmulh.4h` and no `dup` between the label and that match.
799*9880d681SAndroid Build Coastguard Workerdefine <4 x i16> @sqrdmulh_lane_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
800*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqrdmulh_lane_4h:
801*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
802*9880d681SAndroid Build Coastguard Worker;CHECK: sqrdmulh.4h
803*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <4 x i16>, <4 x i16>* %A
804*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <4 x i16>, <4 x i16>* %B
805*9880d681SAndroid Build Coastguard Worker  %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
806*9880d681SAndroid Build Coastguard Worker  %tmp4 = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3)
807*9880d681SAndroid Build Coastguard Worker  ret <4 x i16> %tmp4
808*9880d681SAndroid Build Coastguard Worker}
809*9880d681SAndroid Build Coastguard Worker
; Lane-splat test for sqrdmulh on <8 x i16>: element 1 of the second loaded
; vector is broadcast to all eight lanes before the sqrdmulh.v8i16 intrinsic
; call. Expects `sqrdmulh.8h` and no `dup` between the label and that match.
810*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @sqrdmulh_lane_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
811*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqrdmulh_lane_8h:
812*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
813*9880d681SAndroid Build Coastguard Worker;CHECK: sqrdmulh.8h
814*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <8 x i16>, <8 x i16>* %A
815*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <8 x i16>, <8 x i16>* %B
816*9880d681SAndroid Build Coastguard Worker  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
817*9880d681SAndroid Build Coastguard Worker  %tmp4 = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3)
818*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %tmp4
819*9880d681SAndroid Build Coastguard Worker}
820*9880d681SAndroid Build Coastguard Worker
; Lane-splat test for sqrdmulh on <2 x i32>: element 1 of the second loaded
; vector is broadcast to both lanes before the sqrdmulh.v2i32 intrinsic call.
; Expects `sqrdmulh.2s` and no `dup` between the label and that match.
821*9880d681SAndroid Build Coastguard Workerdefine <2 x i32> @sqrdmulh_lane_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
822*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqrdmulh_lane_2s:
823*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
824*9880d681SAndroid Build Coastguard Worker;CHECK: sqrdmulh.2s
825*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <2 x i32>, <2 x i32>* %A
826*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <2 x i32>, <2 x i32>* %B
827*9880d681SAndroid Build Coastguard Worker  %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
828*9880d681SAndroid Build Coastguard Worker  %tmp4 = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3)
829*9880d681SAndroid Build Coastguard Worker  ret <2 x i32> %tmp4
830*9880d681SAndroid Build Coastguard Worker}
831*9880d681SAndroid Build Coastguard Worker
; Lane-splat test for sqrdmulh on <4 x i32>: element 1 of the second loaded
; vector is broadcast to all four lanes before the sqrdmulh.v4i32 intrinsic
; call. Expects `sqrdmulh.4s` and no `dup` between the label and that match.
832*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @sqrdmulh_lane_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
833*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqrdmulh_lane_4s:
834*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
835*9880d681SAndroid Build Coastguard Worker;CHECK: sqrdmulh.4s
836*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <4 x i32>, <4 x i32>* %A
837*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <4 x i32>, <4 x i32>* %B
838*9880d681SAndroid Build Coastguard Worker  %tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
839*9880d681SAndroid Build Coastguard Worker  %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3)
840*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %tmp4
841*9880d681SAndroid Build Coastguard Worker}
842*9880d681SAndroid Build Coastguard Worker
; Scalar-by-lane test for sqrdmulh: extracts element 1 of the vector argument
; %B and multiplies it with the scalar %A through the sqrdmulh.i32 intrinsic.
; Expects the scalar `sqrdmulh.s` form writing s0, with the last operand
; printed as lane [1] of a vector register, and no `dup` before it.
843*9880d681SAndroid Build Coastguard Workerdefine i32 @sqrdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind {
844*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqrdmulh_lane_1s:
845*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
846*9880d681SAndroid Build Coastguard Worker;CHECK: sqrdmulh.s s0, {{s[0-9]+}}, {{v[0-9]+}}[1]
847*9880d681SAndroid Build Coastguard Worker  %tmp1 = extractelement <4 x i32> %B, i32 1
848*9880d681SAndroid Build Coastguard Worker  %tmp2 = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %A, i32 %tmp1)
849*9880d681SAndroid Build Coastguard Worker  ret i32 %tmp2
850*9880d681SAndroid Build Coastguard Worker}
851*9880d681SAndroid Build Coastguard Worker
; Lane-splat test for the widening sqdmull (<4 x i16> -> <4 x i32>): element 1
; of the second loaded vector is broadcast before the sqdmull.v4i32 intrinsic
; call. Expects `sqdmull.4s` and no `dup` between the label and that match.
852*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @sqdmull_lane_4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
853*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmull_lane_4s:
854*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
855*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmull.4s
856*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <4 x i16>, <4 x i16>* %A
857*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <4 x i16>, <4 x i16>* %B
858*9880d681SAndroid Build Coastguard Worker  %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
859*9880d681SAndroid Build Coastguard Worker  %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
860*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %tmp4
861*9880d681SAndroid Build Coastguard Worker}
862*9880d681SAndroid Build Coastguard Worker
; Lane-splat test for the widening sqdmull (<2 x i32> -> <2 x i64>): element 1
; of the second loaded vector is broadcast before the sqdmull.v2i64 intrinsic
; call. Expects `sqdmull.2d` and no `dup` between the label and that match.
863*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @sqdmull_lane_2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
864*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmull_lane_2d:
865*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
866*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmull.2d
867*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <2 x i32>, <2 x i32>* %A
868*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <2 x i32>, <2 x i32>* %B
869*9880d681SAndroid Build Coastguard Worker  %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
870*9880d681SAndroid Build Coastguard Worker  %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
871*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %tmp4
872*9880d681SAndroid Build Coastguard Worker}
873*9880d681SAndroid Build Coastguard Worker
; High-half lane test for sqdmull: %tmp1 selects elements 4-7 of the first
; loaded <8 x i16>, %tmp2 broadcasts element 1 of the second, and both feed
; the sqdmull.v4i32 intrinsic. Expects the `sqdmull2.4s` form and no `dup`
; between the label and that match.
874*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @sqdmull2_lane_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
875*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmull2_lane_4s:
876*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
877*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmull2.4s
878*9880d681SAndroid Build Coastguard Worker  %load1 = load <8 x i16>, <8 x i16>* %A
879*9880d681SAndroid Build Coastguard Worker  %load2 = load <8 x i16>, <8 x i16>* %B
880*9880d681SAndroid Build Coastguard Worker  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
881*9880d681SAndroid Build Coastguard Worker  %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
882*9880d681SAndroid Build Coastguard Worker  %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
883*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %tmp4
884*9880d681SAndroid Build Coastguard Worker}
885*9880d681SAndroid Build Coastguard Worker
; High-half lane test for sqdmull: %tmp1 selects elements 2-3 of the first
; loaded <4 x i32>, %tmp2 broadcasts element 1 of the second, and both feed
; the sqdmull.v2i64 intrinsic. Expects the `sqdmull2.2d` form and no `dup`
; between the label and that match.
886*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @sqdmull2_lane_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
887*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmull2_lane_2d:
888*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
889*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmull2.2d
890*9880d681SAndroid Build Coastguard Worker  %load1 = load <4 x i32>, <4 x i32>* %A
891*9880d681SAndroid Build Coastguard Worker  %load2 = load <4 x i32>, <4 x i32>* %B
892*9880d681SAndroid Build Coastguard Worker  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
893*9880d681SAndroid Build Coastguard Worker  %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
894*9880d681SAndroid Build Coastguard Worker  %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
895*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %tmp4
896*9880d681SAndroid Build Coastguard Worker}
897*9880d681SAndroid Build Coastguard Worker
; Lane-splat test for the widening umull (<4 x i16> -> <4 x i32>): element 1
; of the second loaded vector is broadcast before the umull.v4i32 intrinsic
; call. Expects `umull.4s` and no `dup` between the label and that match.
898*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @umull_lane_4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
899*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: umull_lane_4s:
900*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
901*9880d681SAndroid Build Coastguard Worker;CHECK: umull.4s
902*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <4 x i16>, <4 x i16>* %A
903*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <4 x i16>, <4 x i16>* %B
904*9880d681SAndroid Build Coastguard Worker  %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
905*9880d681SAndroid Build Coastguard Worker  %tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
906*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %tmp4
907*9880d681SAndroid Build Coastguard Worker}
908*9880d681SAndroid Build Coastguard Worker
; Lane-splat test for the widening umull (<2 x i32> -> <2 x i64>): element 1
; of the second loaded vector is broadcast before the umull.v2i64 intrinsic
; call. Expects `umull.2d` and no `dup` between the label and that match.
909*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @umull_lane_2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
910*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: umull_lane_2d:
911*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
912*9880d681SAndroid Build Coastguard Worker;CHECK: umull.2d
913*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <2 x i32>, <2 x i32>* %A
914*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <2 x i32>, <2 x i32>* %B
915*9880d681SAndroid Build Coastguard Worker  %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
916*9880d681SAndroid Build Coastguard Worker  %tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
917*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %tmp4
918*9880d681SAndroid Build Coastguard Worker}
919*9880d681SAndroid Build Coastguard Worker
; Lane-splat test for the widening smull (<4 x i16> -> <4 x i32>): element 1
; of the second loaded vector is broadcast before the smull.v4i32 intrinsic
; call. Expects `smull.4s` and no `dup` between the label and that match.
920*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @smull_lane_4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
921*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: smull_lane_4s:
922*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
923*9880d681SAndroid Build Coastguard Worker;CHECK: smull.4s
924*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <4 x i16>, <4 x i16>* %A
925*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <4 x i16>, <4 x i16>* %B
926*9880d681SAndroid Build Coastguard Worker  %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
927*9880d681SAndroid Build Coastguard Worker  %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
928*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %tmp4
929*9880d681SAndroid Build Coastguard Worker}
930*9880d681SAndroid Build Coastguard Worker
; Lane-splat test for the widening smull (<2 x i32> -> <2 x i64>): element 1
; of the second loaded vector is broadcast before the smull.v2i64 intrinsic
; call. Expects `smull.2d` and no `dup` between the label and that match.
931*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @smull_lane_2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
932*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: smull_lane_2d:
933*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
934*9880d681SAndroid Build Coastguard Worker;CHECK: smull.2d
935*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <2 x i32>, <2 x i32>* %A
936*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <2 x i32>, <2 x i32>* %B
937*9880d681SAndroid Build Coastguard Worker  %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
938*9880d681SAndroid Build Coastguard Worker  %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
939*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %tmp4
940*9880d681SAndroid Build Coastguard Worker}
941*9880d681SAndroid Build Coastguard Worker
; Multiply-accumulate lane test: an smull.v4i32 call whose second operand is
; element 1 of %B broadcast to all lanes, followed by a plain vector add with
; the accumulator loaded from %C. Expects the pair to be emitted as `smlal.4s`
; with no `dup` between the label and that match.
942*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @smlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
943*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: smlal_lane_4s:
944*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
945*9880d681SAndroid Build Coastguard Worker;CHECK: smlal.4s
946*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <4 x i16>, <4 x i16>* %A
947*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <4 x i16>, <4 x i16>* %B
948*9880d681SAndroid Build Coastguard Worker  %tmp3 = load <4 x i32>, <4 x i32>* %C
949*9880d681SAndroid Build Coastguard Worker  %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
950*9880d681SAndroid Build Coastguard Worker  %tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
951*9880d681SAndroid Build Coastguard Worker  %tmp6 = add <4 x i32> %tmp3, %tmp5
952*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %tmp6
953*9880d681SAndroid Build Coastguard Worker}
954*9880d681SAndroid Build Coastguard Worker
; Multiply-accumulate lane test: an smull.v2i64 call whose second operand is
; element 1 of %B broadcast to both lanes, followed by a plain vector add with
; the accumulator loaded from %C. Expects the pair to be emitted as `smlal.2d`
; with no `dup` between the label and that match.
955*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @smlal_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
956*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: smlal_lane_2d:
957*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
958*9880d681SAndroid Build Coastguard Worker;CHECK: smlal.2d
959*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <2 x i32>, <2 x i32>* %A
960*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <2 x i32>, <2 x i32>* %B
961*9880d681SAndroid Build Coastguard Worker  %tmp3 = load <2 x i64>, <2 x i64>* %C
962*9880d681SAndroid Build Coastguard Worker  %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
963*9880d681SAndroid Build Coastguard Worker  %tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
964*9880d681SAndroid Build Coastguard Worker  %tmp6 = add <2 x i64> %tmp3, %tmp5
965*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %tmp6
966*9880d681SAndroid Build Coastguard Worker}
967*9880d681SAndroid Build Coastguard Worker
; Saturating multiply-accumulate lane test: sqdmull.v4i32 with element 1 of %B
; broadcast as the second operand, then sqadd.v4i32 with the accumulator
; loaded from %C. Expects the pair to be emitted as `sqdmlal.4s` with no `dup`
; between the label and that match.
968*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @sqdmlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
969*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlal_lane_4s:
970*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
971*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlal.4s
972*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <4 x i16>, <4 x i16>* %A
973*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <4 x i16>, <4 x i16>* %B
974*9880d681SAndroid Build Coastguard Worker  %tmp3 = load <4 x i32>, <4 x i32>* %C
975*9880d681SAndroid Build Coastguard Worker  %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
976*9880d681SAndroid Build Coastguard Worker  %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
977*9880d681SAndroid Build Coastguard Worker  %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
978*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %tmp6
979*9880d681SAndroid Build Coastguard Worker}
980*9880d681SAndroid Build Coastguard Worker
; Saturating multiply-accumulate lane test: sqdmull.v2i64 with element 1 of %B
; broadcast as the second operand, then sqadd.v2i64 with the accumulator
; loaded from %C. Expects the pair to be emitted as `sqdmlal.2d` with no `dup`
; between the label and that match.
981*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @sqdmlal_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
982*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlal_lane_2d:
983*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
984*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlal.2d
985*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <2 x i32>, <2 x i32>* %A
986*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <2 x i32>, <2 x i32>* %B
987*9880d681SAndroid Build Coastguard Worker  %tmp3 = load <2 x i64>, <2 x i64>* %C
988*9880d681SAndroid Build Coastguard Worker  %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
989*9880d681SAndroid Build Coastguard Worker  %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
990*9880d681SAndroid Build Coastguard Worker  %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
991*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %tmp6
992*9880d681SAndroid Build Coastguard Worker}
993*9880d681SAndroid Build Coastguard Worker
; High-half saturating multiply-accumulate lane test: %tmp1 selects elements
; 4-7 of the first loaded <8 x i16>, %tmp2 broadcasts element 1 of the second;
; sqdmull.v4i32 is then combined with the accumulator from %C via sqadd.v4i32.
; Expects `sqdmlal2.4s` with no `dup` between the label and that match.
994*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @sqdmlal2_lane_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
995*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlal2_lane_4s:
996*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
997*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlal2.4s
998*9880d681SAndroid Build Coastguard Worker  %load1 = load <8 x i16>, <8 x i16>* %A
999*9880d681SAndroid Build Coastguard Worker  %load2 = load <8 x i16>, <8 x i16>* %B
1000*9880d681SAndroid Build Coastguard Worker  %tmp3 = load <4 x i32>, <4 x i32>* %C
1001*9880d681SAndroid Build Coastguard Worker  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1002*9880d681SAndroid Build Coastguard Worker  %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1003*9880d681SAndroid Build Coastguard Worker  %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
1004*9880d681SAndroid Build Coastguard Worker  %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
1005*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %tmp6
1006*9880d681SAndroid Build Coastguard Worker}
1007*9880d681SAndroid Build Coastguard Worker
; High-half saturating multiply-accumulate lane test: %tmp1 selects elements
; 2-3 of the first loaded <4 x i32>, %tmp2 broadcasts element 1 of the second;
; sqdmull.v2i64 is then combined with the accumulator from %C via sqadd.v2i64.
; Expects `sqdmlal2.2d` with no `dup` between the label and that match.
1008*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @sqdmlal2_lane_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
1009*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlal2_lane_2d:
1010*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
1011*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlal2.2d
1012*9880d681SAndroid Build Coastguard Worker  %load1 = load <4 x i32>, <4 x i32>* %A
1013*9880d681SAndroid Build Coastguard Worker  %load2 = load <4 x i32>, <4 x i32>* %B
1014*9880d681SAndroid Build Coastguard Worker  %tmp3 = load <2 x i64>, <2 x i64>* %C
1015*9880d681SAndroid Build Coastguard Worker  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1016*9880d681SAndroid Build Coastguard Worker  %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
1017*9880d681SAndroid Build Coastguard Worker  %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
1018*9880d681SAndroid Build Coastguard Worker  %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
1019*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %tmp6
1020*9880d681SAndroid Build Coastguard Worker}
1021*9880d681SAndroid Build Coastguard Worker
; Scalar i32 accumulate built from vector pieces: %B is inserted into element
; 0 of an otherwise-undef <4 x i16>, element 1 of %C is shuffled into lane 0
; (remaining lanes undef), sqdmull.v4i32 multiplies them, and element 0 of the
; product is combined with %A via the scalar sqadd.i32 intrinsic.
; The only expectation is that `sqdmlal.4s` is emitted; unlike the neighbouring
; lane tests there is no negative check on `dup` here.
1022*9880d681SAndroid Build Coastguard Workerdefine i32 @sqdmlal_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind {
1023*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlal_lane_1s:
1024*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlal.4s
1025*9880d681SAndroid Build Coastguard Worker  %lhs = insertelement <4 x i16> undef, i16 %B, i32 0
1026*9880d681SAndroid Build Coastguard Worker  %rhs = shufflevector <4 x i16> %C, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1027*9880d681SAndroid Build Coastguard Worker  %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs)
1028*9880d681SAndroid Build Coastguard Worker  %prod = extractelement <4 x i32> %prod.vec, i32 0
1029*9880d681SAndroid Build Coastguard Worker  %res = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %A, i32 %prod)
1030*9880d681SAndroid Build Coastguard Worker  ret i32 %res
1031*9880d681SAndroid Build Coastguard Worker}
; Scalar saturating-add intrinsic used by the function above.
1032*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32)
1033*9880d681SAndroid Build Coastguard Worker
; Scalar i32 subtract-accumulate built from vector pieces: %B is inserted into
; element 0 of an otherwise-undef <4 x i16>, element 1 of %C is shuffled into
; lane 0 (remaining lanes undef), sqdmull.v4i32 multiplies them, and element 0
; of the product is subtracted from %A via the scalar sqsub.i32 intrinsic.
; The only expectation is that `sqdmlsl.4s` is emitted; there is no negative
; check on `dup` here.
1034*9880d681SAndroid Build Coastguard Workerdefine i32 @sqdmlsl_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind {
1035*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlsl_lane_1s:
1036*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlsl.4s
1037*9880d681SAndroid Build Coastguard Worker  %lhs = insertelement <4 x i16> undef, i16 %B, i32 0
1038*9880d681SAndroid Build Coastguard Worker  %rhs = shufflevector <4 x i16> %C, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1039*9880d681SAndroid Build Coastguard Worker  %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs)
1040*9880d681SAndroid Build Coastguard Worker  %prod = extractelement <4 x i32> %prod.vec, i32 0
1041*9880d681SAndroid Build Coastguard Worker  %res = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %A, i32 %prod)
1042*9880d681SAndroid Build Coastguard Worker  ret i32 %res
1043*9880d681SAndroid Build Coastguard Worker}
; Scalar saturating-subtract intrinsic used by the function above.
1044*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32)
1045*9880d681SAndroid Build Coastguard Worker
; Scalar i64 accumulate by lane: element 1 of %C is extracted, multiplied with
; %B through the sqdmulls.scalar intrinsic (i32 x i32 -> i64), and the product
; is combined with %A via sqadd.i64. Expects the scalar `sqdmlal.s` form; there
; is no negative check on `dup` here.
1046*9880d681SAndroid Build Coastguard Workerdefine i64 @sqdmlal_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind {
1047*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlal_lane_1d:
1048*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlal.s
1049*9880d681SAndroid Build Coastguard Worker  %rhs = extractelement <2 x i32> %C, i32 1
1050*9880d681SAndroid Build Coastguard Worker  %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs)
1051*9880d681SAndroid Build Coastguard Worker  %res = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %A, i64 %prod)
1052*9880d681SAndroid Build Coastguard Worker  ret i64 %res
1053*9880d681SAndroid Build Coastguard Worker}
; Scalar intrinsics used by the i64 lane tests in this part of the file.
1054*9880d681SAndroid Build Coastguard Workerdeclare i64 @llvm.aarch64.neon.sqdmulls.scalar(i32, i32)
1055*9880d681SAndroid Build Coastguard Workerdeclare i64 @llvm.aarch64.neon.sqadd.i64(i64, i64)
1056*9880d681SAndroid Build Coastguard Worker
; Scalar i64 subtract-accumulate by lane: element 1 of %C is extracted,
; multiplied with %B through the sqdmulls.scalar intrinsic, and the product is
; subtracted from %A via sqsub.i64. Expects the scalar `sqdmlsl.s` form; there
; is no negative check on `dup` here.
1057*9880d681SAndroid Build Coastguard Workerdefine i64 @sqdmlsl_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind {
1058*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlsl_lane_1d:
1059*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlsl.s
1060*9880d681SAndroid Build Coastguard Worker  %rhs = extractelement <2 x i32> %C, i32 1
1061*9880d681SAndroid Build Coastguard Worker  %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs)
1062*9880d681SAndroid Build Coastguard Worker  %res = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %A, i64 %prod)
1063*9880d681SAndroid Build Coastguard Worker  ret i64 %res
1064*9880d681SAndroid Build Coastguard Worker}
; Scalar saturating-subtract intrinsic used by the function above.
1065*9880d681SAndroid Build Coastguard Workerdeclare i64 @llvm.aarch64.neon.sqsub.i64(i64, i64)
1066*9880d681SAndroid Build Coastguard Worker
1067*9880d681SAndroid Build Coastguard Worker
; Unsigned multiply-accumulate lane test: umull.v4i32 with element 1 of %B
; broadcast as the second operand, followed by a plain vector add with the
; accumulator loaded from %C. Expects the pair to be emitted as `umlal.4s`
; with no `dup` between the label and that match.
1068*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @umlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
1069*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: umlal_lane_4s:
1070*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
1071*9880d681SAndroid Build Coastguard Worker;CHECK: umlal.4s
1072*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <4 x i16>, <4 x i16>* %A
1073*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <4 x i16>, <4 x i16>* %B
1074*9880d681SAndroid Build Coastguard Worker  %tmp3 = load <4 x i32>, <4 x i32>* %C
1075*9880d681SAndroid Build Coastguard Worker  %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1076*9880d681SAndroid Build Coastguard Worker  %tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
1077*9880d681SAndroid Build Coastguard Worker  %tmp6 = add <4 x i32> %tmp3, %tmp5
1078*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %tmp6
1079*9880d681SAndroid Build Coastguard Worker}
1080*9880d681SAndroid Build Coastguard Worker
; Unsigned multiply-accumulate lane test: umull.v2i64 with element 1 of %B
; broadcast as the second operand, followed by a plain vector add with the
; accumulator loaded from %C. Expects the pair to be emitted as `umlal.2d`
; with no `dup` between the label and that match.
1081*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @umlal_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
1082*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: umlal_lane_2d:
1083*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
1084*9880d681SAndroid Build Coastguard Worker;CHECK: umlal.2d
1085*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <2 x i32>, <2 x i32>* %A
1086*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <2 x i32>, <2 x i32>* %B
1087*9880d681SAndroid Build Coastguard Worker  %tmp3 = load <2 x i64>, <2 x i64>* %C
1088*9880d681SAndroid Build Coastguard Worker  %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
1089*9880d681SAndroid Build Coastguard Worker  %tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
1090*9880d681SAndroid Build Coastguard Worker  %tmp6 = add <2 x i64> %tmp3, %tmp5
1091*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %tmp6
1092*9880d681SAndroid Build Coastguard Worker}
1093*9880d681SAndroid Build Coastguard Worker
1094*9880d681SAndroid Build Coastguard Worker
; Multiply-subtract lane test: smull.v4i32 with element 1 of %B broadcast as
; the second operand, followed by a plain vector sub of the product from the
; accumulator loaded from %C. Expects the pair to be emitted as `smlsl.4s`
; with no `dup` between the label and that match.
1095*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @smlsl_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
1096*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: smlsl_lane_4s:
1097*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
1098*9880d681SAndroid Build Coastguard Worker;CHECK: smlsl.4s
1099*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <4 x i16>, <4 x i16>* %A
1100*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <4 x i16>, <4 x i16>* %B
1101*9880d681SAndroid Build Coastguard Worker  %tmp3 = load <4 x i32>, <4 x i32>* %C
1102*9880d681SAndroid Build Coastguard Worker  %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1103*9880d681SAndroid Build Coastguard Worker  %tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
1104*9880d681SAndroid Build Coastguard Worker  %tmp6 = sub <4 x i32> %tmp3, %tmp5
1105*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %tmp6
1106*9880d681SAndroid Build Coastguard Worker}
1107*9880d681SAndroid Build Coastguard Worker
; Multiply-subtract lane test: smull.v2i64 with element 1 of %B broadcast as
; the second operand, followed by a plain vector sub of the product from the
; accumulator loaded from %C. Expects the pair to be emitted as `smlsl.2d`
; with no `dup` between the label and that match.
1108*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @smlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
1109*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: smlsl_lane_2d:
1110*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
1111*9880d681SAndroid Build Coastguard Worker;CHECK: smlsl.2d
1112*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <2 x i32>, <2 x i32>* %A
1113*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <2 x i32>, <2 x i32>* %B
1114*9880d681SAndroid Build Coastguard Worker  %tmp3 = load <2 x i64>, <2 x i64>* %C
1115*9880d681SAndroid Build Coastguard Worker  %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
1116*9880d681SAndroid Build Coastguard Worker  %tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
1117*9880d681SAndroid Build Coastguard Worker  %tmp6 = sub <2 x i64> %tmp3, %tmp5
1118*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %tmp6
1119*9880d681SAndroid Build Coastguard Worker}
1120*9880d681SAndroid Build Coastguard Worker
; Saturating multiply-subtract lane test: sqdmull.v4i32 with element 1 of %B
; broadcast as the second operand, then sqsub.v4i32 of the product from the
; accumulator loaded from %C. Expects the pair to be emitted as `sqdmlsl.4s`
; with no `dup` between the label and that match.
1121*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @sqdmlsl_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
1122*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlsl_lane_4s:
1123*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
1124*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlsl.4s
1125*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <4 x i16>, <4 x i16>* %A
1126*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <4 x i16>, <4 x i16>* %B
1127*9880d681SAndroid Build Coastguard Worker  %tmp3 = load <4 x i32>, <4 x i32>* %C
1128*9880d681SAndroid Build Coastguard Worker  %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1129*9880d681SAndroid Build Coastguard Worker  %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
1130*9880d681SAndroid Build Coastguard Worker  %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
1131*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %tmp6
1132*9880d681SAndroid Build Coastguard Worker}
1133*9880d681SAndroid Build Coastguard Worker
; Saturating multiply-subtract lane test: sqdmull.v2i64 with element 1 of %B
; broadcast as the second operand, then sqsub.v2i64 of the product from the
; accumulator loaded from %C. Expects the pair to be emitted as `sqdmlsl.2d`
; with no `dup` between the label and that match.
1134*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @sqdmlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
1135*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlsl_lane_2d:
1136*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
1137*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlsl.2d
1138*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <2 x i32>, <2 x i32>* %A
1139*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <2 x i32>, <2 x i32>* %B
1140*9880d681SAndroid Build Coastguard Worker  %tmp3 = load <2 x i64>, <2 x i64>* %C
1141*9880d681SAndroid Build Coastguard Worker  %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
1142*9880d681SAndroid Build Coastguard Worker  %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
1143*9880d681SAndroid Build Coastguard Worker  %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
1144*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %tmp6
1145*9880d681SAndroid Build Coastguard Worker}
1146*9880d681SAndroid Build Coastguard Worker
; High-half variant of the saturating doubling multiply-subtract long test,
; 4 x i16 -> 4 x i32. %tmp1 selects elements 4..7 of the 8 x i16 vector loaded
; from %A; %tmp2 broadcasts element 1 of the vector loaded from %B. The sqdmull
; product is subtracted from the accumulator loaded from %C via sqsub. The
; checks require an sqdmlsl2.4s with no "dup" before it in this function.
1147*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @sqdmlsl2_lane_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
1148*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlsl2_lane_4s:
1149*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
1150*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlsl2.4s
1151*9880d681SAndroid Build Coastguard Worker  %load1 = load <8 x i16>, <8 x i16>* %A
1152*9880d681SAndroid Build Coastguard Worker  %load2 = load <8 x i16>, <8 x i16>* %B
1153*9880d681SAndroid Build Coastguard Worker  %tmp3 = load <4 x i32>, <4 x i32>* %C
1154*9880d681SAndroid Build Coastguard Worker  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1155*9880d681SAndroid Build Coastguard Worker  %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1156*9880d681SAndroid Build Coastguard Worker  %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
1157*9880d681SAndroid Build Coastguard Worker  %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
1158*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %tmp6
1159*9880d681SAndroid Build Coastguard Worker}
1160*9880d681SAndroid Build Coastguard Worker
; High-half variant of the saturating doubling multiply-subtract long test,
; 2 x i32 -> 2 x i64. %tmp1 selects elements 2..3 of the 4 x i32 vector loaded
; from %A; %tmp2 broadcasts element 1 of the vector loaded from %B. The sqdmull
; product is subtracted from the accumulator loaded from %C via sqsub. The
; checks require an sqdmlsl2.2d with no "dup" before it in this function.
1161*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @sqdmlsl2_lane_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
1162*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlsl2_lane_2d:
1163*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
1164*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlsl2.2d
1165*9880d681SAndroid Build Coastguard Worker  %load1 = load <4 x i32>, <4 x i32>* %A
1166*9880d681SAndroid Build Coastguard Worker  %load2 = load <4 x i32>, <4 x i32>* %B
1167*9880d681SAndroid Build Coastguard Worker  %tmp3 = load <2 x i64>, <2 x i64>* %C
1168*9880d681SAndroid Build Coastguard Worker  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1169*9880d681SAndroid Build Coastguard Worker  %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
1170*9880d681SAndroid Build Coastguard Worker  %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
1171*9880d681SAndroid Build Coastguard Worker  %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
1172*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %tmp6
1173*9880d681SAndroid Build Coastguard Worker}
1174*9880d681SAndroid Build Coastguard Worker
; Unsigned multiply-subtract long, 4 x i16 -> 4 x i32, with a splatted
; multiplicand: element 1 of the vector loaded from %B is broadcast, multiplied
; with the vector from %A through the umull intrinsic, and the product is
; subtracted from the accumulator loaded from %C with a plain vector sub. The
; checks require a umlsl.4s with no "dup" before it in this function.
1175*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @umlsl_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
1176*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: umlsl_lane_4s:
1177*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
1178*9880d681SAndroid Build Coastguard Worker;CHECK: umlsl.4s
1179*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <4 x i16>, <4 x i16>* %A
1180*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <4 x i16>, <4 x i16>* %B
1181*9880d681SAndroid Build Coastguard Worker  %tmp3 = load <4 x i32>, <4 x i32>* %C
1182*9880d681SAndroid Build Coastguard Worker  %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1183*9880d681SAndroid Build Coastguard Worker  %tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
1184*9880d681SAndroid Build Coastguard Worker  %tmp6 = sub <4 x i32> %tmp3, %tmp5
1185*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %tmp6
1186*9880d681SAndroid Build Coastguard Worker}
1187*9880d681SAndroid Build Coastguard Worker
; Unsigned multiply-subtract long, 2 x i32 -> 2 x i64, with a splatted
; multiplicand: element 1 of the vector loaded from %B is broadcast, multiplied
; with the vector from %A through the umull intrinsic, and the product is
; subtracted from the accumulator loaded from %C with a plain vector sub. The
; checks require a umlsl.2d with no "dup" before it in this function.
1188*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @umlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
1189*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: umlsl_lane_2d:
1190*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup
1191*9880d681SAndroid Build Coastguard Worker;CHECK: umlsl.2d
1192*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <2 x i32>, <2 x i32>* %A
1193*9880d681SAndroid Build Coastguard Worker  %tmp2 = load <2 x i32>, <2 x i32>* %B
1194*9880d681SAndroid Build Coastguard Worker  %tmp3 = load <2 x i64>, <2 x i64>* %C
1195*9880d681SAndroid Build Coastguard Worker  %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
1196*9880d681SAndroid Build Coastguard Worker  %tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
1197*9880d681SAndroid Build Coastguard Worker  %tmp6 = sub <2 x i64> %tmp3, %tmp5
1198*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %tmp6
1199*9880d681SAndroid Build Coastguard Worker}
1200*9880d681SAndroid Build Coastguard Worker
1201*9880d681SAndroid Build Coastguard Worker; Scalar FMULX
; Scalar single-precision FMULX: the fmulx.f32 intrinsic applied to the two
; float arguments must select a single register-register fmulx followed
; directly by ret. The two next-line directives previously lacked the hyphen
; in the directive name, so FileCheck ignored them and only the label was
; verified; they are spelled correctly here so the instruction is checked.
1202*9880d681SAndroid Build Coastguard Workerdefine float @fmulxs(float %a, float %b) nounwind {
1203*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: fmulxs:
1204*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: fmulx s0, s0, s1
1205*9880d681SAndroid Build Coastguard Worker  %fmulx.i = tail call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) nounwind
1206*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1207*9880d681SAndroid Build Coastguard Worker  ret float %fmulx.i
1208*9880d681SAndroid Build Coastguard Worker}
1209*9880d681SAndroid Build Coastguard Worker
; Scalar double-precision FMULX: the fmulx.f64 intrinsic applied to the two
; double arguments must select a single register-register fmulx followed
; directly by ret. The two next-line directives previously lacked the hyphen
; in the directive name, so FileCheck ignored them and only the label was
; verified; they are spelled correctly here so the instruction is checked.
1210*9880d681SAndroid Build Coastguard Workerdefine double @fmulxd(double %a, double %b) nounwind {
1211*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: fmulxd:
1212*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: fmulx d0, d0, d1
1213*9880d681SAndroid Build Coastguard Worker  %fmulx.i = tail call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) nounwind
1214*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1215*9880d681SAndroid Build Coastguard Worker  ret double %fmulx.i
1216*9880d681SAndroid Build Coastguard Worker}
1217*9880d681SAndroid Build Coastguard Worker
; Scalar single-precision FMULX by element: element 3 of %vec is extracted and
; passed to the fmulx.f32 intrinsic; the expected code is one indexed fmulx.s
; reading v1[3], followed directly by ret. The two next-line directives
; previously lacked the hyphen in the directive name, so FileCheck ignored
; them; they are spelled correctly here so the lane folding is verified.
1218*9880d681SAndroid Build Coastguard Workerdefine float @fmulxs_lane(float %a, <4 x float> %vec) nounwind {
1219*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: fmulxs_lane:
1220*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: fmulx.s s0, s0, v1[3]
1221*9880d681SAndroid Build Coastguard Worker  %b = extractelement <4 x float> %vec, i32 3
1222*9880d681SAndroid Build Coastguard Worker  %fmulx.i = tail call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) nounwind
1223*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1224*9880d681SAndroid Build Coastguard Worker  ret float %fmulx.i
1225*9880d681SAndroid Build Coastguard Worker}
1226*9880d681SAndroid Build Coastguard Worker
; Scalar double-precision FMULX by element: element 1 of %vec is extracted and
; passed to the fmulx.f64 intrinsic; the expected code is one indexed fmulx
; reading v1[1], followed directly by ret. The two next-line directives
; previously lacked the hyphen in the directive name, so FileCheck ignored
; them. They are spelled correctly here, and the expected mnemonic now carries
; the ".d" arrangement suffix, matching the ".s" form used by fmulxs_lane.
; NOTE(review): the ".d" spelling follows the sibling single-precision test
; and was never exercised while the directive was inactive - confirm with llc.
1227*9880d681SAndroid Build Coastguard Workerdefine double @fmulxd_lane(double %a, <2 x double> %vec) nounwind {
1228*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: fmulxd_lane:
1229*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: fmulx.d d0, d0, v1[1]
1230*9880d681SAndroid Build Coastguard Worker  %b = extractelement <2 x double> %vec, i32 1
1231*9880d681SAndroid Build Coastguard Worker  %fmulx.i = tail call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) nounwind
1232*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1233*9880d681SAndroid Build Coastguard Worker  ret double %fmulx.i
1234*9880d681SAndroid Build Coastguard Worker}
1235*9880d681SAndroid Build Coastguard Worker
; Declarations of the scalar fmulx intrinsics (f64 and f32 overloads) called
; by the scalar FMULX tests in this file; both are marked nounwind readnone.
1236*9880d681SAndroid Build Coastguard Workerdeclare double @llvm.aarch64.neon.fmulx.f64(double, double) nounwind readnone
1237*9880d681SAndroid Build Coastguard Workerdeclare float @llvm.aarch64.neon.fmulx.f32(float, float) nounwind readnone
1238*9880d681SAndroid Build Coastguard Worker
1239*9880d681SAndroid Build Coastguard Worker
; Signed multiply long on the high halves, expressed directly: each
; shufflevector selects elements 8..15 of a 16 x i8 argument, and the two
; 8 x i8 results feed the smull intrinsic. The checks require the function to
; consist of exactly one smull2.8h on v0/v1 followed by ret, i.e. no separate
; instruction to extract the high halves.
1240*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @smull2_8h_simple(<16 x i8> %a, <16 x i8> %b) nounwind {
1241*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: smull2_8h_simple:
1242*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: smull2.8h v0, v0, v1
1243*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1244*9880d681SAndroid Build Coastguard Worker  %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1245*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1246*9880d681SAndroid Build Coastguard Worker  %3 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %1, <8 x i8> %2) #2
1247*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %3
1248*9880d681SAndroid Build Coastguard Worker}
1249*9880d681SAndroid Build Coastguard Worker
; Signed multiply long on the high halves of two 16 x i8 arguments, where each
; high half is obtained by bitcasting to 2 x i64, selecting element 1 as a
; 1 x i64 vector, and bitcasting that to 8 x i8. The check requires the smull
; intrinsic on those halves to be emitted as smull2.8h v0, v0, v1.
1250*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @foo0(<16 x i8> %a, <16 x i8> %b) nounwind {
1251*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: foo0:
1252*9880d681SAndroid Build Coastguard Worker; CHECK: smull2.8h v0, v0, v1
1253*9880d681SAndroid Build Coastguard Worker  %tmp = bitcast <16 x i8> %a to <2 x i64>
1254*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
1255*9880d681SAndroid Build Coastguard Worker  %tmp1 = bitcast <1 x i64> %shuffle.i.i to <8 x i8>
1256*9880d681SAndroid Build Coastguard Worker  %tmp2 = bitcast <16 x i8> %b to <2 x i64>
1257*9880d681SAndroid Build Coastguard Worker  %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
1258*9880d681SAndroid Build Coastguard Worker  %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <8 x i8>
1259*9880d681SAndroid Build Coastguard Worker  %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind
1260*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %vmull.i.i
1261*9880d681SAndroid Build Coastguard Worker}
1262*9880d681SAndroid Build Coastguard Worker
; Signed multiply long on the high halves of two 8 x i16 arguments, where each
; high half is obtained by bitcasting to 2 x i64, selecting element 1 as a
; 1 x i64 vector, and bitcasting that to 4 x i16. The check requires the smull
; intrinsic on those halves to be emitted as smull2.4s v0, v0, v1.
1263*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @foo1(<8 x i16> %a, <8 x i16> %b) nounwind {
1264*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: foo1:
1265*9880d681SAndroid Build Coastguard Worker; CHECK: smull2.4s v0, v0, v1
1266*9880d681SAndroid Build Coastguard Worker  %tmp = bitcast <8 x i16> %a to <2 x i64>
1267*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
1268*9880d681SAndroid Build Coastguard Worker  %tmp1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16>
1269*9880d681SAndroid Build Coastguard Worker  %tmp2 = bitcast <8 x i16> %b to <2 x i64>
1270*9880d681SAndroid Build Coastguard Worker  %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
1271*9880d681SAndroid Build Coastguard Worker  %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16>
1272*9880d681SAndroid Build Coastguard Worker  %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
1273*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %vmull2.i.i
1274*9880d681SAndroid Build Coastguard Worker}
1275*9880d681SAndroid Build Coastguard Worker
; Signed multiply long on the high halves of two 4 x i32 arguments, where each
; high half is obtained by bitcasting to 2 x i64, selecting element 1 as a
; 1 x i64 vector, and bitcasting that to 2 x i32. The check requires the smull
; intrinsic on those halves to be emitted as smull2.2d v0, v0, v1.
1276*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @foo2(<4 x i32> %a, <4 x i32> %b) nounwind {
1277*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: foo2:
1278*9880d681SAndroid Build Coastguard Worker; CHECK: smull2.2d v0, v0, v1
1279*9880d681SAndroid Build Coastguard Worker  %tmp = bitcast <4 x i32> %a to <2 x i64>
1280*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
1281*9880d681SAndroid Build Coastguard Worker  %tmp1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32>
1282*9880d681SAndroid Build Coastguard Worker  %tmp2 = bitcast <4 x i32> %b to <2 x i64>
1283*9880d681SAndroid Build Coastguard Worker  %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
1284*9880d681SAndroid Build Coastguard Worker  %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32>
1285*9880d681SAndroid Build Coastguard Worker  %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
1286*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %vmull2.i.i
1287*9880d681SAndroid Build Coastguard Worker}
1288*9880d681SAndroid Build Coastguard Worker
; Unsigned multiply long on the high halves of two 16 x i8 arguments, where
; each high half is obtained by bitcasting to 2 x i64, selecting element 1 as a
; 1 x i64 vector, and bitcasting that to 8 x i8. The check requires the umull
; intrinsic on those halves to be emitted as umull2.8h v0, v0, v1.
1289*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @foo3(<16 x i8> %a, <16 x i8> %b) nounwind {
1290*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: foo3:
1291*9880d681SAndroid Build Coastguard Worker; CHECK: umull2.8h v0, v0, v1
1292*9880d681SAndroid Build Coastguard Worker  %tmp = bitcast <16 x i8> %a to <2 x i64>
1293*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
1294*9880d681SAndroid Build Coastguard Worker  %tmp1 = bitcast <1 x i64> %shuffle.i.i to <8 x i8>
1295*9880d681SAndroid Build Coastguard Worker  %tmp2 = bitcast <16 x i8> %b to <2 x i64>
1296*9880d681SAndroid Build Coastguard Worker  %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
1297*9880d681SAndroid Build Coastguard Worker  %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <8 x i8>
1298*9880d681SAndroid Build Coastguard Worker  %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind
1299*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %vmull.i.i
1300*9880d681SAndroid Build Coastguard Worker}
1301*9880d681SAndroid Build Coastguard Worker
; Unsigned multiply long on the high halves of two 8 x i16 arguments, where
; each high half is obtained by bitcasting to 2 x i64, selecting element 1 as a
; 1 x i64 vector, and bitcasting that to 4 x i16. The check requires the umull
; intrinsic on those halves to be emitted as umull2.4s v0, v0, v1.
1302*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @foo4(<8 x i16> %a, <8 x i16> %b) nounwind {
1303*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: foo4:
1304*9880d681SAndroid Build Coastguard Worker; CHECK: umull2.4s v0, v0, v1
1305*9880d681SAndroid Build Coastguard Worker  %tmp = bitcast <8 x i16> %a to <2 x i64>
1306*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
1307*9880d681SAndroid Build Coastguard Worker  %tmp1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16>
1308*9880d681SAndroid Build Coastguard Worker  %tmp2 = bitcast <8 x i16> %b to <2 x i64>
1309*9880d681SAndroid Build Coastguard Worker  %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
1310*9880d681SAndroid Build Coastguard Worker  %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16>
1311*9880d681SAndroid Build Coastguard Worker  %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
1312*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %vmull2.i.i
1313*9880d681SAndroid Build Coastguard Worker}
1314*9880d681SAndroid Build Coastguard Worker
; Unsigned multiply long on the high halves of two 4 x i32 arguments, where
; each high half is obtained by bitcasting to 2 x i64, selecting element 1 as a
; 1 x i64 vector, and bitcasting that to 2 x i32. The check requires the umull
; intrinsic on those halves to be emitted as umull2.2d v0, v0, v1.
1315*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @foo5(<4 x i32> %a, <4 x i32> %b) nounwind {
1316*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: foo5:
1317*9880d681SAndroid Build Coastguard Worker; CHECK: umull2.2d v0, v0, v1
1318*9880d681SAndroid Build Coastguard Worker  %tmp = bitcast <4 x i32> %a to <2 x i64>
1319*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
1320*9880d681SAndroid Build Coastguard Worker  %tmp1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32>
1321*9880d681SAndroid Build Coastguard Worker  %tmp2 = bitcast <4 x i32> %b to <2 x i64>
1322*9880d681SAndroid Build Coastguard Worker  %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
1323*9880d681SAndroid Build Coastguard Worker  %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32>
1324*9880d681SAndroid Build Coastguard Worker  %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
1325*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %vmull2.i.i
1326*9880d681SAndroid Build Coastguard Worker}
1327*9880d681SAndroid Build Coastguard Worker
; Signed multiply long of the high half of %b (taken via the 2 x i64 bitcast
; and element-1 shuffle) by a broadcast of element 1 of %c. %a is not used by
; the body. The checks require the whole function to be one indexed
; smull2.4s v0, v1, v2[1] followed by ret.
1328*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @foo6(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
1329*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: foo6:
1330*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: smull2.4s v0, v1, v2[1]
1331*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1332*9880d681SAndroid Build Coastguard Workerentry:
1333*9880d681SAndroid Build Coastguard Worker  %0 = bitcast <8 x i16> %b to <2 x i64>
1334*9880d681SAndroid Build Coastguard Worker  %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
1335*9880d681SAndroid Build Coastguard Worker  %1 = bitcast <1 x i64> %shuffle.i to <4 x i16>
1336*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1337*9880d681SAndroid Build Coastguard Worker  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind
1338*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %vmull2.i
1339*9880d681SAndroid Build Coastguard Worker}
1340*9880d681SAndroid Build Coastguard Worker
; Signed multiply long of the high half of %b (taken via the 2 x i64 bitcast
; and element-1 shuffle) by a broadcast of element 1 of %c. %a is not used by
; the body. The checks require the whole function to be one indexed
; smull2.2d v0, v1, v2[1] followed by ret.
1341*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @foo7(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
1342*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: foo7:
1343*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: smull2.2d v0, v1, v2[1]
1344*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1345*9880d681SAndroid Build Coastguard Workerentry:
1346*9880d681SAndroid Build Coastguard Worker  %0 = bitcast <4 x i32> %b to <2 x i64>
1347*9880d681SAndroid Build Coastguard Worker  %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
1348*9880d681SAndroid Build Coastguard Worker  %1 = bitcast <1 x i64> %shuffle.i to <2 x i32>
1349*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1350*9880d681SAndroid Build Coastguard Worker  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind
1351*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %vmull2.i
1352*9880d681SAndroid Build Coastguard Worker}
1353*9880d681SAndroid Build Coastguard Worker
; Unsigned multiply long of the high half of %b (taken via the 2 x i64 bitcast
; and element-1 shuffle) by a broadcast of element 1 of %c. %a is not used by
; the body. The checks require the whole function to be one indexed
; umull2.4s v0, v1, v2[1] followed by ret.
1354*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @foo8(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
1355*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: foo8:
1356*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: umull2.4s v0, v1, v2[1]
1357*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1358*9880d681SAndroid Build Coastguard Workerentry:
1359*9880d681SAndroid Build Coastguard Worker  %0 = bitcast <8 x i16> %b to <2 x i64>
1360*9880d681SAndroid Build Coastguard Worker  %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
1361*9880d681SAndroid Build Coastguard Worker  %1 = bitcast <1 x i64> %shuffle.i to <4 x i16>
1362*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1363*9880d681SAndroid Build Coastguard Worker  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind
1364*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %vmull2.i
1365*9880d681SAndroid Build Coastguard Worker}
1366*9880d681SAndroid Build Coastguard Worker
; Unsigned multiply long of the high half of %b (taken via the 2 x i64 bitcast
; and element-1 shuffle) by a broadcast of element 1 of %c. %a is not used by
; the body. The checks require the whole function to be one indexed
; umull2.2d v0, v1, v2[1] followed by ret.
1367*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @foo9(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
1368*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: foo9:
1369*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: umull2.2d v0, v1, v2[1]
1370*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1371*9880d681SAndroid Build Coastguard Workerentry:
1372*9880d681SAndroid Build Coastguard Worker  %0 = bitcast <4 x i32> %b to <2 x i64>
1373*9880d681SAndroid Build Coastguard Worker  %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
1374*9880d681SAndroid Build Coastguard Worker  %1 = bitcast <1 x i64> %shuffle.i to <2 x i32>
1375*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
1376*9880d681SAndroid Build Coastguard Worker  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind
1377*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %vmull2.i
1378*9880d681SAndroid Build Coastguard Worker}
1379*9880d681SAndroid Build Coastguard Worker
; Signed multiply-accumulate long on high halves: the upper 8 x i8 halves of
; %b and %c (each taken via the 2 x i64 bitcast and element-1 shuffle) are
; multiplied with the smull intrinsic and the product is added to %a. The
; checks require smlal2.8h v0, v1, v2 immediately followed by ret.
1380*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @bar0(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) nounwind {
1381*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: bar0:
1382*9880d681SAndroid Build Coastguard Worker; CHECK: smlal2.8h v0, v1, v2
1383*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1384*9880d681SAndroid Build Coastguard Worker
1385*9880d681SAndroid Build Coastguard Worker  %tmp = bitcast <16 x i8> %b to <2 x i64>
1386*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
1387*9880d681SAndroid Build Coastguard Worker  %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <8 x i8>
1388*9880d681SAndroid Build Coastguard Worker  %tmp2 = bitcast <16 x i8> %c to <2 x i64>
1389*9880d681SAndroid Build Coastguard Worker  %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
1390*9880d681SAndroid Build Coastguard Worker  %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <8 x i8>
1391*9880d681SAndroid Build Coastguard Worker  %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind
1392*9880d681SAndroid Build Coastguard Worker  %add.i = add <8 x i16> %vmull.i.i.i, %a
1393*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %add.i
1394*9880d681SAndroid Build Coastguard Worker}
1395*9880d681SAndroid Build Coastguard Worker
; Signed multiply-accumulate long on high halves: the upper 4 x i16 halves of
; %b and %c (each taken via the 2 x i64 bitcast and element-1 shuffle) are
; multiplied with the smull intrinsic and the product is added to %a. The
; checks require smlal2.4s v0, v1, v2 immediately followed by ret.
1396*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @bar1(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) nounwind {
1397*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: bar1:
1398*9880d681SAndroid Build Coastguard Worker; CHECK: smlal2.4s v0, v1, v2
1399*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1400*9880d681SAndroid Build Coastguard Worker
1401*9880d681SAndroid Build Coastguard Worker  %tmp = bitcast <8 x i16> %b to <2 x i64>
1402*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
1403*9880d681SAndroid Build Coastguard Worker  %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <4 x i16>
1404*9880d681SAndroid Build Coastguard Worker  %tmp2 = bitcast <8 x i16> %c to <2 x i64>
1405*9880d681SAndroid Build Coastguard Worker  %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
1406*9880d681SAndroid Build Coastguard Worker  %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <4 x i16>
1407*9880d681SAndroid Build Coastguard Worker  %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
1408*9880d681SAndroid Build Coastguard Worker  %add.i = add <4 x i32> %vmull2.i.i.i, %a
1409*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %add.i
1410*9880d681SAndroid Build Coastguard Worker}
1411*9880d681SAndroid Build Coastguard Worker
; Signed multiply-accumulate long on high halves: the upper 2 x i32 halves of
; %b and %c (each taken via the 2 x i64 bitcast and element-1 shuffle) are
; multiplied with the smull intrinsic and the product is added to %a. The
; checks require smlal2.2d v0, v1, v2 immediately followed by ret.
1412*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @bar2(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) nounwind {
1413*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: bar2:
1414*9880d681SAndroid Build Coastguard Worker; CHECK: smlal2.2d v0, v1, v2
1415*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1416*9880d681SAndroid Build Coastguard Worker
1417*9880d681SAndroid Build Coastguard Worker  %tmp = bitcast <4 x i32> %b to <2 x i64>
1418*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
1419*9880d681SAndroid Build Coastguard Worker  %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <2 x i32>
1420*9880d681SAndroid Build Coastguard Worker  %tmp2 = bitcast <4 x i32> %c to <2 x i64>
1421*9880d681SAndroid Build Coastguard Worker  %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
1422*9880d681SAndroid Build Coastguard Worker  %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <2 x i32>
1423*9880d681SAndroid Build Coastguard Worker  %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
1424*9880d681SAndroid Build Coastguard Worker  %add.i = add <2 x i64> %vmull2.i.i.i, %a
1425*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %add.i
1426*9880d681SAndroid Build Coastguard Worker}
1427*9880d681SAndroid Build Coastguard Worker
; Unsigned multiply-accumulate long on high halves: the upper 8 x i8 halves of
; %b and %c (each taken via the 2 x i64 bitcast and element-1 shuffle) are
; multiplied with the umull intrinsic and the product is added to %a. The
; checks require umlal2.8h v0, v1, v2 immediately followed by ret.
1428*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @bar3(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) nounwind {
1429*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: bar3:
1430*9880d681SAndroid Build Coastguard Worker; CHECK: umlal2.8h v0, v1, v2
1431*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1432*9880d681SAndroid Build Coastguard Worker
1433*9880d681SAndroid Build Coastguard Worker  %tmp = bitcast <16 x i8> %b to <2 x i64>
1434*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
1435*9880d681SAndroid Build Coastguard Worker  %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <8 x i8>
1436*9880d681SAndroid Build Coastguard Worker  %tmp2 = bitcast <16 x i8> %c to <2 x i64>
1437*9880d681SAndroid Build Coastguard Worker  %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
1438*9880d681SAndroid Build Coastguard Worker  %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <8 x i8>
1439*9880d681SAndroid Build Coastguard Worker  %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind
1440*9880d681SAndroid Build Coastguard Worker  %add.i = add <8 x i16> %vmull.i.i.i, %a
1441*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %add.i
1442*9880d681SAndroid Build Coastguard Worker}
1443*9880d681SAndroid Build Coastguard Worker
; Unsigned multiply-accumulate long on high halves: the upper 4 x i16 halves
; of %b and %c (each taken via the 2 x i64 bitcast and element-1 shuffle) are
; multiplied with the umull intrinsic and the product is added to %a. The
; checks require umlal2.4s v0, v1, v2 immediately followed by ret.
1444*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @bar4(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) nounwind {
1445*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: bar4:
1446*9880d681SAndroid Build Coastguard Worker; CHECK: umlal2.4s v0, v1, v2
1447*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1448*9880d681SAndroid Build Coastguard Worker
1449*9880d681SAndroid Build Coastguard Worker  %tmp = bitcast <8 x i16> %b to <2 x i64>
1450*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
1451*9880d681SAndroid Build Coastguard Worker  %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <4 x i16>
1452*9880d681SAndroid Build Coastguard Worker  %tmp2 = bitcast <8 x i16> %c to <2 x i64>
1453*9880d681SAndroid Build Coastguard Worker  %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
1454*9880d681SAndroid Build Coastguard Worker  %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <4 x i16>
1455*9880d681SAndroid Build Coastguard Worker  %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
1456*9880d681SAndroid Build Coastguard Worker  %add.i = add <4 x i32> %vmull2.i.i.i, %a
1457*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %add.i
1458*9880d681SAndroid Build Coastguard Worker}
1459*9880d681SAndroid Build Coastguard Worker
; Unsigned multiply-accumulate long on high halves: the upper 2 x i32 halves
; of %b and %c (each taken via the 2 x i64 bitcast and element-1 shuffle) are
; multiplied with the umull intrinsic and the product is added to %a. The
; checks require umlal2.2d v0, v1, v2 immediately followed by ret.
1460*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @bar5(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) nounwind {
1461*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: bar5:
1462*9880d681SAndroid Build Coastguard Worker; CHECK: umlal2.2d v0, v1, v2
1463*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1464*9880d681SAndroid Build Coastguard Worker
1465*9880d681SAndroid Build Coastguard Worker  %tmp = bitcast <4 x i32> %b to <2 x i64>
1466*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
1467*9880d681SAndroid Build Coastguard Worker  %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <2 x i32>
1468*9880d681SAndroid Build Coastguard Worker  %tmp2 = bitcast <4 x i32> %c to <2 x i64>
1469*9880d681SAndroid Build Coastguard Worker  %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
1470*9880d681SAndroid Build Coastguard Worker  %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <2 x i32>
1471*9880d681SAndroid Build Coastguard Worker  %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
1472*9880d681SAndroid Build Coastguard Worker  %add.i = add <2 x i64> %vmull2.i.i.i, %a
1473*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %add.i
1474*9880d681SAndroid Build Coastguard Worker}
1475*9880d681SAndroid Build Coastguard Worker
; Signed multiply-accumulate long, high half, with a lane operand that is
; first widened: element 3 of %c is broadcast into an 8 x i16 vector, and the
; upper 4 x i16 halves of both %b and that broadcast are taken via the
; 2 x i64 bitcast and element-1 shuffle. The smull product is added to %a.
; The checks require the indexed form smlal2.4s v0, v1, v2[3] followed
; immediately by ret.
1476*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @mlal2_1(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind {
1477*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mlal2_1:
1478*9880d681SAndroid Build Coastguard Worker; CHECK: smlal2.4s v0, v1, v2[3]
1479*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1480*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
1481*9880d681SAndroid Build Coastguard Worker  %tmp = bitcast <8 x i16> %b to <2 x i64>
1482*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
1483*9880d681SAndroid Build Coastguard Worker  %tmp1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16>
1484*9880d681SAndroid Build Coastguard Worker  %tmp2 = bitcast <8 x i16> %shuffle to <2 x i64>
1485*9880d681SAndroid Build Coastguard Worker  %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
1486*9880d681SAndroid Build Coastguard Worker  %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16>
1487*9880d681SAndroid Build Coastguard Worker  %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
1488*9880d681SAndroid Build Coastguard Worker  %add = add <4 x i32> %vmull2.i.i, %a
1489*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %add
1490*9880d681SAndroid Build Coastguard Worker}
1491*9880d681SAndroid Build Coastguard Worker
; Signed multiply-accumulate long, high half, with a lane operand that is
; first widened: element 1 of %c is broadcast into a 4 x i32 vector, and the
; upper 2 x i32 halves of both %b and that broadcast are taken via the
; 2 x i64 bitcast and element-1 shuffle. The smull product is added to %a.
; The checks require the indexed form smlal2.2d v0, v1, v2[1] followed
; immediately by ret.
1492*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @mlal2_2(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind {
1493*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mlal2_2:
1494*9880d681SAndroid Build Coastguard Worker; CHECK: smlal2.2d v0, v1, v2[1]
1495*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1496*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1497*9880d681SAndroid Build Coastguard Worker  %tmp = bitcast <4 x i32> %b to <2 x i64>
1498*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
1499*9880d681SAndroid Build Coastguard Worker  %tmp1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32>
1500*9880d681SAndroid Build Coastguard Worker  %tmp2 = bitcast <4 x i32> %shuffle to <2 x i64>
1501*9880d681SAndroid Build Coastguard Worker  %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
1502*9880d681SAndroid Build Coastguard Worker  %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32>
1503*9880d681SAndroid Build Coastguard Worker  %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
1504*9880d681SAndroid Build Coastguard Worker  %add = add <2 x i64> %vmull2.i.i, %a
1505*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %add
1506*9880d681SAndroid Build Coastguard Worker}
1507*9880d681SAndroid Build Coastguard Worker
; mlal2_4: unsigned counterpart on 16-bit elements. Splats lane 2 of %c to
; eight lanes, takes element 1 of the <2 x i64> view (the high half) of %b and
; of the splat, multiplies them with the umull intrinsic and adds %a.
; The test expects a single by-element umlal2.4s on lane 2 of v2, directly
; followed by ret.
1508*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @mlal2_4(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind {
1509*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mlal2_4:
1510*9880d681SAndroid Build Coastguard Worker; CHECK: umlal2.4s v0, v1, v2[2]
1511*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1512*9880d681SAndroid Build Coastguard Worker
1513*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
1514*9880d681SAndroid Build Coastguard Worker  %tmp = bitcast <8 x i16> %b to <2 x i64>
1515*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
1516*9880d681SAndroid Build Coastguard Worker  %tmp1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16>
1517*9880d681SAndroid Build Coastguard Worker  %tmp2 = bitcast <8 x i16> %shuffle to <2 x i64>
1518*9880d681SAndroid Build Coastguard Worker  %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
1519*9880d681SAndroid Build Coastguard Worker  %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16>
1520*9880d681SAndroid Build Coastguard Worker  %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
1521*9880d681SAndroid Build Coastguard Worker  %add = add <4 x i32> %vmull2.i.i, %a
1522*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %add
1523*9880d681SAndroid Build Coastguard Worker}
1524*9880d681SAndroid Build Coastguard Worker
; mlal2_5: unsigned counterpart on 32-bit elements. Splats lane 0 of %c (the
; zeroinitializer mask selects element 0 for every lane), takes element 1 of
; the <2 x i64> view (the high half) of %b and of the splat, multiplies them
; with the umull intrinsic and adds %a.
; The test expects a single by-element umlal2.2d on lane 0 of v2, directly
; followed by ret.
1525*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @mlal2_5(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind {
1526*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mlal2_5:
1527*9880d681SAndroid Build Coastguard Worker; CHECK: umlal2.2d v0, v1, v2[0]
1528*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1529*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> zeroinitializer
1530*9880d681SAndroid Build Coastguard Worker  %tmp = bitcast <4 x i32> %b to <2 x i64>
1531*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
1532*9880d681SAndroid Build Coastguard Worker  %tmp1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32>
1533*9880d681SAndroid Build Coastguard Worker  %tmp2 = bitcast <4 x i32> %shuffle to <2 x i64>
1534*9880d681SAndroid Build Coastguard Worker  %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
1535*9880d681SAndroid Build Coastguard Worker  %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32>
1536*9880d681SAndroid Build Coastguard Worker  %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
1537*9880d681SAndroid Build Coastguard Worker  %add = add <2 x i64> %vmull2.i.i, %a
1538*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %add
1539*9880d681SAndroid Build Coastguard Worker}
1540*9880d681SAndroid Build Coastguard Worker
1541*9880d681SAndroid Build Coastguard Worker; rdar://12328502
; vmulq_n_f64: builds a <2 x double> holding the scalar %y in both lanes via
; two insertelement instructions and multiplies it with %x.
; The test expects no dup.2d to be emitted before a by-element
; fmul.2d that reads lane 0 of v1.
1542*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @vmulq_n_f64(<2 x double> %x, double %y) nounwind readnone ssp {
1543*9880d681SAndroid Build Coastguard Workerentry:
1544*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmulq_n_f64:
1545*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: dup.2d
1546*9880d681SAndroid Build Coastguard Worker; CHECK: fmul.2d v0, v0, v1[0]
1547*9880d681SAndroid Build Coastguard Worker  %vecinit.i = insertelement <2 x double> undef, double %y, i32 0
1548*9880d681SAndroid Build Coastguard Worker  %vecinit1.i = insertelement <2 x double> %vecinit.i, double %y, i32 1
1549*9880d681SAndroid Build Coastguard Worker  %mul.i = fmul <2 x double> %vecinit1.i, %x
1550*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %mul.i
1551*9880d681SAndroid Build Coastguard Worker}
1552*9880d681SAndroid Build Coastguard Worker
; vmulq_n_f32: builds a <4 x float> holding the scalar %y in all four lanes
; via a chain of insertelement instructions and multiplies it with %x.
; The test expects no dup.4s to be emitted before a by-element
; fmul.4s that reads lane 0 of v1.
1553*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @vmulq_n_f32(<4 x float> %x, float %y) nounwind readnone ssp {
1554*9880d681SAndroid Build Coastguard Workerentry:
1555*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmulq_n_f32:
1556*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: dup.4s
1557*9880d681SAndroid Build Coastguard Worker; CHECK: fmul.4s v0, v0, v1[0]
1558*9880d681SAndroid Build Coastguard Worker  %vecinit.i = insertelement <4 x float> undef, float %y, i32 0
1559*9880d681SAndroid Build Coastguard Worker  %vecinit1.i = insertelement <4 x float> %vecinit.i, float %y, i32 1
1560*9880d681SAndroid Build Coastguard Worker  %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %y, i32 2
1561*9880d681SAndroid Build Coastguard Worker  %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %y, i32 3
1562*9880d681SAndroid Build Coastguard Worker  %mul.i = fmul <4 x float> %vecinit3.i, %x
1563*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %mul.i
1564*9880d681SAndroid Build Coastguard Worker}
1565*9880d681SAndroid Build Coastguard Worker
; vmul_n_f32: 64-bit variant. Builds a <2 x float> holding the scalar %y in
; both lanes via two insertelement instructions and multiplies it with %x.
; The test expects no dup.2s to be emitted before a by-element
; fmul.2s that reads lane 0 of v1.
1566*9880d681SAndroid Build Coastguard Workerdefine <2 x float> @vmul_n_f32(<2 x float> %x, float %y) nounwind readnone ssp {
1567*9880d681SAndroid Build Coastguard Workerentry:
1568*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmul_n_f32:
1569*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: dup.2s
1570*9880d681SAndroid Build Coastguard Worker; CHECK: fmul.2s v0, v0, v1[0]
1571*9880d681SAndroid Build Coastguard Worker  %vecinit.i = insertelement <2 x float> undef, float %y, i32 0
1572*9880d681SAndroid Build Coastguard Worker  %vecinit1.i = insertelement <2 x float> %vecinit.i, float %y, i32 1
1573*9880d681SAndroid Build Coastguard Worker  %mul.i = fmul <2 x float> %vecinit1.i, %x
1574*9880d681SAndroid Build Coastguard Worker  ret <2 x float> %mul.i
1575*9880d681SAndroid Build Coastguard Worker}
1576*9880d681SAndroid Build Coastguard Worker
; vmla_laneq_s16_test: splats lane 6 of the 8-lane vector %c into a
; <4 x i16>, multiplies it with %b and adds %a.
; The test expects a by-element mla.4h on lane 6 of v2 with no ext emitted
; before it, directly followed by ret. The function name is matched as a
; label (with trailing colon) so the checks are scoped to this function.
1577*9880d681SAndroid Build Coastguard Workerdefine <4 x i16> @vmla_laneq_s16_test(<4 x i16> %a, <4 x i16> %b, <8 x i16> %c) nounwind readnone ssp {
1578*9880d681SAndroid Build Coastguard Workerentry:
1579*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmla_laneq_s16_test:
1580*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext
1581*9880d681SAndroid Build Coastguard Worker; CHECK: mla.4h v0, v1, v2[6]
1582*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1583*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
1584*9880d681SAndroid Build Coastguard Worker  %mul = mul <4 x i16> %shuffle, %b
1585*9880d681SAndroid Build Coastguard Worker  %add = add <4 x i16> %mul, %a
1586*9880d681SAndroid Build Coastguard Worker  ret <4 x i16> %add
1587*9880d681SAndroid Build Coastguard Worker}
1588*9880d681SAndroid Build Coastguard Worker
; vmla_laneq_s32_test: splats lane 3 of the 4-lane vector %c into a
; <2 x i32>, multiplies it with %b and adds %a.
; The test expects a by-element mla.2s on lane 3 of v2 with no ext emitted
; before it, directly followed by ret. The function name is matched as a
; label (with trailing colon) so the checks are scoped to this function.
1589*9880d681SAndroid Build Coastguard Workerdefine <2 x i32> @vmla_laneq_s32_test(<2 x i32> %a, <2 x i32> %b, <4 x i32> %c) nounwind readnone ssp {
1590*9880d681SAndroid Build Coastguard Workerentry:
1591*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmla_laneq_s32_test:
1592*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext
1593*9880d681SAndroid Build Coastguard Worker; CHECK: mla.2s v0, v1, v2[3]
1594*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1595*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
1596*9880d681SAndroid Build Coastguard Worker  %mul = mul <2 x i32> %shuffle, %b
1597*9880d681SAndroid Build Coastguard Worker  %add = add <2 x i32> %mul, %a
1598*9880d681SAndroid Build Coastguard Worker  ret <2 x i32> %add
1599*9880d681SAndroid Build Coastguard Worker}
1600*9880d681SAndroid Build Coastguard Worker
; not_really_vmlaq_laneq_s16_test: first extracts elements 4..7 of %c into a
; <4 x i16>, then splats element 1 of that sub-vector (i.e. lane 5 of %c) to
; eight lanes, multiplies with %b and adds %a.
; The test expects a by-element mla.8h on lane 5 of v2 with no ext emitted
; before it, directly followed by ret. The function name is matched as a
; label (with trailing colon) so the checks are scoped to this function.
1601*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @not_really_vmlaq_laneq_s16_test(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind readnone ssp {
1602*9880d681SAndroid Build Coastguard Workerentry:
1603*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: not_really_vmlaq_laneq_s16_test:
1604*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext
1605*9880d681SAndroid Build Coastguard Worker; CHECK: mla.8h v0, v1, v2[5]
1606*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1607*9880d681SAndroid Build Coastguard Worker  %shuffle1 = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1608*9880d681SAndroid Build Coastguard Worker  %shuffle2 = shufflevector <4 x i16> %shuffle1, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1609*9880d681SAndroid Build Coastguard Worker  %mul = mul <8 x i16> %shuffle2, %b
1610*9880d681SAndroid Build Coastguard Worker  %add = add <8 x i16> %mul, %a
1611*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %add
1612*9880d681SAndroid Build Coastguard Worker}
1613*9880d681SAndroid Build Coastguard Worker
; not_really_vmlaq_laneq_s32_test: first extracts elements 2..3 of %c into a
; <2 x i32>, then splats element 1 of that sub-vector (i.e. lane 3 of %c) to
; four lanes, multiplies with %b and adds %a.
; The test expects a by-element mla.4s on lane 3 of v2 with no ext emitted
; before it, directly followed by ret. The function name is matched as a
; label (with trailing colon) so the checks are scoped to this function.
1614*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @not_really_vmlaq_laneq_s32_test(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind readnone ssp {
1615*9880d681SAndroid Build Coastguard Workerentry:
1616*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: not_really_vmlaq_laneq_s32_test:
1617*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext
1618*9880d681SAndroid Build Coastguard Worker; CHECK: mla.4s v0, v1, v2[3]
1619*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1620*9880d681SAndroid Build Coastguard Worker  %shuffle1 = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1621*9880d681SAndroid Build Coastguard Worker  %shuffle2 = shufflevector <2 x i32> %shuffle1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1622*9880d681SAndroid Build Coastguard Worker  %mul = mul <4 x i32> %shuffle2, %b
1623*9880d681SAndroid Build Coastguard Worker  %add = add <4 x i32> %mul, %a
1624*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %add
1625*9880d681SAndroid Build Coastguard Worker}
1626*9880d681SAndroid Build Coastguard Worker
; vmull_laneq_s16_test: splats lane 6 of the 8-lane vector %b into a
; <4 x i16> and feeds it with %a to the signed widening smull intrinsic.
; The test expects a by-element smull.4s on lane 6 of v1 with no ext emitted
; before it, directly followed by ret. The function name is matched as a
; label (with trailing colon) so the checks are scoped to this function.
1627*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @vmull_laneq_s16_test(<4 x i16> %a, <8 x i16> %b) nounwind readnone ssp {
1628*9880d681SAndroid Build Coastguard Workerentry:
1629*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmull_laneq_s16_test:
1630*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext
1631*9880d681SAndroid Build Coastguard Worker; CHECK: smull.4s v0, v0, v1[6]
1632*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1633*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
1634*9880d681SAndroid Build Coastguard Worker  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2
1635*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %vmull2.i
1636*9880d681SAndroid Build Coastguard Worker}
1637*9880d681SAndroid Build Coastguard Worker
; vmull_laneq_s32_test: splats lane 2 of the 4-lane vector %b into a
; <2 x i32> and feeds it with %a to the signed widening smull intrinsic.
; The test expects a by-element smull.2d on lane 2 of v1 with no ext emitted
; before it, directly followed by ret. The function name is matched as a
; label (with trailing colon) so the checks are scoped to this function.
1638*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @vmull_laneq_s32_test(<2 x i32> %a, <4 x i32> %b) nounwind readnone ssp {
1639*9880d681SAndroid Build Coastguard Workerentry:
1640*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmull_laneq_s32_test:
1641*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext
1642*9880d681SAndroid Build Coastguard Worker; CHECK: smull.2d v0, v0, v1[2]
1643*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1644*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 2>
1645*9880d681SAndroid Build Coastguard Worker  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2
1646*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %vmull2.i
1647*9880d681SAndroid Build Coastguard Worker}
; vmull_laneq_u16_test: splats lane 6 of the 8-lane vector %b into a
; <4 x i16> and feeds it with %a to the unsigned widening umull intrinsic.
; The test expects a by-element umull.4s on lane 6 of v1 with no ext emitted
; before it, directly followed by ret. The function name is matched as a
; label (with trailing colon) so the checks are scoped to this function.
1648*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @vmull_laneq_u16_test(<4 x i16> %a, <8 x i16> %b) nounwind readnone ssp {
1649*9880d681SAndroid Build Coastguard Workerentry:
1650*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmull_laneq_u16_test:
1651*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext
1652*9880d681SAndroid Build Coastguard Worker; CHECK: umull.4s v0, v0, v1[6]
1653*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1654*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
1655*9880d681SAndroid Build Coastguard Worker  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2
1656*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %vmull2.i
1657*9880d681SAndroid Build Coastguard Worker}
1658*9880d681SAndroid Build Coastguard Worker
; vmull_laneq_u32_test: splats lane 2 of the 4-lane vector %b into a
; <2 x i32> and feeds it with %a to the unsigned widening umull intrinsic.
; The test expects a by-element umull.2d on lane 2 of v1 with no ext emitted
; before it, directly followed by ret. The function name is matched as a
; label (with trailing colon) so the checks are scoped to this function.
1659*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @vmull_laneq_u32_test(<2 x i32> %a, <4 x i32> %b) nounwind readnone ssp {
1660*9880d681SAndroid Build Coastguard Workerentry:
1661*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmull_laneq_u32_test:
1662*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext
1663*9880d681SAndroid Build Coastguard Worker; CHECK: umull.2d v0, v0, v1[2]
1664*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1665*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 2>
1666*9880d681SAndroid Build Coastguard Worker  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2
1667*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %vmull2.i
1668*9880d681SAndroid Build Coastguard Worker}
1669*9880d681SAndroid Build Coastguard Worker
; vmull_high_n_s16_test: truncates the scalar %d to i16, builds a <4 x i16>
; with that value in every lane via insertelement, takes element 1 of the
; <2 x i64> view (the high half) of %b and multiplies the two with the signed
; widening smull intrinsic. %a and %c are not used by the body.
; The test expects an smull2.4s with no ext emitted before it, directly
; followed by ret. The function name is matched as a label (with trailing
; colon) so the checks are scoped to this function.
1670*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @vmull_high_n_s16_test(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c, i32 %d) nounwind readnone optsize ssp {
1671*9880d681SAndroid Build Coastguard Workerentry:
1672*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmull_high_n_s16_test:
1673*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext
1674*9880d681SAndroid Build Coastguard Worker; CHECK: smull2.4s
1675*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1676*9880d681SAndroid Build Coastguard Worker  %conv = trunc i32 %d to i16
1677*9880d681SAndroid Build Coastguard Worker  %0 = bitcast <8 x i16> %b to <2 x i64>
1678*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
1679*9880d681SAndroid Build Coastguard Worker  %1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16>
1680*9880d681SAndroid Build Coastguard Worker  %vecinit.i = insertelement <4 x i16> undef, i16 %conv, i32 0
1681*9880d681SAndroid Build Coastguard Worker  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %conv, i32 1
1682*9880d681SAndroid Build Coastguard Worker  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %conv, i32 2
1683*9880d681SAndroid Build Coastguard Worker  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %conv, i32 3
1684*9880d681SAndroid Build Coastguard Worker  %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %vecinit3.i) nounwind
1685*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %vmull2.i.i
1686*9880d681SAndroid Build Coastguard Worker}
1687*9880d681SAndroid Build Coastguard Worker
; vmull_high_n_s32_test: builds a <2 x i32> with the scalar %d in both lanes
; via insertelement, takes element 1 of the <2 x i64> view (the high half) of
; %b and multiplies the two with the signed widening smull intrinsic.
; %a and %c are not used by the body.
; The test expects an smull2.2d with no ext emitted before it, directly
; followed by ret. The function name is matched as a label (with trailing
; colon) so the checks are scoped to this function.
1688*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @vmull_high_n_s32_test(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c, i32 %d) nounwind readnone optsize ssp {
1689*9880d681SAndroid Build Coastguard Workerentry:
1690*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmull_high_n_s32_test:
1691*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext
1692*9880d681SAndroid Build Coastguard Worker; CHECK: smull2.2d
1693*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1694*9880d681SAndroid Build Coastguard Worker  %0 = bitcast <4 x i32> %b to <2 x i64>
1695*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
1696*9880d681SAndroid Build Coastguard Worker  %1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32>
1697*9880d681SAndroid Build Coastguard Worker  %vecinit.i = insertelement <2 x i32> undef, i32 %d, i32 0
1698*9880d681SAndroid Build Coastguard Worker  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %d, i32 1
1699*9880d681SAndroid Build Coastguard Worker  %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %vecinit1.i) nounwind
1700*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %vmull2.i.i
1701*9880d681SAndroid Build Coastguard Worker}
1702*9880d681SAndroid Build Coastguard Worker
; vmull_high_n_u16_test: truncates the scalar %d to i16, builds a <4 x i16>
; with that value in every lane via insertelement, takes element 1 of the
; <2 x i64> view (the high half) of %b and multiplies the two with the
; unsigned widening umull intrinsic. %a and %c are not used by the body.
; The test expects a umull2.4s with no ext emitted before it, directly
; followed by ret. The function name is matched as a label (with trailing
; colon) so the checks are scoped to this function.
1703*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @vmull_high_n_u16_test(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c, i32 %d) nounwind readnone optsize ssp {
1704*9880d681SAndroid Build Coastguard Workerentry:
1705*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmull_high_n_u16_test:
1706*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext
1707*9880d681SAndroid Build Coastguard Worker; CHECK: umull2.4s
1708*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1709*9880d681SAndroid Build Coastguard Worker  %conv = trunc i32 %d to i16
1710*9880d681SAndroid Build Coastguard Worker  %0 = bitcast <8 x i16> %b to <2 x i64>
1711*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
1712*9880d681SAndroid Build Coastguard Worker  %1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16>
1713*9880d681SAndroid Build Coastguard Worker  %vecinit.i = insertelement <4 x i16> undef, i16 %conv, i32 0
1714*9880d681SAndroid Build Coastguard Worker  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %conv, i32 1
1715*9880d681SAndroid Build Coastguard Worker  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %conv, i32 2
1716*9880d681SAndroid Build Coastguard Worker  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %conv, i32 3
1717*9880d681SAndroid Build Coastguard Worker  %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %1, <4 x i16> %vecinit3.i) nounwind
1718*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %vmull2.i.i
1719*9880d681SAndroid Build Coastguard Worker}
1720*9880d681SAndroid Build Coastguard Worker
; vmull_high_n_u32_test: builds a <2 x i32> with the scalar %d in both lanes
; via insertelement, takes element 1 of the <2 x i64> view (the high half) of
; %b and multiplies the two with the unsigned widening umull intrinsic.
; %a and %c are not used by the body.
; The test expects a umull2.2d with no ext emitted before it, directly
; followed by ret. The function name is matched as a label (with trailing
; colon) so the checks are scoped to this function.
1721*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @vmull_high_n_u32_test(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c, i32 %d) nounwind readnone optsize ssp {
1722*9880d681SAndroid Build Coastguard Workerentry:
1723*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmull_high_n_u32_test:
1724*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext
1725*9880d681SAndroid Build Coastguard Worker; CHECK: umull2.2d
1726*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret
1727*9880d681SAndroid Build Coastguard Worker  %0 = bitcast <4 x i32> %b to <2 x i64>
1728*9880d681SAndroid Build Coastguard Worker  %shuffle.i.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
1729*9880d681SAndroid Build Coastguard Worker  %1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32>
1730*9880d681SAndroid Build Coastguard Worker  %vecinit.i = insertelement <2 x i32> undef, i32 %d, i32 0
1731*9880d681SAndroid Build Coastguard Worker  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %d, i32 1
1732*9880d681SAndroid Build Coastguard Worker  %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %vecinit1.i) nounwind
1733*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %vmull2.i.i
1734*9880d681SAndroid Build Coastguard Worker}
1735*9880d681SAndroid Build Coastguard Worker
; vmul_built_dup_test: extracts lane 1 of %b as a scalar, rebuilds a
; <4 x i32> holding that scalar in every lane via four insertelement
; instructions, and multiplies it with %a.
; The test expects neither ins nor dup before a by-element mul.4s that
; indexes lane 1; the register numbers are left unconstrained.
1736*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @vmul_built_dup_test(<4 x i32> %a, <4 x i32> %b) {
1737*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmul_built_dup_test:
1738*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ins
1739*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: dup
1740*9880d681SAndroid Build Coastguard Worker; CHECK: mul.4s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[1]
1741*9880d681SAndroid Build Coastguard Worker  %vget_lane = extractelement <4 x i32> %b, i32 1
1742*9880d681SAndroid Build Coastguard Worker  %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0
1743*9880d681SAndroid Build Coastguard Worker  %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
1744*9880d681SAndroid Build Coastguard Worker  %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
1745*9880d681SAndroid Build Coastguard Worker  %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
1746*9880d681SAndroid Build Coastguard Worker  %prod = mul <4 x i32> %a, %vecinit3.i
1747*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %prod
1748*9880d681SAndroid Build Coastguard Worker}
1749*9880d681SAndroid Build Coastguard Worker
; vmul_built_dup_fromsmall_test: extracts lane 3 of the 64-bit vector %b as a
; scalar, rebuilds a <4 x i16> holding that scalar in every lane via four
; insertelement instructions, and multiplies it with %a.
; The test expects neither ins nor dup before a by-element mul.4h that
; indexes lane 3; the register numbers are left unconstrained.
1750*9880d681SAndroid Build Coastguard Workerdefine <4 x i16> @vmul_built_dup_fromsmall_test(<4 x i16> %a, <4 x i16> %b) {
1751*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmul_built_dup_fromsmall_test:
1752*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ins
1753*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: dup
1754*9880d681SAndroid Build Coastguard Worker; CHECK: mul.4h {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[3]
1755*9880d681SAndroid Build Coastguard Worker  %vget_lane = extractelement <4 x i16> %b, i32 3
1756*9880d681SAndroid Build Coastguard Worker  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1757*9880d681SAndroid Build Coastguard Worker  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1758*9880d681SAndroid Build Coastguard Worker  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1759*9880d681SAndroid Build Coastguard Worker  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1760*9880d681SAndroid Build Coastguard Worker  %prod = mul <4 x i16> %a, %vecinit3.i
1761*9880d681SAndroid Build Coastguard Worker  ret <4 x i16> %prod
1762*9880d681SAndroid Build Coastguard Worker}
1763*9880d681SAndroid Build Coastguard Worker
; vmulq_built_dup_fromsmall_test: extracts lane 0 of the 64-bit vector %b as
; a scalar, builds a 128-bit <8 x i16> holding that scalar in every lane via
; eight insertelement instructions, and multiplies it with %a.
; The test expects neither ins nor dup before a by-element mul.8h that
; indexes lane 0; the register numbers are left unconstrained.
1764*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @vmulq_built_dup_fromsmall_test(<8 x i16> %a, <4 x i16> %b) {
1765*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmulq_built_dup_fromsmall_test:
1766*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ins
1767*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: dup
1768*9880d681SAndroid Build Coastguard Worker; CHECK: mul.8h {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0]
1769*9880d681SAndroid Build Coastguard Worker  %vget_lane = extractelement <4 x i16> %b, i32 0
1770*9880d681SAndroid Build Coastguard Worker  %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
1771*9880d681SAndroid Build Coastguard Worker  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
1772*9880d681SAndroid Build Coastguard Worker  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1773*9880d681SAndroid Build Coastguard Worker  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1774*9880d681SAndroid Build Coastguard Worker  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
1775*9880d681SAndroid Build Coastguard Worker  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
1776*9880d681SAndroid Build Coastguard Worker  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
1777*9880d681SAndroid Build Coastguard Worker  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
1778*9880d681SAndroid Build Coastguard Worker  %prod = mul <8 x i16> %a, %vecinit7.i
1779*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %prod
1780*9880d681SAndroid Build Coastguard Worker}
1781*9880d681SAndroid Build Coastguard Worker
; mull_from_two_extracts: extracts elements 2..3 (the high half) of both %lhs
; and %rhs and passes them to the sqdmull intrinsic.
; The test expects an sqdmull2.2d with no ext emitted before it.
1782*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @mull_from_two_extracts(<4 x i32> %lhs, <4 x i32> %rhs) {
1783*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mull_from_two_extracts:
1784*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext
1785*9880d681SAndroid Build Coastguard Worker; CHECK: sqdmull2.2d
1786*9880d681SAndroid Build Coastguard Worker
1787*9880d681SAndroid Build Coastguard Worker  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1788*9880d681SAndroid Build Coastguard Worker  %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1789*9880d681SAndroid Build Coastguard Worker
1790*9880d681SAndroid Build Coastguard Worker  %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
1791*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %res
1792*9880d681SAndroid Build Coastguard Worker}
1793*9880d681SAndroid Build Coastguard Worker
; mlal_from_two_extracts: extracts elements 2..3 (the high half) of both %lhs
; and %rhs, passes them to the sqdmull intrinsic, and accumulates the result
; into %accum with the sqadd intrinsic.
; The test expects an sqdmlal2.2d with no ext emitted before it.
1794*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @mlal_from_two_extracts(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) {
1795*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mlal_from_two_extracts:
1796*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext
1797*9880d681SAndroid Build Coastguard Worker; CHECK: sqdmlal2.2d
1798*9880d681SAndroid Build Coastguard Worker
1799*9880d681SAndroid Build Coastguard Worker  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1800*9880d681SAndroid Build Coastguard Worker  %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1801*9880d681SAndroid Build Coastguard Worker
1802*9880d681SAndroid Build Coastguard Worker  %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
1803*9880d681SAndroid Build Coastguard Worker  %sum = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res)
1804*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %sum
1805*9880d681SAndroid Build Coastguard Worker}
1806*9880d681SAndroid Build Coastguard Worker
; mull_from_extract_dup: builds a <2 x i32> with the scalar %rhs in both lanes
; via insertelement, extracts elements 2..3 (the high half) of %lhs, and
; passes both to the sqdmull intrinsic.
; The test expects an sqdmull2.2d with no ext emitted before it.
1807*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @mull_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
1808*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mull_from_extract_dup:
1809*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext
1810*9880d681SAndroid Build Coastguard Worker; CHECK: sqdmull2.2d
1811*9880d681SAndroid Build Coastguard Worker  %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
1812*9880d681SAndroid Build Coastguard Worker  %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
1813*9880d681SAndroid Build Coastguard Worker
1814*9880d681SAndroid Build Coastguard Worker  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1815*9880d681SAndroid Build Coastguard Worker
1816*9880d681SAndroid Build Coastguard Worker  %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind
1817*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %res
1818*9880d681SAndroid Build Coastguard Worker}
1819*9880d681SAndroid Build Coastguard Worker
; pmull_from_extract_dup: inserts the scalar %rhs into lane 0 of a <8 x i8>
; and splats that lane with an all-zero shuffle mask, extracts elements 8..15
; (the high half) of %lhs, and passes both to the pmull intrinsic.
; The test expects a pmull2.8h with no ext emitted before it.
1820*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @pmull_from_extract_dup(<16 x i8> %lhs, i8 %rhs) {
1821*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: pmull_from_extract_dup:
1822*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext
1823*9880d681SAndroid Build Coastguard Worker; CHECK: pmull2.8h
1824*9880d681SAndroid Build Coastguard Worker  %rhsvec.0 = insertelement <8 x i8> undef, i8 %rhs, i32 0
1825*9880d681SAndroid Build Coastguard Worker  %rhsvec = shufflevector <8 x i8> %rhsvec.0, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1826*9880d681SAndroid Build Coastguard Worker
1827*9880d681SAndroid Build Coastguard Worker  %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1828*9880d681SAndroid Build Coastguard Worker
1829*9880d681SAndroid Build Coastguard Worker  %res = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhsvec) nounwind
1830*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %res
1831*9880d681SAndroid Build Coastguard Worker}
1832*9880d681SAndroid Build Coastguard Worker
; pmull_from_extract_duplane: extracts elements 8..15 (the high half) of
; %lhs, splats lane 0 of the vector %rhs across eight lanes, and passes both
; to the pmull intrinsic.
; The test expects a pmull2.8h with no ext emitted before it.
1833*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @pmull_from_extract_duplane(<16 x i8> %lhs, <8 x i8> %rhs) {
1834*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: pmull_from_extract_duplane:
1835*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext
1836*9880d681SAndroid Build Coastguard Worker; CHECK: pmull2.8h
1837*9880d681SAndroid Build Coastguard Worker
1838*9880d681SAndroid Build Coastguard Worker  %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1839*9880d681SAndroid Build Coastguard Worker  %rhs.high = shufflevector <8 x i8> %rhs, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1840*9880d681SAndroid Build Coastguard Worker
1841*9880d681SAndroid Build Coastguard Worker  %res = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhs.high) nounwind
1842*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %res
1843*9880d681SAndroid Build Coastguard Worker}
1844*9880d681SAndroid Build Coastguard Worker
; sqdmull_from_extract_duplane: extracts elements 2..3 (the high half) of
; %lhs, splats lane 0 of %rhs into a <2 x i32>, and passes both to the
; sqdmull intrinsic.
; The test expects an sqdmull2.2d with no ext emitted before it.
1845*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @sqdmull_from_extract_duplane(<4 x i32> %lhs, <4 x i32> %rhs) {
1846*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: sqdmull_from_extract_duplane:
1847*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext
1848*9880d681SAndroid Build Coastguard Worker; CHECK: sqdmull2.2d
1849*9880d681SAndroid Build Coastguard Worker
1850*9880d681SAndroid Build Coastguard Worker  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1851*9880d681SAndroid Build Coastguard Worker  %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
1852*9880d681SAndroid Build Coastguard Worker
1853*9880d681SAndroid Build Coastguard Worker  %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
1854*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %res
1855*9880d681SAndroid Build Coastguard Worker}
1856*9880d681SAndroid Build Coastguard Worker
; sqdmlal_from_extract_duplane: extracts elements 2..3 (the high half) of
; %lhs, splats lane 0 of %rhs into a <2 x i32>, passes both to the sqdmull
; intrinsic, and accumulates the result into %accum with the sqadd intrinsic.
; The test expects an sqdmlal2.2d with no ext emitted before it.
1857*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @sqdmlal_from_extract_duplane(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) {
1858*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: sqdmlal_from_extract_duplane:
1859*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext
1860*9880d681SAndroid Build Coastguard Worker; CHECK: sqdmlal2.2d
1861*9880d681SAndroid Build Coastguard Worker
1862*9880d681SAndroid Build Coastguard Worker  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1863*9880d681SAndroid Build Coastguard Worker  %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
1864*9880d681SAndroid Build Coastguard Worker
1865*9880d681SAndroid Build Coastguard Worker  %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
1866*9880d681SAndroid Build Coastguard Worker  %sum = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res)
1867*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %sum
1868*9880d681SAndroid Build Coastguard Worker}
1869*9880d681SAndroid Build Coastguard Worker
; umlal_from_extract_duplane: extracts elements 2..3 (the high half) of %lhs,
; splats lane 0 of %rhs into a <2 x i32>, passes both to the unsigned
; widening umull intrinsic, and adds the result to %accum with a plain add.
; The test expects a umlal2.2d with no ext emitted before it.
1870*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @umlal_from_extract_duplane(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) {
1871*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: umlal_from_extract_duplane:
1872*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext
1873*9880d681SAndroid Build Coastguard Worker; CHECK: umlal2.2d
1874*9880d681SAndroid Build Coastguard Worker
1875*9880d681SAndroid Build Coastguard Worker  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
1876*9880d681SAndroid Build Coastguard Worker  %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
1877*9880d681SAndroid Build Coastguard Worker
1878*9880d681SAndroid Build Coastguard Worker  %res = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
1879*9880d681SAndroid Build Coastguard Worker  %sum = add <2 x i64> %accum, %res
1880*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %sum
1881*9880d681SAndroid Build Coastguard Worker}
1882*9880d681SAndroid Build Coastguard Worker
; Scalar llvm.fma.f32 whose second multiplicand is lane 3 of a <4 x float>.
; The directive expects the lane-indexed form: fmla.s s0, s1, v2[3].
define float @scalar_fmla_from_extract_v4f32(float %accum, float %lhs, <4 x float> %rvec) {
; CHECK-LABEL: scalar_fmla_from_extract_v4f32:
; CHECK: fmla.s s0, s1, v2[3]
  %lane = extractelement <4 x float> %rvec, i32 3
  %fused = call float @llvm.fma.f32(float %lhs, float %lane, float %accum)
  ret float %fused
}
1890*9880d681SAndroid Build Coastguard Worker
; Scalar llvm.fma.f32 whose second multiplicand is lane 1 of a <2 x float>.
; The directive expects the lane-indexed form: fmla.s s0, s1, v2[1].
define float @scalar_fmla_from_extract_v2f32(float %accum, float %lhs, <2 x float> %rvec) {
; CHECK-LABEL: scalar_fmla_from_extract_v2f32:
; CHECK: fmla.s s0, s1, v2[1]
  %lane = extractelement <2 x float> %rvec, i32 1
  %fused = call float @llvm.fma.f32(float %lhs, float %lane, float %accum)
  ret float %fused
}
1898*9880d681SAndroid Build Coastguard Worker
; Scalar llvm.fma.f32 whose second multiplicand is lane 3 of a <4 x float>
; subtracted from -0.0 after the extract. The directive expects the
; lane-indexed subtract form: fmls.s s0, s1, v2[3].
define float @scalar_fmls_from_extract_v4f32(float %accum, float %lhs, <4 x float> %rvec) {
; CHECK-LABEL: scalar_fmls_from_extract_v4f32:
; CHECK: fmls.s s0, s1, v2[3]
  %lane = extractelement <4 x float> %rvec, i32 3
  ; Negation spelled as a subtraction from -0.0.
  %lane.neg = fsub float -0.0, %lane
  %fused = call float @llvm.fma.f32(float %lhs, float %lane.neg, float %accum)
  ret float %fused
}
1907*9880d681SAndroid Build Coastguard Worker
; Scalar llvm.fma.f32 whose second multiplicand is lane 1 of a <2 x float>
; subtracted from -0.0 after the extract. The directive expects the
; lane-indexed subtract form: fmls.s s0, s1, v2[1].
define float @scalar_fmls_from_extract_v2f32(float %accum, float %lhs, <2 x float> %rvec) {
; CHECK-LABEL: scalar_fmls_from_extract_v2f32:
; CHECK: fmls.s s0, s1, v2[1]
  %lane = extractelement <2 x float> %rvec, i32 1
  ; Negation spelled as a subtraction from -0.0.
  %lane.neg = fsub float -0.0, %lane
  %fused = call float @llvm.fma.f32(float %lhs, float %lane.neg, float %accum)
  ret float %fused
}
1916*9880d681SAndroid Build Coastguard Worker
1917*9880d681SAndroid Build Coastguard Workerdeclare float @llvm.fma.f32(float, float, float)
1918*9880d681SAndroid Build Coastguard Worker
; Scalar llvm.fma.f64 whose second multiplicand is lane 1 of a <2 x double>.
; The directive expects the lane-indexed form: fmla.d d0, d1, v2[1].
define double @scalar_fmla_from_extract_v2f64(double %accum, double %lhs, <2 x double> %rvec) {
; CHECK-LABEL: scalar_fmla_from_extract_v2f64:
; CHECK: fmla.d d0, d1, v2[1]
  %lane = extractelement <2 x double> %rvec, i32 1
  %fused = call double @llvm.fma.f64(double %lhs, double %lane, double %accum)
  ret double %fused
}
1926*9880d681SAndroid Build Coastguard Worker
; Scalar llvm.fma.f64 whose second multiplicand is lane 1 of a <2 x double>
; subtracted from -0.0 after the extract. The directive expects the
; lane-indexed subtract form: fmls.d d0, d1, v2[1].
define double @scalar_fmls_from_extract_v2f64(double %accum, double %lhs, <2 x double> %rvec) {
; CHECK-LABEL: scalar_fmls_from_extract_v2f64:
; CHECK: fmls.d d0, d1, v2[1]
  %lane = extractelement <2 x double> %rvec, i32 1
  ; Negation spelled as a subtraction from -0.0.
  %lane.neg = fsub double -0.0, %lane
  %fused = call double @llvm.fma.f64(double %lhs, double %lane.neg, double %accum)
  ret double %fused
}
1935*9880d681SAndroid Build Coastguard Worker
1936*9880d681SAndroid Build Coastguard Workerdeclare double @llvm.fma.f64(double, double, double)
1937*9880d681SAndroid Build Coastguard Worker
; The whole <4 x float> %rhs is subtracted from a -0.0 vector first, and only
; then is lane 3 broadcast into a <2 x float> for llvm.fma.v2f32. The
; directive expects: fmls.2s v0, v1, v2[3].
define <2 x float> @fmls_with_fneg_before_extract_v2f32(<2 x float> %accum, <2 x float> %lhs, <4 x float> %rhs) {
; CHECK-LABEL: fmls_with_fneg_before_extract_v2f32:
; CHECK: fmls.2s v0, v1, v2[3]
  %negated = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %rhs
  ; Lane 3 of the negated vector in both result lanes.
  %bcast = shufflevector <4 x float> %negated, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %fused = call <2 x float> @llvm.fma.v2f32(<2 x float> %lhs, <2 x float> %bcast, <2 x float> %accum)
  ret <2 x float> %fused
}
1946*9880d681SAndroid Build Coastguard Worker
; The whole <2 x float> %rhs is subtracted from a -0.0 vector first, and only
; then is lane 1 broadcast into a <2 x float> for llvm.fma.v2f32. The
; directive expects: fmls.2s v0, v1, v2[1].
define <2 x float> @fmls_with_fneg_before_extract_v2f32_1(<2 x float> %accum, <2 x float> %lhs, <2 x float> %rhs) {
; CHECK-LABEL: fmls_with_fneg_before_extract_v2f32_1:
; CHECK: fmls.2s v0, v1, v2[1]
  %negated = fsub <2 x float> <float -0.0, float -0.0>, %rhs
  ; Lane 1 of the negated vector in both result lanes.
  %bcast = shufflevector <2 x float> %negated, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %fused = call <2 x float> @llvm.fma.v2f32(<2 x float> %lhs, <2 x float> %bcast, <2 x float> %accum)
  ret <2 x float> %fused
}
1955*9880d681SAndroid Build Coastguard Worker
; The whole <4 x float> %rhs is subtracted from a -0.0 vector first, and only
; then is lane 3 broadcast across a <4 x float> for llvm.fma.v4f32. The
; directive expects: fmls.4s v0, v1, v2[3].
define <4 x float> @fmls_with_fneg_before_extract_v4f32(<4 x float> %accum, <4 x float> %lhs, <4 x float> %rhs) {
; CHECK-LABEL: fmls_with_fneg_before_extract_v4f32:
; CHECK: fmls.4s v0, v1, v2[3]
  %negated = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %rhs
  ; Lane 3 of the negated vector in all four result lanes.
  %bcast = shufflevector <4 x float> %negated, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %fused = call <4 x float> @llvm.fma.v4f32(<4 x float> %lhs, <4 x float> %bcast, <4 x float> %accum)
  ret <4 x float> %fused
}
1964*9880d681SAndroid Build Coastguard Worker
; The whole <2 x float> %rhs is subtracted from a -0.0 vector first, and only
; then is lane 1 broadcast across a <4 x float> for llvm.fma.v4f32. The
; directive expects: fmls.4s v0, v1, v2[1].
define <4 x float> @fmls_with_fneg_before_extract_v4f32_1(<4 x float> %accum, <4 x float> %lhs, <2 x float> %rhs) {
; CHECK-LABEL: fmls_with_fneg_before_extract_v4f32_1:
; CHECK: fmls.4s v0, v1, v2[1]
  %negated = fsub <2 x float> <float -0.0, float -0.0>, %rhs
  ; Lane 1 of the negated vector in all four result lanes.
  %bcast = shufflevector <2 x float> %negated, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %fused = call <4 x float> @llvm.fma.v4f32(<4 x float> %lhs, <4 x float> %bcast, <4 x float> %accum)
  ret <4 x float> %fused
}
1973*9880d681SAndroid Build Coastguard Worker
; The whole <2 x double> %rhs is subtracted from a -0.0 vector first, and only
; then is lane 1 broadcast into a <2 x double> for llvm.fma.v2f64. The
; directive expects: fmls.2d v0, v1, v2[1].
define <2 x double> @fmls_with_fneg_before_extract_v2f64(<2 x double> %accum, <2 x double> %lhs, <2 x double> %rhs) {
; CHECK-LABEL: fmls_with_fneg_before_extract_v2f64:
; CHECK: fmls.2d v0, v1, v2[1]
  %negated = fsub <2 x double> <double -0.0, double -0.0>, %rhs
  ; Lane 1 of the negated vector in both result lanes.
  %bcast = shufflevector <2 x double> %negated, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %fused = call <2 x double> @llvm.fma.v2f64(<2 x double> %lhs, <2 x double> %bcast, <2 x double> %accum)
  ret <2 x double> %fused
}
1982*9880d681SAndroid Build Coastguard Worker
; Multiplies two <1 x double> values; the directive only requires that an
; "fmul" appears after the function label.
define <1 x double> @test_fmul_v1f64(<1 x double> %L, <1 x double> %R) nounwind {
; CHECK-LABEL: test_fmul_v1f64:
; CHECK: fmul
  %product = fmul <1 x double> %L, %R
  ret <1 x double> %product
}
1989*9880d681SAndroid Build Coastguard Worker
; Divides two <1 x double> values; the directive only requires that an "fdiv"
; appears after the function label. The instruction is matched with a plain
; match directive (as the fmul sibling test does): the label form of the
; directive is reserved for the function symbol that delimits this test's
; slice of the output, not for instructions inside it.
define <1 x double> @test_fdiv_v1f64(<1 x double> %L, <1 x double> %R) nounwind {
; CHECK-LABEL: test_fdiv_v1f64:
; CHECK: fdiv
  %quot = fdiv <1 x double> %L, %R
  ret <1 x double> %quot
}
1996*9880d681SAndroid Build Coastguard Worker
; Scalar form: the i64 result of the sqdmulls.scalar intrinsic on %A and %B is
; combined with %C through the sqadd.i64 intrinsic. The directive requires
; "sqdmlal" after the function label.
define i64 @sqdmlal_d(i32 %A, i32 %B, i64 %C) nounwind {
;CHECK-LABEL: sqdmlal_d:
;CHECK: sqdmlal
  %mul = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %A, i32 %B)
  %acc = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %C, i64 %mul)
  ret i64 %acc
}
2004*9880d681SAndroid Build Coastguard Worker
; Scalar form: the i64 result of the sqdmulls.scalar intrinsic on %A and %B is
; combined with %C through the sqsub.i64 intrinsic (%C is the first operand).
; The directive requires "sqdmlsl" after the function label.
define i64 @sqdmlsl_d(i32 %A, i32 %B, i64 %C) nounwind {
;CHECK-LABEL: sqdmlsl_d:
;CHECK: sqdmlsl
  %mul = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %A, i32 %B)
  %acc = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %C, i64 %mul)
  ret i64 %acc
}
2012*9880d681SAndroid Build Coastguard Worker
; Passes two i64 scalars straight to the pmull64 intrinsic; the directive
; requires "pmull.1q" after the function label.
define <16 x i8> @test_pmull_64(i64 %l, i64 %r) nounwind {
; CHECK-LABEL: test_pmull_64:
; CHECK: pmull.1q
  %poly = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r)
  ret <16 x i8> %poly
}
2019*9880d681SAndroid Build Coastguard Worker
; Extracts lane 1 from each <2 x i64> operand and passes the two scalars to
; the pmull64 intrinsic; the directive requires the "pmull2.1q" form after the
; function label.
define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind {
; CHECK-LABEL: test_pmull_high_64:
; CHECK: pmull2.1q
  %l.top = extractelement <2 x i64> %l, i32 1
  %r.top = extractelement <2 x i64> %r, i32 1
  %poly = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l.top, i64 %r.top)
  ret <16 x i8> %poly
}
2028*9880d681SAndroid Build Coastguard Worker
2029*9880d681SAndroid Build Coastguard Workerdeclare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64)
2030*9880d681SAndroid Build Coastguard Worker
; Integer multiply of two <1 x i64> values; the directive only requires that
; "mul" appears after the function label.
define <1 x i64> @test_mul_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) nounwind {
; CHECK-LABEL: test_mul_v1i64:
; CHECK: mul
  %product = mul <1 x i64> %lhs, %rhs
  ret <1 x i64> %product
}
2037