xref: /aosp_15_r20/external/llvm/test/CodeGen/ARM/vldlane.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s
2*9880d681SAndroid Build Coastguard Worker
3*9880d681SAndroid Build Coastguard Worker; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon -regalloc=basic %s -o - \
4*9880d681SAndroid Build Coastguard Worker; RUN:	| FileCheck %s
5*9880d681SAndroid Build Coastguard Worker
;Insert an i8 loaded from %A into element 3 of the <8 x i8> loaded from %B.
;The scalar load is marked align 8, yet the expected vld1.8 lane load has no
;alignment qualifier on its address operand.
6*9880d681SAndroid Build Coastguard Workerdefine <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind {
7*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld1lanei8:
8*9880d681SAndroid Build Coastguard Worker;Check the (default) alignment value.
9*9880d681SAndroid Build Coastguard Worker;CHECK: vld1.8 {d16[3]}, [r0]
10*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <8 x i8>, <8 x i8>* %B
11*9880d681SAndroid Build Coastguard Worker	%tmp2 = load i8, i8* %A, align 8
12*9880d681SAndroid Build Coastguard Worker	%tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 3
13*9880d681SAndroid Build Coastguard Worker        ret <8 x i8> %tmp3
14*9880d681SAndroid Build Coastguard Worker}
15*9880d681SAndroid Build Coastguard Worker
;Insert an i16 loaded from %A into element 2 of the <4 x i16> loaded from %B.
;The scalar load is over-aligned (align 8); the expected vld1.16 lane load
;uses the :16 address qualifier.
16*9880d681SAndroid Build Coastguard Workerdefine <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind {
17*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld1lanei16:
18*9880d681SAndroid Build Coastguard Worker;Check the alignment value.  Max for this instruction is 16 bits:
19*9880d681SAndroid Build Coastguard Worker;CHECK: vld1.16 {d16[2]}, [r0:16]
20*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <4 x i16>, <4 x i16>* %B
21*9880d681SAndroid Build Coastguard Worker	%tmp2 = load i16, i16* %A, align 8
22*9880d681SAndroid Build Coastguard Worker	%tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 2
23*9880d681SAndroid Build Coastguard Worker        ret <4 x i16> %tmp3
24*9880d681SAndroid Build Coastguard Worker}
25*9880d681SAndroid Build Coastguard Worker
;Insert an i32 loaded from %A into element 1 of the <2 x i32> loaded from %B.
;The scalar load is over-aligned (align 8); the expected vld1.32 lane load
;uses the :32 address qualifier.
26*9880d681SAndroid Build Coastguard Workerdefine <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
27*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld1lanei32:
28*9880d681SAndroid Build Coastguard Worker;Check the alignment value.  Max for this instruction is 32 bits:
29*9880d681SAndroid Build Coastguard Worker;CHECK: vld1.32 {d16[1]}, [r0:32]
30*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <2 x i32>, <2 x i32>* %B
31*9880d681SAndroid Build Coastguard Worker	%tmp2 = load i32, i32* %A, align 8
32*9880d681SAndroid Build Coastguard Worker	%tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
33*9880d681SAndroid Build Coastguard Worker        ret <2 x i32> %tmp3
34*9880d681SAndroid Build Coastguard Worker}
35*9880d681SAndroid Build Coastguard Worker
;Same shape as the i32 lane-1 insert, but the scalar load is marked align 4
;(exactly the element size); the expected vld1.32 still uses the :32 qualifier.
36*9880d681SAndroid Build Coastguard Workerdefine <2 x i32> @vld1lanei32a32(i32* %A, <2 x i32>* %B) nounwind {
37*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld1lanei32a32:
38*9880d681SAndroid Build Coastguard Worker;Check the alignment value.  Legal values are none or :32.
39*9880d681SAndroid Build Coastguard Worker;CHECK: vld1.32 {d16[1]}, [r0:32]
40*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <2 x i32>, <2 x i32>* %B
41*9880d681SAndroid Build Coastguard Worker	%tmp2 = load i32, i32* %A, align 4
42*9880d681SAndroid Build Coastguard Worker	%tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
43*9880d681SAndroid Build Coastguard Worker        ret <2 x i32> %tmp3
44*9880d681SAndroid Build Coastguard Worker}
45*9880d681SAndroid Build Coastguard Worker
;Insert a float loaded from %A (align 4) into element 1 of the <2 x float>
;loaded from %B; expected to be emitted as a vld1.32 lane load with :32.
46*9880d681SAndroid Build Coastguard Workerdefine <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind {
47*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld1lanef:
48*9880d681SAndroid Build Coastguard Worker;CHECK: vld1.32 {d16[1]}, [r0:32]
49*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <2 x float>, <2 x float>* %B
50*9880d681SAndroid Build Coastguard Worker	%tmp2 = load float, float* %A, align 4
51*9880d681SAndroid Build Coastguard Worker	%tmp3 = insertelement <2 x float> %tmp1, float %tmp2, i32 1
52*9880d681SAndroid Build Coastguard Worker	ret <2 x float> %tmp3
53*9880d681SAndroid Build Coastguard Worker}
54*9880d681SAndroid Build Coastguard Worker
;128-bit variant: insert an i8 loaded from %A into element 9 of the <16 x i8>
;loaded from %B.  The expected output addresses that element as lane 1 of d17
;and carries no alignment qualifier.
55*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
56*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld1laneQi8:
57*9880d681SAndroid Build Coastguard Worker;CHECK: vld1.8 {d17[1]}, [r0]
58*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <16 x i8>, <16 x i8>* %B
59*9880d681SAndroid Build Coastguard Worker	%tmp2 = load i8, i8* %A, align 8
60*9880d681SAndroid Build Coastguard Worker	%tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 9
61*9880d681SAndroid Build Coastguard Worker	ret <16 x i8> %tmp3
62*9880d681SAndroid Build Coastguard Worker}
63*9880d681SAndroid Build Coastguard Worker
;128-bit variant: insert an i16 loaded from %A (align 8) into element 5 of the
;<8 x i16> loaded from %B.  The expected output addresses that element as
;lane 1 of d17 with the :16 qualifier.
64*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
65*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld1laneQi16:
66*9880d681SAndroid Build Coastguard Worker;CHECK: vld1.16 {d17[1]}, [r0:16]
67*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <8 x i16>, <8 x i16>* %B
68*9880d681SAndroid Build Coastguard Worker	%tmp2 = load i16, i16* %A, align 8
69*9880d681SAndroid Build Coastguard Worker	%tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 5
70*9880d681SAndroid Build Coastguard Worker	ret <8 x i16> %tmp3
71*9880d681SAndroid Build Coastguard Worker}
72*9880d681SAndroid Build Coastguard Worker
;128-bit variant: insert an i32 loaded from %A (align 8) into element 3 of the
;<4 x i32> loaded from %B.  The expected output addresses that element as
;lane 1 of d17 with the :32 qualifier.
73*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
74*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld1laneQi32:
75*9880d681SAndroid Build Coastguard Worker;CHECK: vld1.32 {d17[1]}, [r0:32]
76*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <4 x i32>, <4 x i32>* %B
77*9880d681SAndroid Build Coastguard Worker	%tmp2 = load i32, i32* %A, align 8
78*9880d681SAndroid Build Coastguard Worker	%tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 3
79*9880d681SAndroid Build Coastguard Worker	ret <4 x i32> %tmp3
80*9880d681SAndroid Build Coastguard Worker}
81*9880d681SAndroid Build Coastguard Worker
;128-bit variant: insert a float loaded from %A into element 0 of the
;<4 x float> loaded from %B.  The scalar load has no explicit align attribute;
;the expected output is lane 0 of d16 with the :32 qualifier.
82*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind {
83*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld1laneQf:
84*9880d681SAndroid Build Coastguard Worker;CHECK: vld1.32 {d16[0]}, [r0:32]
85*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <4 x float>, <4 x float>* %B
86*9880d681SAndroid Build Coastguard Worker	%tmp2 = load float, float* %A
87*9880d681SAndroid Build Coastguard Worker	%tmp3 = insertelement <4 x float> %tmp1, float %tmp2, i32 0
88*9880d681SAndroid Build Coastguard Worker	ret <4 x float> %tmp3
89*9880d681SAndroid Build Coastguard Worker}
90*9880d681SAndroid Build Coastguard Worker
;Two-member aggregate types returned by the vld2lane intrinsic calls in this
;file.  The first group holds 64-bit vectors, the second group 128-bit vectors.
91*9880d681SAndroid Build Coastguard Worker%struct.__neon_int8x8x2_t = type { <8 x i8>,  <8 x i8> }
92*9880d681SAndroid Build Coastguard Worker%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
93*9880d681SAndroid Build Coastguard Worker%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
94*9880d681SAndroid Build Coastguard Worker%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }
95*9880d681SAndroid Build Coastguard Worker
96*9880d681SAndroid Build Coastguard Worker%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
97*9880d681SAndroid Build Coastguard Worker%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
98*9880d681SAndroid Build Coastguard Worker%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
99*9880d681SAndroid Build Coastguard Worker
;Call the v8i8 vld2lane intrinsic on %A, passing the vector loaded from %B as
;both input vectors, with lane index 1 and alignment 4, then add the two
;returned vectors so that both results are used.
100*9880d681SAndroid Build Coastguard Workerdefine <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
101*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld2lanei8:
102*9880d681SAndroid Build Coastguard Worker;Check the alignment value.  Max for this instruction is 16 bits:
103*9880d681SAndroid Build Coastguard Worker;CHECK: vld2.8 {d16[1], d17[1]}, [r0:16]
104*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <8 x i8>, <8 x i8>* %B
105*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
106*9880d681SAndroid Build Coastguard Worker        %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
107*9880d681SAndroid Build Coastguard Worker        %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
108*9880d681SAndroid Build Coastguard Worker        %tmp5 = add <8 x i8> %tmp3, %tmp4
109*9880d681SAndroid Build Coastguard Worker	ret <8 x i8> %tmp5
110*9880d681SAndroid Build Coastguard Worker}
111*9880d681SAndroid Build Coastguard Worker
;Call the v4i16 vld2lane intrinsic on %A (bitcast to i8*) with lane index 1
;and alignment 8, then add the two returned vectors.  The expected vld2.16
;uses the :32 address qualifier.
112*9880d681SAndroid Build Coastguard Workerdefine <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
113*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld2lanei16:
114*9880d681SAndroid Build Coastguard Worker;Check the alignment value.  Max for this instruction is 32 bits:
115*9880d681SAndroid Build Coastguard Worker;CHECK: vld2.16 {d16[1], d17[1]}, [r0:32]
116*9880d681SAndroid Build Coastguard Worker	%tmp0 = bitcast i16* %A to i8*
117*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <4 x i16>, <4 x i16>* %B
118*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
119*9880d681SAndroid Build Coastguard Worker        %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0
120*9880d681SAndroid Build Coastguard Worker        %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
121*9880d681SAndroid Build Coastguard Worker        %tmp5 = add <4 x i16> %tmp3, %tmp4
122*9880d681SAndroid Build Coastguard Worker	ret <4 x i16> %tmp5
123*9880d681SAndroid Build Coastguard Worker}
124*9880d681SAndroid Build Coastguard Worker
;Call the v2i32 vld2lane intrinsic on %A (bitcast to i8*) with lane index 1
;and alignment 1, then add the two returned vectors.  Only the vld2.32
;mnemonic is matched; registers and address form are not pinned here.
125*9880d681SAndroid Build Coastguard Workerdefine <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
126*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld2lanei32:
127*9880d681SAndroid Build Coastguard Worker;CHECK: vld2.32
128*9880d681SAndroid Build Coastguard Worker	%tmp0 = bitcast i32* %A to i8*
129*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <2 x i32>, <2 x i32>* %B
130*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
131*9880d681SAndroid Build Coastguard Worker        %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
132*9880d681SAndroid Build Coastguard Worker        %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
133*9880d681SAndroid Build Coastguard Worker        %tmp5 = add <2 x i32> %tmp3, %tmp4
134*9880d681SAndroid Build Coastguard Worker	ret <2 x i32> %tmp5
135*9880d681SAndroid Build Coastguard Worker}
136*9880d681SAndroid Build Coastguard Worker
137*9880d681SAndroid Build Coastguard Worker;Check for a post-increment updating load.
;Load the pointer %A from %ptr, perform a v2i32 vld2lane (lane index 1,
;alignment 1) through it, then store %A advanced by two i32 elements back to
;%ptr.  The expected output folds the pointer bump into the load as the
;writeback address form "[rN]!".
138*9880d681SAndroid Build Coastguard Workerdefine <2 x i32> @vld2lanei32_update(i32** %ptr, <2 x i32>* %B) nounwind {
139*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld2lanei32_update:
140*9880d681SAndroid Build Coastguard Worker;CHECK: vld2.32 {d16[1], d17[1]}, [{{r[0-9]+}}]!
141*9880d681SAndroid Build Coastguard Worker	%A = load i32*, i32** %ptr
142*9880d681SAndroid Build Coastguard Worker	%tmp0 = bitcast i32* %A to i8*
143*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <2 x i32>, <2 x i32>* %B
144*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
145*9880d681SAndroid Build Coastguard Worker	%tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
146*9880d681SAndroid Build Coastguard Worker	%tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
147*9880d681SAndroid Build Coastguard Worker	%tmp5 = add <2 x i32> %tmp3, %tmp4
;Advance by two i32 elements, one per lane loaded above, and write back.
148*9880d681SAndroid Build Coastguard Worker	%tmp6 = getelementptr i32, i32* %A, i32 2
149*9880d681SAndroid Build Coastguard Worker	store i32* %tmp6, i32** %ptr
150*9880d681SAndroid Build Coastguard Worker	ret <2 x i32> %tmp5
151*9880d681SAndroid Build Coastguard Worker}
152*9880d681SAndroid Build Coastguard Worker
;Floating-point counterpart of the v2i32 case: call the v2f32 vld2lane
;intrinsic with lane index 1 and alignment 1, then fadd the two returned
;vectors.  Only the vld2.32 mnemonic is matched.
153*9880d681SAndroid Build Coastguard Workerdefine <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
154*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld2lanef:
155*9880d681SAndroid Build Coastguard Worker;CHECK: vld2.32
156*9880d681SAndroid Build Coastguard Worker	%tmp0 = bitcast float* %A to i8*
157*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <2 x float>, <2 x float>* %B
158*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32.p0i8(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
159*9880d681SAndroid Build Coastguard Worker        %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0
160*9880d681SAndroid Build Coastguard Worker        %tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1
161*9880d681SAndroid Build Coastguard Worker        %tmp5 = fadd <2 x float> %tmp3, %tmp4
162*9880d681SAndroid Build Coastguard Worker	ret <2 x float> %tmp5
163*9880d681SAndroid Build Coastguard Worker}
164*9880d681SAndroid Build Coastguard Worker
;128-bit variant: call the v8i16 vld2lane intrinsic with lane index 5 and
;alignment 1, then add the two returned vectors.  The expected output
;addresses lane 1 of d17 and d19 and has no alignment qualifier.
165*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
166*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld2laneQi16:
167*9880d681SAndroid Build Coastguard Worker;Check the (default) alignment.
168*9880d681SAndroid Build Coastguard Worker;CHECK: vld2.16 {d17[1], d19[1]}, [{{r[0-9]+}}]
169*9880d681SAndroid Build Coastguard Worker	%tmp0 = bitcast i16* %A to i8*
170*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <8 x i16>, <8 x i16>* %B
171*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
172*9880d681SAndroid Build Coastguard Worker        %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0
173*9880d681SAndroid Build Coastguard Worker        %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1
174*9880d681SAndroid Build Coastguard Worker        %tmp5 = add <8 x i16> %tmp3, %tmp4
175*9880d681SAndroid Build Coastguard Worker	ret <8 x i16> %tmp5
176*9880d681SAndroid Build Coastguard Worker}
177*9880d681SAndroid Build Coastguard Worker
;128-bit variant: call the v4i32 vld2lane intrinsic with lane index 2 and
;alignment 16, then add the two returned vectors.  The expected output
;addresses lane 0 of d17 and d19 with the :64 qualifier.
178*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
179*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld2laneQi32:
180*9880d681SAndroid Build Coastguard Worker;Check the alignment value.  Max for this instruction is 64 bits:
181*9880d681SAndroid Build Coastguard Worker;CHECK: vld2.32 {d17[0], d19[0]}, [{{r[0-9]+}}:64]
182*9880d681SAndroid Build Coastguard Worker	%tmp0 = bitcast i32* %A to i8*
183*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <4 x i32>, <4 x i32>* %B
184*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32.p0i8(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
185*9880d681SAndroid Build Coastguard Worker        %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0
186*9880d681SAndroid Build Coastguard Worker        %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1
187*9880d681SAndroid Build Coastguard Worker        %tmp5 = add <4 x i32> %tmp3, %tmp4
188*9880d681SAndroid Build Coastguard Worker	ret <4 x i32> %tmp5
189*9880d681SAndroid Build Coastguard Worker}
190*9880d681SAndroid Build Coastguard Worker
;128-bit floating-point variant: call the v4f32 vld2lane intrinsic with lane
;index 1 and alignment 1, then fadd the two returned vectors.  Only the
;vld2.32 mnemonic is matched.
191*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind {
192*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld2laneQf:
193*9880d681SAndroid Build Coastguard Worker;CHECK: vld2.32
194*9880d681SAndroid Build Coastguard Worker	%tmp0 = bitcast float* %A to i8*
195*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <4 x float>, <4 x float>* %B
196*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32.p0i8(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
197*9880d681SAndroid Build Coastguard Worker        %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0
198*9880d681SAndroid Build Coastguard Worker        %tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1
199*9880d681SAndroid Build Coastguard Worker        %tmp5 = fadd <4 x float> %tmp3, %tmp4
200*9880d681SAndroid Build Coastguard Worker	ret <4 x float> %tmp5
201*9880d681SAndroid Build Coastguard Worker}
202*9880d681SAndroid Build Coastguard Worker
;Declarations of the vld2lane intrinsics called above.  Each takes an i8*
;address, two input vectors, and two i32 operands (used by the calls in this
;file as lane index and alignment), and is marked nounwind readonly.
203*9880d681SAndroid Build Coastguard Workerdeclare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
204*9880d681SAndroid Build Coastguard Workerdeclare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
205*9880d681SAndroid Build Coastguard Workerdeclare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
206*9880d681SAndroid Build Coastguard Workerdeclare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32.p0i8(i8*, <2 x float>, <2 x float>, i32, i32) nounwind readonly
207*9880d681SAndroid Build Coastguard Worker
208*9880d681SAndroid Build Coastguard Workerdeclare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16.p0i8(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
209*9880d681SAndroid Build Coastguard Workerdeclare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32.p0i8(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
210*9880d681SAndroid Build Coastguard Workerdeclare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32.p0i8(i8*, <4 x float>, <4 x float>, i32, i32) nounwind readonly
211*9880d681SAndroid Build Coastguard Worker
;Three-member aggregate types returned by the vld3lane intrinsic calls in this
;file.  The first group holds 64-bit vectors, the second group 128-bit vectors.
212*9880d681SAndroid Build Coastguard Worker%struct.__neon_int8x8x3_t = type { <8 x i8>,  <8 x i8>,  <8 x i8> }
213*9880d681SAndroid Build Coastguard Worker%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
214*9880d681SAndroid Build Coastguard Worker%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
215*9880d681SAndroid Build Coastguard Worker%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }
216*9880d681SAndroid Build Coastguard Worker
217*9880d681SAndroid Build Coastguard Worker%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
218*9880d681SAndroid Build Coastguard Worker%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
219*9880d681SAndroid Build Coastguard Worker%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }
220*9880d681SAndroid Build Coastguard Worker
;Call the v8i8 vld3lane intrinsic on %A, passing the vector loaded from %B as
;all three input vectors, with lane index 1 and alignment 1, then sum the
;three returned vectors.  Only the vld3.8 mnemonic is matched.
221*9880d681SAndroid Build Coastguard Workerdefine <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
222*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld3lanei8:
223*9880d681SAndroid Build Coastguard Worker;CHECK: vld3.8
224*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <8 x i8>, <8 x i8>* %B
225*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
226*9880d681SAndroid Build Coastguard Worker        %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
227*9880d681SAndroid Build Coastguard Worker        %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
228*9880d681SAndroid Build Coastguard Worker        %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
229*9880d681SAndroid Build Coastguard Worker        %tmp6 = add <8 x i8> %tmp3, %tmp4
230*9880d681SAndroid Build Coastguard Worker        %tmp7 = add <8 x i8> %tmp5, %tmp6
231*9880d681SAndroid Build Coastguard Worker	ret <8 x i8> %tmp7
232*9880d681SAndroid Build Coastguard Worker}
233*9880d681SAndroid Build Coastguard Worker
;Call the v4i16 vld3lane intrinsic on %A (bitcast to i8*) with lane index 1
;and alignment 8, then sum the three returned vectors.  Despite the alignment
;operand, the expected vld3.16 has no alignment qualifier on its address.
234*9880d681SAndroid Build Coastguard Workerdefine <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
235*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld3lanei16:
236*9880d681SAndroid Build Coastguard Worker;Check the (default) alignment value.  VLD3 does not support alignment.
237*9880d681SAndroid Build Coastguard Worker;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}]
238*9880d681SAndroid Build Coastguard Worker	%tmp0 = bitcast i16* %A to i8*
239*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <4 x i16>, <4 x i16>* %B
240*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
241*9880d681SAndroid Build Coastguard Worker        %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0
242*9880d681SAndroid Build Coastguard Worker        %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1
243*9880d681SAndroid Build Coastguard Worker        %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2
244*9880d681SAndroid Build Coastguard Worker        %tmp6 = add <4 x i16> %tmp3, %tmp4
245*9880d681SAndroid Build Coastguard Worker        %tmp7 = add <4 x i16> %tmp5, %tmp6
246*9880d681SAndroid Build Coastguard Worker	ret <4 x i16> %tmp7
247*9880d681SAndroid Build Coastguard Worker}
248*9880d681SAndroid Build Coastguard Worker
;Call the v2i32 vld3lane intrinsic on %A (bitcast to i8*) with lane index 1
;and alignment 1, then sum the three returned vectors.  Only the vld3.32
;mnemonic is matched.
249*9880d681SAndroid Build Coastguard Workerdefine <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind {
250*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld3lanei32:
251*9880d681SAndroid Build Coastguard Worker;CHECK: vld3.32
252*9880d681SAndroid Build Coastguard Worker	%tmp0 = bitcast i32* %A to i8*
253*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <2 x i32>, <2 x i32>* %B
254*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
255*9880d681SAndroid Build Coastguard Worker        %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0
256*9880d681SAndroid Build Coastguard Worker        %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1
257*9880d681SAndroid Build Coastguard Worker        %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2
258*9880d681SAndroid Build Coastguard Worker        %tmp6 = add <2 x i32> %tmp3, %tmp4
259*9880d681SAndroid Build Coastguard Worker        %tmp7 = add <2 x i32> %tmp5, %tmp6
260*9880d681SAndroid Build Coastguard Worker	ret <2 x i32> %tmp7
261*9880d681SAndroid Build Coastguard Worker}
262*9880d681SAndroid Build Coastguard Worker
;Floating-point counterpart of the v2i32 case: call the v2f32 vld3lane
;intrinsic with lane index 1 and alignment 1, then fadd the three returned
;vectors together.  Only the vld3.32 mnemonic is matched.
263*9880d681SAndroid Build Coastguard Workerdefine <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
264*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld3lanef:
265*9880d681SAndroid Build Coastguard Worker;CHECK: vld3.32
266*9880d681SAndroid Build Coastguard Worker	%tmp0 = bitcast float* %A to i8*
267*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <2 x float>, <2 x float>* %B
268*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32.p0i8(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
269*9880d681SAndroid Build Coastguard Worker        %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0
270*9880d681SAndroid Build Coastguard Worker        %tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1
271*9880d681SAndroid Build Coastguard Worker        %tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2
272*9880d681SAndroid Build Coastguard Worker        %tmp6 = fadd <2 x float> %tmp3, %tmp4
273*9880d681SAndroid Build Coastguard Worker        %tmp7 = fadd <2 x float> %tmp5, %tmp6
274*9880d681SAndroid Build Coastguard Worker	ret <2 x float> %tmp7
275*9880d681SAndroid Build Coastguard Worker}
276*9880d681SAndroid Build Coastguard Worker
;128-bit variant: call the v8i16 vld3lane intrinsic with lane index 1 and
;alignment 8, then sum the three returned vectors.  The expected vld3.16
;matches lane 1 of three d registers and no alignment qualifier.
277*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
278*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld3laneQi16:
279*9880d681SAndroid Build Coastguard Worker;Check the (default) alignment value.  VLD3 does not support alignment.
280*9880d681SAndroid Build Coastguard Worker;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}]
281*9880d681SAndroid Build Coastguard Worker	%tmp0 = bitcast i16* %A to i8*
282*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <8 x i16>, <8 x i16>* %B
283*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
284*9880d681SAndroid Build Coastguard Worker        %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
285*9880d681SAndroid Build Coastguard Worker        %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
286*9880d681SAndroid Build Coastguard Worker        %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
287*9880d681SAndroid Build Coastguard Worker        %tmp6 = add <8 x i16> %tmp3, %tmp4
288*9880d681SAndroid Build Coastguard Worker        %tmp7 = add <8 x i16> %tmp5, %tmp6
289*9880d681SAndroid Build Coastguard Worker	ret <8 x i16> %tmp7
290*9880d681SAndroid Build Coastguard Worker}
291*9880d681SAndroid Build Coastguard Worker
292*9880d681SAndroid Build Coastguard Worker;Check for a post-increment updating load with register increment.
;Load the pointer %A from %ptr, perform a v8i16 vld3lane (lane index 1,
;alignment 8) through it, then store %A advanced by %inc i16 elements back to
;%ptr.  The expected output folds the variable pointer bump into the load as
;a register post-increment operand ("[rN], rM").
293*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
294*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld3laneQi16_update:
295*9880d681SAndroid Build Coastguard Worker;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}], {{r[0-9]+}}
296*9880d681SAndroid Build Coastguard Worker	%A = load i16*, i16** %ptr
297*9880d681SAndroid Build Coastguard Worker	%tmp0 = bitcast i16* %A to i8*
298*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <8 x i16>, <8 x i16>* %B
299*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
300*9880d681SAndroid Build Coastguard Worker	%tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
301*9880d681SAndroid Build Coastguard Worker	%tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
302*9880d681SAndroid Build Coastguard Worker	%tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
303*9880d681SAndroid Build Coastguard Worker	%tmp6 = add <8 x i16> %tmp3, %tmp4
304*9880d681SAndroid Build Coastguard Worker	%tmp7 = add <8 x i16> %tmp5, %tmp6
;Advance by the runtime increment %inc (in i16 elements) and write back.
305*9880d681SAndroid Build Coastguard Worker	%tmp8 = getelementptr i16, i16* %A, i32 %inc
306*9880d681SAndroid Build Coastguard Worker	store i16* %tmp8, i16** %ptr
307*9880d681SAndroid Build Coastguard Worker	ret <8 x i16> %tmp7
308*9880d681SAndroid Build Coastguard Worker}
309*9880d681SAndroid Build Coastguard Worker
;128-bit variant: call the v4i32 vld3lane intrinsic with lane index 3 and
;alignment 1, then sum the three returned vectors.  Only the vld3.32
;mnemonic is matched.
310*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
311*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld3laneQi32:
312*9880d681SAndroid Build Coastguard Worker;CHECK: vld3.32
313*9880d681SAndroid Build Coastguard Worker	%tmp0 = bitcast i32* %A to i8*
314*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <4 x i32>, <4 x i32>* %B
315*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32.p0i8(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3, i32 1)
316*9880d681SAndroid Build Coastguard Worker        %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0
317*9880d681SAndroid Build Coastguard Worker        %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1
318*9880d681SAndroid Build Coastguard Worker        %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2
319*9880d681SAndroid Build Coastguard Worker        %tmp6 = add <4 x i32> %tmp3, %tmp4
320*9880d681SAndroid Build Coastguard Worker        %tmp7 = add <4 x i32> %tmp5, %tmp6
321*9880d681SAndroid Build Coastguard Worker	ret <4 x i32> %tmp7
322*9880d681SAndroid Build Coastguard Worker}
323*9880d681SAndroid Build Coastguard Worker
;128-bit floating-point variant: call the v4f32 vld3lane intrinsic with lane
;index 1 and alignment 1, then fadd the three returned vectors together.
;Only the vld3.32 mnemonic is matched.
324*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind {
325*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld3laneQf:
326*9880d681SAndroid Build Coastguard Worker;CHECK: vld3.32
327*9880d681SAndroid Build Coastguard Worker	%tmp0 = bitcast float* %A to i8*
328*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <4 x float>, <4 x float>* %B
329*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32.p0i8(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
330*9880d681SAndroid Build Coastguard Worker        %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0
331*9880d681SAndroid Build Coastguard Worker        %tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1
332*9880d681SAndroid Build Coastguard Worker        %tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2
333*9880d681SAndroid Build Coastguard Worker        %tmp6 = fadd <4 x float> %tmp3, %tmp4
334*9880d681SAndroid Build Coastguard Worker        %tmp7 = fadd <4 x float> %tmp5, %tmp6
335*9880d681SAndroid Build Coastguard Worker	ret <4 x float> %tmp7
336*9880d681SAndroid Build Coastguard Worker}
337*9880d681SAndroid Build Coastguard Worker
;Declarations of the vld3lane intrinsics called above.  Each takes an i8*
;address, three input vectors, and two i32 operands (used by the calls in this
;file as lane index and alignment), and is marked nounwind readonly.
338*9880d681SAndroid Build Coastguard Workerdeclare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8.p0i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
339*9880d681SAndroid Build Coastguard Workerdeclare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16.p0i8(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
340*9880d681SAndroid Build Coastguard Workerdeclare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32.p0i8(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
341*9880d681SAndroid Build Coastguard Workerdeclare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32.p0i8(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly
342*9880d681SAndroid Build Coastguard Worker
343*9880d681SAndroid Build Coastguard Workerdeclare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16.p0i8(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
344*9880d681SAndroid Build Coastguard Workerdeclare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32.p0i8(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
345*9880d681SAndroid Build Coastguard Workerdeclare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32.p0i8(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly
346*9880d681SAndroid Build Coastguard Worker
;Four-member aggregate types returned by the vld4lane intrinsic calls in this
;file.  The first group holds 64-bit vectors, the second group 128-bit vectors.
347*9880d681SAndroid Build Coastguard Worker%struct.__neon_int8x8x4_t = type { <8 x i8>,  <8 x i8>,  <8 x i8>,  <8 x i8> }
348*9880d681SAndroid Build Coastguard Worker%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
349*9880d681SAndroid Build Coastguard Worker%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
350*9880d681SAndroid Build Coastguard Worker%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }
351*9880d681SAndroid Build Coastguard Worker
352*9880d681SAndroid Build Coastguard Worker%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
353*9880d681SAndroid Build Coastguard Worker%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
354*9880d681SAndroid Build Coastguard Worker%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
355*9880d681SAndroid Build Coastguard Worker
; Calls the <8 x i8> vld4lane intrinsic on %A with lane operand 1 and
; alignment operand 8.  The vector loaded from %B is passed as all four vector
; operands, and the function returns the sum of the four result vectors.
; The expected instruction uses lane [1] on four D registers (any register
; numbers) and a ":32" alignment specifier on the address.
356*9880d681SAndroid Build Coastguard Workerdefine <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
357*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld4lanei8:
358*9880d681SAndroid Build Coastguard Worker;Check the alignment value.  Max for this instruction is 32 bits:
359*9880d681SAndroid Build Coastguard Worker;CHECK: vld4.8 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}:32]
360*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <8 x i8>, <8 x i8>* %B
361*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
362*9880d681SAndroid Build Coastguard Worker        %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
363*9880d681SAndroid Build Coastguard Worker        %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
364*9880d681SAndroid Build Coastguard Worker        %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
365*9880d681SAndroid Build Coastguard Worker        %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
366*9880d681SAndroid Build Coastguard Worker        %tmp7 = add <8 x i8> %tmp3, %tmp4
367*9880d681SAndroid Build Coastguard Worker        %tmp8 = add <8 x i8> %tmp5, %tmp6
368*9880d681SAndroid Build Coastguard Worker        %tmp9 = add <8 x i8> %tmp7, %tmp8
369*9880d681SAndroid Build Coastguard Worker	ret <8 x i8> %tmp9
370*9880d681SAndroid Build Coastguard Worker}
371*9880d681SAndroid Build Coastguard Worker
372*9880d681SAndroid Build Coastguard Worker;Check for a post-increment updating load.
; The address is first loaded from %ptr and used for the same <8 x i8>
; vld4lane call as in vld4lanei8 (lane operand 1, alignment operand 8).
; It is then advanced by 4 bytes with getelementptr and stored back to %ptr.
; The expected instruction therefore ends with "!" after the address operand
; and names d16-d19 explicitly.
373*9880d681SAndroid Build Coastguard Workerdefine <8 x i8> @vld4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
374*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld4lanei8_update:
375*9880d681SAndroid Build Coastguard Worker;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}:32]!
376*9880d681SAndroid Build Coastguard Worker	%A = load i8*, i8** %ptr
377*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <8 x i8>, <8 x i8>* %B
378*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
379*9880d681SAndroid Build Coastguard Worker	%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
380*9880d681SAndroid Build Coastguard Worker	%tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
381*9880d681SAndroid Build Coastguard Worker	%tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
382*9880d681SAndroid Build Coastguard Worker	%tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
383*9880d681SAndroid Build Coastguard Worker	%tmp7 = add <8 x i8> %tmp3, %tmp4
384*9880d681SAndroid Build Coastguard Worker	%tmp8 = add <8 x i8> %tmp5, %tmp6
385*9880d681SAndroid Build Coastguard Worker	%tmp9 = add <8 x i8> %tmp7, %tmp8
386*9880d681SAndroid Build Coastguard Worker	%tmp10 = getelementptr i8, i8* %A, i32 4
387*9880d681SAndroid Build Coastguard Worker	store i8* %tmp10, i8** %ptr
388*9880d681SAndroid Build Coastguard Worker	ret <8 x i8> %tmp9
389*9880d681SAndroid Build Coastguard Worker}
390*9880d681SAndroid Build Coastguard Worker
; Calls the <4 x i16> vld4lane intrinsic on %A (bitcast to i8*) with lane
; operand 1 and alignment operand 4.  The vector loaded from %B is passed as
; all four vector operands, and the function returns the sum of the four
; result vectors.  The expected instruction has no alignment specifier on the
; address operand.
391*9880d681SAndroid Build Coastguard Workerdefine <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
392*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld4lanei16:
393*9880d681SAndroid Build Coastguard Worker;Check that a power-of-two alignment smaller than the total size of the memory
394*9880d681SAndroid Build Coastguard Worker;being loaded is ignored.
395*9880d681SAndroid Build Coastguard Worker;CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}]
396*9880d681SAndroid Build Coastguard Worker	%tmp0 = bitcast i16* %A to i8*
397*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <4 x i16>, <4 x i16>* %B
398*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 4)
399*9880d681SAndroid Build Coastguard Worker        %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
400*9880d681SAndroid Build Coastguard Worker        %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
401*9880d681SAndroid Build Coastguard Worker        %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2
402*9880d681SAndroid Build Coastguard Worker        %tmp6 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 3
403*9880d681SAndroid Build Coastguard Worker        %tmp7 = add <4 x i16> %tmp3, %tmp4
404*9880d681SAndroid Build Coastguard Worker        %tmp8 = add <4 x i16> %tmp5, %tmp6
405*9880d681SAndroid Build Coastguard Worker        %tmp9 = add <4 x i16> %tmp7, %tmp8
406*9880d681SAndroid Build Coastguard Worker	ret <4 x i16> %tmp9
407*9880d681SAndroid Build Coastguard Worker}
408*9880d681SAndroid Build Coastguard Worker
; Calls the <2 x i32> vld4lane intrinsic on %A (bitcast to i8*) with lane
; operand 1 and alignment operand 8.  The vector loaded from %B is passed as
; all four vector operands, and the function returns the sum of the four
; result vectors.  The expected instruction carries a ":64" alignment
; specifier on the address operand.
409*9880d681SAndroid Build Coastguard Workerdefine <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
410*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld4lanei32:
411*9880d681SAndroid Build Coastguard Worker;Check the alignment value.  An 8-byte alignment is allowed here even though
412*9880d681SAndroid Build Coastguard Worker;it is smaller than the total size of the memory being loaded.
413*9880d681SAndroid Build Coastguard Worker;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}:64]
414*9880d681SAndroid Build Coastguard Worker	%tmp0 = bitcast i32* %A to i8*
415*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <2 x i32>, <2 x i32>* %B
416*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8)
417*9880d681SAndroid Build Coastguard Worker        %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
418*9880d681SAndroid Build Coastguard Worker        %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
419*9880d681SAndroid Build Coastguard Worker        %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2
420*9880d681SAndroid Build Coastguard Worker        %tmp6 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 3
421*9880d681SAndroid Build Coastguard Worker        %tmp7 = add <2 x i32> %tmp3, %tmp4
422*9880d681SAndroid Build Coastguard Worker        %tmp8 = add <2 x i32> %tmp5, %tmp6
423*9880d681SAndroid Build Coastguard Worker        %tmp9 = add <2 x i32> %tmp7, %tmp8
424*9880d681SAndroid Build Coastguard Worker	ret <2 x i32> %tmp9
425*9880d681SAndroid Build Coastguard Worker}
426*9880d681SAndroid Build Coastguard Worker
; Calls the <2 x float> vld4lane intrinsic on %A (bitcast to i8*) with lane
; operand 1 and alignment operand 1.  The vector loaded from %B is passed as
; all four vector operands, and the function returns the fadd-sum of the four
; result vectors.  Only the vld4.32 mnemonic is matched here; registers, lane
; and alignment are not checked.
427*9880d681SAndroid Build Coastguard Workerdefine <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
428*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld4lanef:
429*9880d681SAndroid Build Coastguard Worker;CHECK: vld4.32
430*9880d681SAndroid Build Coastguard Worker	%tmp0 = bitcast float* %A to i8*
431*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <2 x float>, <2 x float>* %B
432*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32.p0i8(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
433*9880d681SAndroid Build Coastguard Worker        %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0
434*9880d681SAndroid Build Coastguard Worker        %tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1
435*9880d681SAndroid Build Coastguard Worker        %tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2
436*9880d681SAndroid Build Coastguard Worker        %tmp6 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 3
437*9880d681SAndroid Build Coastguard Worker        %tmp7 = fadd <2 x float> %tmp3, %tmp4
438*9880d681SAndroid Build Coastguard Worker        %tmp8 = fadd <2 x float> %tmp5, %tmp6
439*9880d681SAndroid Build Coastguard Worker        %tmp9 = fadd <2 x float> %tmp7, %tmp8
440*9880d681SAndroid Build Coastguard Worker	ret <2 x float> %tmp9
441*9880d681SAndroid Build Coastguard Worker}
442*9880d681SAndroid Build Coastguard Worker
; Calls the <8 x i16> (128-bit) vld4lane intrinsic on %A (bitcast to i8*) with
; lane operand 1 and alignment operand 16.  The vector loaded from %B is
; passed as all four vector operands, and the function returns the sum of the
; four result vectors.  The expected instruction names every other D register
; (d16, d18, d20, d22) at lane [1] and carries a ":64" alignment specifier.
443*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
444*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld4laneQi16:
445*9880d681SAndroid Build Coastguard Worker;Check the alignment value.  Max for this instruction is 64 bits:
446*9880d681SAndroid Build Coastguard Worker;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [{{r[0-9]+}}:64]
447*9880d681SAndroid Build Coastguard Worker	%tmp0 = bitcast i16* %A to i8*
448*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <8 x i16>, <8 x i16>* %B
449*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16.p0i8(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16)
450*9880d681SAndroid Build Coastguard Worker        %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0
451*9880d681SAndroid Build Coastguard Worker        %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1
452*9880d681SAndroid Build Coastguard Worker        %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2
453*9880d681SAndroid Build Coastguard Worker        %tmp6 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 3
454*9880d681SAndroid Build Coastguard Worker        %tmp7 = add <8 x i16> %tmp3, %tmp4
455*9880d681SAndroid Build Coastguard Worker        %tmp8 = add <8 x i16> %tmp5, %tmp6
456*9880d681SAndroid Build Coastguard Worker        %tmp9 = add <8 x i16> %tmp7, %tmp8
457*9880d681SAndroid Build Coastguard Worker	ret <8 x i16> %tmp9
458*9880d681SAndroid Build Coastguard Worker}
459*9880d681SAndroid Build Coastguard Worker
; Calls the <4 x i32> (128-bit) vld4lane intrinsic on %A (bitcast to i8*) with
; lane operand 2 and alignment operand 1.  The vector loaded from %B is passed
; as all four vector operands, and the function returns the sum of the four
; result vectors.  In the expected instruction, lane 2 of the 128-bit vector
; appears as lane [0] of the odd-numbered D registers (d17, d19, d21, d23),
; and the address operand has no alignment specifier.
460*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
461*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld4laneQi32:
462*9880d681SAndroid Build Coastguard Worker;Check the (default) alignment.
463*9880d681SAndroid Build Coastguard Worker;CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [{{r[0-9]+}}]
464*9880d681SAndroid Build Coastguard Worker	%tmp0 = bitcast i32* %A to i8*
465*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <4 x i32>, <4 x i32>* %B
466*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32.p0i8(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
467*9880d681SAndroid Build Coastguard Worker        %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0
468*9880d681SAndroid Build Coastguard Worker        %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1
469*9880d681SAndroid Build Coastguard Worker        %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2
470*9880d681SAndroid Build Coastguard Worker        %tmp6 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 3
471*9880d681SAndroid Build Coastguard Worker        %tmp7 = add <4 x i32> %tmp3, %tmp4
472*9880d681SAndroid Build Coastguard Worker        %tmp8 = add <4 x i32> %tmp5, %tmp6
473*9880d681SAndroid Build Coastguard Worker        %tmp9 = add <4 x i32> %tmp7, %tmp8
474*9880d681SAndroid Build Coastguard Worker	ret <4 x i32> %tmp9
475*9880d681SAndroid Build Coastguard Worker}
476*9880d681SAndroid Build Coastguard Worker
; Calls the <4 x float> (128-bit) vld4lane intrinsic on %A (bitcast to i8*)
; with lane operand 1 and alignment operand 1.  The vector loaded from %B is
; passed as all four vector operands, and the function returns the fadd-sum of
; the four result vectors.  Only the vld4.32 mnemonic is matched here;
; registers, lane and alignment are not checked.
477*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind {
478*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vld4laneQf:
479*9880d681SAndroid Build Coastguard Worker;CHECK: vld4.32
480*9880d681SAndroid Build Coastguard Worker	%tmp0 = bitcast float* %A to i8*
481*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <4 x float>, <4 x float>* %B
482*9880d681SAndroid Build Coastguard Worker	%tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32.p0i8(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
483*9880d681SAndroid Build Coastguard Worker        %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0
484*9880d681SAndroid Build Coastguard Worker        %tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1
485*9880d681SAndroid Build Coastguard Worker        %tmp5 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 2
486*9880d681SAndroid Build Coastguard Worker        %tmp6 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 3
487*9880d681SAndroid Build Coastguard Worker        %tmp7 = fadd <4 x float> %tmp3, %tmp4
488*9880d681SAndroid Build Coastguard Worker        %tmp8 = fadd <4 x float> %tmp5, %tmp6
489*9880d681SAndroid Build Coastguard Worker        %tmp9 = fadd <4 x float> %tmp7, %tmp8
490*9880d681SAndroid Build Coastguard Worker	ret <4 x float> %tmp9
491*9880d681SAndroid Build Coastguard Worker}
492*9880d681SAndroid Build Coastguard Worker
; Declarations of the vld4lane intrinsics called by the tests above.  Each
; takes an i8* address, four vectors of the result element type, and two i32
; operands (the tests above pass a lane index and then an alignment value).
; 64-bit vector variants:
493*9880d681SAndroid Build Coastguard Workerdeclare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8.p0i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
494*9880d681SAndroid Build Coastguard Workerdeclare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16.p0i8(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
495*9880d681SAndroid Build Coastguard Workerdeclare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32.p0i8(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
496*9880d681SAndroid Build Coastguard Workerdeclare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32.p0i8(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly
497*9880d681SAndroid Build Coastguard Worker
; 128-bit vector variants:
498*9880d681SAndroid Build Coastguard Workerdeclare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16.p0i8(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
499*9880d681SAndroid Build Coastguard Workerdeclare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32.p0i8(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
500*9880d681SAndroid Build Coastguard Workerdeclare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32.p0i8(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly
501*9880d681SAndroid Build Coastguard Worker
502*9880d681SAndroid Build Coastguard Worker; Radar 8776599: If one of the operands to a QQQQ REG_SEQUENCE is a register
503*9880d681SAndroid Build Coastguard Worker; in the QPR_VFP2 regclass, it needs to be copied to a QPR regclass because
504*9880d681SAndroid Build Coastguard Worker; we don't currently have a QQQQ_VFP2 super-regclass.  (The "0" for the low
505*9880d681SAndroid Build Coastguard Worker; part of %ins67 is supposed to be loaded by a VLDRS instruction in this test.)
; The third vector operand of the vld3lane call is built from element 5 of %b:
; it is zero-extended to i128, shifted left by 64 so the low 64 bits are zero,
; and bitcast to <8 x i16>.  The address and the first two vector operands are
; undef.  The function returns the sum of the three result vectors.  Only the
; vld3.16 mnemonic is matched; operands are not checked.
506*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @test_qqqq_regsequence_subreg([6 x i64] %b) nounwind {
507*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: test_qqqq_regsequence_subreg:
508*9880d681SAndroid Build Coastguard Worker;CHECK: vld3.16
509*9880d681SAndroid Build Coastguard Worker  %tmp63 = extractvalue [6 x i64] %b, 5
510*9880d681SAndroid Build Coastguard Worker  %tmp64 = zext i64 %tmp63 to i128
511*9880d681SAndroid Build Coastguard Worker  %tmp65 = shl i128 %tmp64, 64
512*9880d681SAndroid Build Coastguard Worker  %ins67 = or i128 %tmp65, 0
513*9880d681SAndroid Build Coastguard Worker  %tmp78 = bitcast i128 %ins67 to <8 x i16>
514*9880d681SAndroid Build Coastguard Worker  %vld3_lane = tail call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> %tmp78, i32 1, i32 2)
515*9880d681SAndroid Build Coastguard Worker  %tmp3 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 0
516*9880d681SAndroid Build Coastguard Worker  %tmp4 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 1
517*9880d681SAndroid Build Coastguard Worker  %tmp5 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 2
518*9880d681SAndroid Build Coastguard Worker  %tmp6 = add <8 x i16> %tmp3, %tmp4
519*9880d681SAndroid Build Coastguard Worker  %tmp7 = add <8 x i16> %tmp5, %tmp6
520*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %tmp7
521*9880d681SAndroid Build Coastguard Worker}
522*9880d681SAndroid Build Coastguard Worker
; NOTE(review): no call to @llvm.trap appears in the vld4lane tests or in
; test_qqqq_regsequence_subreg above -- confirm whether another part of this
; file uses it or whether the declaration is a leftover.
523*9880d681SAndroid Build Coastguard Workerdeclare void @llvm.trap() nounwind
524