// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN: -ffp-contract=fast -emit-llvm -o - %s | opt -S -mem2reg \
// RUN: | FileCheck %s
5*67e74705SXin Li
// Test new aarch64 intrinsics with poly128
// FIXME: Currently, poly128_t is equivalent to uint128, which will be split
// into two 64-bit GPRs (e.g. X0, X1). Moving data from X0, X1 to an FPR128
// then introduces 2 store and 1 load instructions (store X0, X1 to memory and
// then load back to Q0). If the target has NEON, this is better replaced by
// FMOV or INS.
12*67e74705SXin Li
#include <arm_neon.h>
14*67e74705SXin Li
15*67e74705SXin Li // CHECK-LABEL: define void @test_vstrq_p128(i128* %ptr, i128 %val) #0 {
16*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast i128* %ptr to i8*
17*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i128*
18*67e74705SXin Li // CHECK: store i128 %val, i128* [[TMP1]]
19*67e74705SXin Li // CHECK: ret void
test_vstrq_p128(poly128_t * ptr,poly128_t val)20*67e74705SXin Li void test_vstrq_p128(poly128_t * ptr, poly128_t val) {
21*67e74705SXin Li vstrq_p128(ptr, val);
22*67e74705SXin Li
23*67e74705SXin Li }
24*67e74705SXin Li
25*67e74705SXin Li // CHECK-LABEL: define i128 @test_vldrq_p128(i128* %ptr) #0 {
26*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast i128* %ptr to i8*
27*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i128*
28*67e74705SXin Li // CHECK: [[TMP2:%.*]] = load i128, i128* [[TMP1]]
29*67e74705SXin Li // CHECK: ret i128 [[TMP2]]
test_vldrq_p128(poly128_t * ptr)30*67e74705SXin Li poly128_t test_vldrq_p128(poly128_t * ptr) {
31*67e74705SXin Li return vldrq_p128(ptr);
32*67e74705SXin Li
33*67e74705SXin Li }
34*67e74705SXin Li
35*67e74705SXin Li // CHECK-LABEL: define void @test_ld_st_p128(i128* %ptr) #0 {
36*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast i128* %ptr to i8*
37*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i128*
38*67e74705SXin Li // CHECK: [[TMP2:%.*]] = load i128, i128* [[TMP1]]
39*67e74705SXin Li // CHECK: [[ADD_PTR:%.*]] = getelementptr inbounds i128, i128* %ptr, i64 1
40*67e74705SXin Li // CHECK: [[TMP3:%.*]] = bitcast i128* [[ADD_PTR]] to i8*
41*67e74705SXin Li // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i128*
42*67e74705SXin Li // CHECK: store i128 [[TMP2]], i128* [[TMP4]]
43*67e74705SXin Li // CHECK: ret void
test_ld_st_p128(poly128_t * ptr)44*67e74705SXin Li void test_ld_st_p128(poly128_t * ptr) {
45*67e74705SXin Li vstrq_p128(ptr+1, vldrq_p128(ptr));
46*67e74705SXin Li
47*67e74705SXin Li }
48*67e74705SXin Li
49*67e74705SXin Li // CHECK-LABEL: define i128 @test_vmull_p64(i64 %a, i64 %b) #0 {
50*67e74705SXin Li // CHECK: [[VMULL_P64_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b) #2
51*67e74705SXin Li // CHECK: [[VMULL_P641_I:%.*]] = bitcast <16 x i8> [[VMULL_P64_I]] to i128
52*67e74705SXin Li // CHECK: ret i128 [[VMULL_P641_I]]
test_vmull_p64(poly64_t a,poly64_t b)53*67e74705SXin Li poly128_t test_vmull_p64(poly64_t a, poly64_t b) {
54*67e74705SXin Li return vmull_p64(a, b);
55*67e74705SXin Li }
56*67e74705SXin Li
57*67e74705SXin Li // CHECK-LABEL: define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #0 {
58*67e74705SXin Li // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
59*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I]] to i64
60*67e74705SXin Li // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <2 x i64> %b, <2 x i64> %b, <1 x i32> <i32 1>
61*67e74705SXin Li // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I7_I]] to i64
62*67e74705SXin Li // CHECK: [[VMULL_P64_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 [[TMP0]], i64 [[TMP1]]) #2
63*67e74705SXin Li // CHECK: [[VMULL_P641_I_I:%.*]] = bitcast <16 x i8> [[VMULL_P64_I_I]] to i128
64*67e74705SXin Li // CHECK: ret i128 [[VMULL_P641_I_I]]
test_vmull_high_p64(poly64x2_t a,poly64x2_t b)65*67e74705SXin Li poly128_t test_vmull_high_p64(poly64x2_t a, poly64x2_t b) {
66*67e74705SXin Li return vmull_high_p64(a, b);
67*67e74705SXin Li }
68*67e74705SXin Li
69*67e74705SXin Li // CHECK-LABEL: define i128 @test_vreinterpretq_p128_s8(<16 x i8> %a) #0 {
70*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to i128
71*67e74705SXin Li // CHECK: ret i128 [[TMP0]]
test_vreinterpretq_p128_s8(int8x16_t a)72*67e74705SXin Li poly128_t test_vreinterpretq_p128_s8(int8x16_t a) {
73*67e74705SXin Li return vreinterpretq_p128_s8(a);
74*67e74705SXin Li }
75*67e74705SXin Li
76*67e74705SXin Li // CHECK-LABEL: define i128 @test_vreinterpretq_p128_s16(<8 x i16> %a) #0 {
77*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to i128
78*67e74705SXin Li // CHECK: ret i128 [[TMP0]]
test_vreinterpretq_p128_s16(int16x8_t a)79*67e74705SXin Li poly128_t test_vreinterpretq_p128_s16(int16x8_t a) {
80*67e74705SXin Li return vreinterpretq_p128_s16(a);
81*67e74705SXin Li }
82*67e74705SXin Li
83*67e74705SXin Li // CHECK-LABEL: define i128 @test_vreinterpretq_p128_s32(<4 x i32> %a) #0 {
84*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to i128
85*67e74705SXin Li // CHECK: ret i128 [[TMP0]]
test_vreinterpretq_p128_s32(int32x4_t a)86*67e74705SXin Li poly128_t test_vreinterpretq_p128_s32(int32x4_t a) {
87*67e74705SXin Li return vreinterpretq_p128_s32(a);
88*67e74705SXin Li }
89*67e74705SXin Li
90*67e74705SXin Li // CHECK-LABEL: define i128 @test_vreinterpretq_p128_s64(<2 x i64> %a) #0 {
91*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to i128
92*67e74705SXin Li // CHECK: ret i128 [[TMP0]]
test_vreinterpretq_p128_s64(int64x2_t a)93*67e74705SXin Li poly128_t test_vreinterpretq_p128_s64(int64x2_t a) {
94*67e74705SXin Li return vreinterpretq_p128_s64(a);
95*67e74705SXin Li }
96*67e74705SXin Li
97*67e74705SXin Li // CHECK-LABEL: define i128 @test_vreinterpretq_p128_u8(<16 x i8> %a) #0 {
98*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to i128
99*67e74705SXin Li // CHECK: ret i128 [[TMP0]]
test_vreinterpretq_p128_u8(uint8x16_t a)100*67e74705SXin Li poly128_t test_vreinterpretq_p128_u8(uint8x16_t a) {
101*67e74705SXin Li return vreinterpretq_p128_u8(a);
102*67e74705SXin Li }
103*67e74705SXin Li
104*67e74705SXin Li // CHECK-LABEL: define i128 @test_vreinterpretq_p128_u16(<8 x i16> %a) #0 {
105*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to i128
106*67e74705SXin Li // CHECK: ret i128 [[TMP0]]
test_vreinterpretq_p128_u16(uint16x8_t a)107*67e74705SXin Li poly128_t test_vreinterpretq_p128_u16(uint16x8_t a) {
108*67e74705SXin Li return vreinterpretq_p128_u16(a);
109*67e74705SXin Li }
110*67e74705SXin Li
111*67e74705SXin Li // CHECK-LABEL: define i128 @test_vreinterpretq_p128_u32(<4 x i32> %a) #0 {
112*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to i128
113*67e74705SXin Li // CHECK: ret i128 [[TMP0]]
test_vreinterpretq_p128_u32(uint32x4_t a)114*67e74705SXin Li poly128_t test_vreinterpretq_p128_u32(uint32x4_t a) {
115*67e74705SXin Li return vreinterpretq_p128_u32(a);
116*67e74705SXin Li }
117*67e74705SXin Li
118*67e74705SXin Li // CHECK-LABEL: define i128 @test_vreinterpretq_p128_u64(<2 x i64> %a) #0 {
119*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to i128
120*67e74705SXin Li // CHECK: ret i128 [[TMP0]]
test_vreinterpretq_p128_u64(uint64x2_t a)121*67e74705SXin Li poly128_t test_vreinterpretq_p128_u64(uint64x2_t a) {
122*67e74705SXin Li return vreinterpretq_p128_u64(a);
123*67e74705SXin Li }
124*67e74705SXin Li
125*67e74705SXin Li // CHECK-LABEL: define i128 @test_vreinterpretq_p128_f32(<4 x float> %a) #0 {
126*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to i128
127*67e74705SXin Li // CHECK: ret i128 [[TMP0]]
test_vreinterpretq_p128_f32(float32x4_t a)128*67e74705SXin Li poly128_t test_vreinterpretq_p128_f32(float32x4_t a) {
129*67e74705SXin Li return vreinterpretq_p128_f32(a);
130*67e74705SXin Li }
131*67e74705SXin Li
132*67e74705SXin Li // CHECK-LABEL: define i128 @test_vreinterpretq_p128_f64(<2 x double> %a) #0 {
133*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to i128
134*67e74705SXin Li // CHECK: ret i128 [[TMP0]]
test_vreinterpretq_p128_f64(float64x2_t a)135*67e74705SXin Li poly128_t test_vreinterpretq_p128_f64(float64x2_t a) {
136*67e74705SXin Li return vreinterpretq_p128_f64(a);
137*67e74705SXin Li }
138*67e74705SXin Li
139*67e74705SXin Li // CHECK-LABEL: define i128 @test_vreinterpretq_p128_p8(<16 x i8> %a) #0 {
140*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to i128
141*67e74705SXin Li // CHECK: ret i128 [[TMP0]]
test_vreinterpretq_p128_p8(poly8x16_t a)142*67e74705SXin Li poly128_t test_vreinterpretq_p128_p8(poly8x16_t a) {
143*67e74705SXin Li return vreinterpretq_p128_p8(a);
144*67e74705SXin Li }
145*67e74705SXin Li
146*67e74705SXin Li // CHECK-LABEL: define i128 @test_vreinterpretq_p128_p16(<8 x i16> %a) #0 {
147*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to i128
148*67e74705SXin Li // CHECK: ret i128 [[TMP0]]
test_vreinterpretq_p128_p16(poly16x8_t a)149*67e74705SXin Li poly128_t test_vreinterpretq_p128_p16(poly16x8_t a) {
150*67e74705SXin Li return vreinterpretq_p128_p16(a);
151*67e74705SXin Li }
152*67e74705SXin Li
153*67e74705SXin Li // CHECK-LABEL: define i128 @test_vreinterpretq_p128_p64(<2 x i64> %a) #0 {
154*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to i128
155*67e74705SXin Li // CHECK: ret i128 [[TMP0]]
test_vreinterpretq_p128_p64(poly64x2_t a)156*67e74705SXin Li poly128_t test_vreinterpretq_p128_p64(poly64x2_t a) {
157*67e74705SXin Li return vreinterpretq_p128_p64(a);
158*67e74705SXin Li }
159*67e74705SXin Li
160*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p128(i128 %a) #0 {
161*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast i128 %a to <16 x i8>
162*67e74705SXin Li // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_s8_p128(poly128_t a)163*67e74705SXin Li int8x16_t test_vreinterpretq_s8_p128(poly128_t a) {
164*67e74705SXin Li return vreinterpretq_s8_p128(a);
165*67e74705SXin Li }
166*67e74705SXin Li
167*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p128(i128 %a) #0 {
168*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast i128 %a to <8 x i16>
169*67e74705SXin Li // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_s16_p128(poly128_t a)170*67e74705SXin Li int16x8_t test_vreinterpretq_s16_p128(poly128_t a) {
171*67e74705SXin Li return vreinterpretq_s16_p128(a);
172*67e74705SXin Li }
173*67e74705SXin Li
174*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p128(i128 %a) #0 {
175*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast i128 %a to <4 x i32>
176*67e74705SXin Li // CHECK: ret <4 x i32> [[TMP0]]
test_vreinterpretq_s32_p128(poly128_t a)177*67e74705SXin Li int32x4_t test_vreinterpretq_s32_p128(poly128_t a) {
178*67e74705SXin Li return vreinterpretq_s32_p128(a);
179*67e74705SXin Li }
180*67e74705SXin Li
181*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p128(i128 %a) #0 {
182*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast i128 %a to <2 x i64>
183*67e74705SXin Li // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_s64_p128(poly128_t a)184*67e74705SXin Li int64x2_t test_vreinterpretq_s64_p128(poly128_t a) {
185*67e74705SXin Li return vreinterpretq_s64_p128(a);
186*67e74705SXin Li }
187*67e74705SXin Li
188*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p128(i128 %a) #0 {
189*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast i128 %a to <16 x i8>
190*67e74705SXin Li // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_u8_p128(poly128_t a)191*67e74705SXin Li uint8x16_t test_vreinterpretq_u8_p128(poly128_t a) {
192*67e74705SXin Li return vreinterpretq_u8_p128(a);
193*67e74705SXin Li }
194*67e74705SXin Li
195*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p128(i128 %a) #0 {
196*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast i128 %a to <8 x i16>
197*67e74705SXin Li // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_u16_p128(poly128_t a)198*67e74705SXin Li uint16x8_t test_vreinterpretq_u16_p128(poly128_t a) {
199*67e74705SXin Li return vreinterpretq_u16_p128(a);
200*67e74705SXin Li }
201*67e74705SXin Li
202*67e74705SXin Li // CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p128(i128 %a) #0 {
203*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast i128 %a to <4 x i32>
204*67e74705SXin Li // CHECK: ret <4 x i32> [[TMP0]]
test_vreinterpretq_u32_p128(poly128_t a)205*67e74705SXin Li uint32x4_t test_vreinterpretq_u32_p128(poly128_t a) {
206*67e74705SXin Li return vreinterpretq_u32_p128(a);
207*67e74705SXin Li }
208*67e74705SXin Li
209*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p128(i128 %a) #0 {
210*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast i128 %a to <2 x i64>
211*67e74705SXin Li // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_u64_p128(poly128_t a)212*67e74705SXin Li uint64x2_t test_vreinterpretq_u64_p128(poly128_t a) {
213*67e74705SXin Li return vreinterpretq_u64_p128(a);
214*67e74705SXin Li }
215*67e74705SXin Li
216*67e74705SXin Li // CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p128(i128 %a) #0 {
217*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast i128 %a to <4 x float>
218*67e74705SXin Li // CHECK: ret <4 x float> [[TMP0]]
test_vreinterpretq_f32_p128(poly128_t a)219*67e74705SXin Li float32x4_t test_vreinterpretq_f32_p128(poly128_t a) {
220*67e74705SXin Li return vreinterpretq_f32_p128(a);
221*67e74705SXin Li }
222*67e74705SXin Li
223*67e74705SXin Li // CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_p128(i128 %a) #0 {
224*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast i128 %a to <2 x double>
225*67e74705SXin Li // CHECK: ret <2 x double> [[TMP0]]
test_vreinterpretq_f64_p128(poly128_t a)226*67e74705SXin Li float64x2_t test_vreinterpretq_f64_p128(poly128_t a) {
227*67e74705SXin Li return vreinterpretq_f64_p128(a);
228*67e74705SXin Li }
229*67e74705SXin Li
230*67e74705SXin Li // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_p128(i128 %a) #0 {
231*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast i128 %a to <16 x i8>
232*67e74705SXin Li // CHECK: ret <16 x i8> [[TMP0]]
test_vreinterpretq_p8_p128(poly128_t a)233*67e74705SXin Li poly8x16_t test_vreinterpretq_p8_p128(poly128_t a) {
234*67e74705SXin Li return vreinterpretq_p8_p128(a);
235*67e74705SXin Li }
236*67e74705SXin Li
237*67e74705SXin Li // CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_p128(i128 %a) #0 {
238*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast i128 %a to <8 x i16>
239*67e74705SXin Li // CHECK: ret <8 x i16> [[TMP0]]
test_vreinterpretq_p16_p128(poly128_t a)240*67e74705SXin Li poly16x8_t test_vreinterpretq_p16_p128(poly128_t a) {
241*67e74705SXin Li return vreinterpretq_p16_p128(a);
242*67e74705SXin Li }
243*67e74705SXin Li
244*67e74705SXin Li // CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_p128(i128 %a) #0 {
245*67e74705SXin Li // CHECK: [[TMP0:%.*]] = bitcast i128 %a to <2 x i64>
246*67e74705SXin Li // CHECK: ret <2 x i64> [[TMP0]]
test_vreinterpretq_p64_p128(poly128_t a)247*67e74705SXin Li poly64x2_t test_vreinterpretq_p64_p128(poly128_t a) {
248*67e74705SXin Li return vreinterpretq_p64_p128(a);
249*67e74705SXin Li }
250*67e74705SXin Li
251*67e74705SXin Li
252