xref: /aosp_15_r20/external/llvm/test/CodeGen/AArch64/arm64-trn.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
2*9880d681SAndroid Build Coastguard Worker
; 64-bit <8 x i8> transpose pair. %tmp3 interleaves the even-numbered lanes of
; the two loaded vectors and %tmp4 interleaves their odd-numbered lanes. The
; FileCheck directives below expect trn1.8b and trn2.8b, with add.8b as the
; instruction immediately following trn2.8b.
3*9880d681SAndroid Build Coastguard Workerdefine <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
4*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vtrni8:
5*9880d681SAndroid Build Coastguard Worker;CHECK: trn1.8b
6*9880d681SAndroid Build Coastguard Worker;CHECK: trn2.8b
7*9880d681SAndroid Build Coastguard Worker;CHECK-NEXT: add.8b
8*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <8 x i8>, <8 x i8>* %A
9*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <8 x i8>, <8 x i8>* %B
	; Mask indices 0-7 select lanes of %tmp1; indices 8-15 select lanes of %tmp2.
10*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
11*9880d681SAndroid Build Coastguard Worker	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
	; Both shuffle results feed the add, so both are used by the returned value.
12*9880d681SAndroid Build Coastguard Worker        %tmp5 = add <8 x i8> %tmp3, %tmp4
13*9880d681SAndroid Build Coastguard Worker	ret <8 x i8> %tmp5
14*9880d681SAndroid Build Coastguard Worker}
15*9880d681SAndroid Build Coastguard Worker
; 64-bit <4 x i16> transpose pair. %tmp3 interleaves the even-numbered lanes of
; the two loaded vectors and %tmp4 interleaves their odd-numbered lanes. The
; FileCheck directives below expect trn1.4h and trn2.4h, with add.4h as the
; instruction immediately following trn2.4h.
16*9880d681SAndroid Build Coastguard Workerdefine <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
17*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vtrni16:
18*9880d681SAndroid Build Coastguard Worker;CHECK: trn1.4h
19*9880d681SAndroid Build Coastguard Worker;CHECK: trn2.4h
20*9880d681SAndroid Build Coastguard Worker;CHECK-NEXT: add.4h
21*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <4 x i16>, <4 x i16>* %A
22*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <4 x i16>, <4 x i16>* %B
	; Mask indices 0-3 select lanes of %tmp1; indices 4-7 select lanes of %tmp2.
23*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
24*9880d681SAndroid Build Coastguard Worker	%tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
	; Both shuffle results feed the add, so both are used by the returned value.
25*9880d681SAndroid Build Coastguard Worker        %tmp5 = add <4 x i16> %tmp3, %tmp4
26*9880d681SAndroid Build Coastguard Worker	ret <4 x i16> %tmp5
27*9880d681SAndroid Build Coastguard Worker}
28*9880d681SAndroid Build Coastguard Worker
29*9880d681SAndroid Build Coastguard Worker; For two-lane (2 x i32) vectors TRN1/TRN2 give the same result as ZIP1/ZIP2, so ZIP is expected here.
; 64-bit <2 x i32> case. With only two lanes the masks are <0, 2> and <1, 3>.
; The FileCheck directives below expect these to be emitted as zip1.2s and
; zip2.2s (not trn1/trn2), with add.2s as the instruction immediately
; following zip2.2s.
30*9880d681SAndroid Build Coastguard Workerdefine <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
31*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vtrni32:
32*9880d681SAndroid Build Coastguard Worker;CHECK: zip1.2s
33*9880d681SAndroid Build Coastguard Worker;CHECK: zip2.2s
34*9880d681SAndroid Build Coastguard Worker;CHECK-NEXT: add.2s
35*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <2 x i32>, <2 x i32>* %A
36*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <2 x i32>, <2 x i32>* %B
	; Mask indices 0-1 select lanes of %tmp1; indices 2-3 select lanes of %tmp2.
37*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
38*9880d681SAndroid Build Coastguard Worker	%tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 3>
	; Both shuffle results feed the add, so both are used by the returned value.
39*9880d681SAndroid Build Coastguard Worker        %tmp5 = add <2 x i32> %tmp3, %tmp4
40*9880d681SAndroid Build Coastguard Worker	ret <2 x i32> %tmp5
41*9880d681SAndroid Build Coastguard Worker}
42*9880d681SAndroid Build Coastguard Worker
; 64-bit <2 x float> case. With only two lanes the masks are <0, 2> and <1, 3>.
; The FileCheck directives below expect these to be emitted as zip1.2s and
; zip2.2s (not trn1/trn2), with fadd.2s as the instruction immediately
; following zip2.2s.
43*9880d681SAndroid Build Coastguard Workerdefine <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
44*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vtrnf:
45*9880d681SAndroid Build Coastguard Worker;CHECK: zip1.2s
46*9880d681SAndroid Build Coastguard Worker;CHECK: zip2.2s
47*9880d681SAndroid Build Coastguard Worker;CHECK-NEXT: fadd.2s
48*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <2 x float>, <2 x float>* %A
49*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <2 x float>, <2 x float>* %B
	; Mask indices 0-1 select lanes of %tmp1; indices 2-3 select lanes of %tmp2.
50*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2>
51*9880d681SAndroid Build Coastguard Worker	%tmp4 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 1, i32 3>
	; Both shuffle results feed the fadd, so both are used by the returned value.
52*9880d681SAndroid Build Coastguard Worker        %tmp5 = fadd <2 x float> %tmp3, %tmp4
53*9880d681SAndroid Build Coastguard Worker	ret <2 x float> %tmp5
54*9880d681SAndroid Build Coastguard Worker}
55*9880d681SAndroid Build Coastguard Worker
; 128-bit <16 x i8> transpose pair. %tmp3 interleaves the even-numbered lanes
; of the two loaded vectors and %tmp4 interleaves their odd-numbered lanes. The
; FileCheck directives below expect trn1.16b and trn2.16b, with add.16b as the
; instruction immediately following trn2.16b.
56*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
57*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vtrnQi8:
58*9880d681SAndroid Build Coastguard Worker;CHECK: trn1.16b
59*9880d681SAndroid Build Coastguard Worker;CHECK: trn2.16b
60*9880d681SAndroid Build Coastguard Worker;CHECK-NEXT: add.16b
61*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <16 x i8>, <16 x i8>* %A
62*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <16 x i8>, <16 x i8>* %B
	; Mask indices 0-15 select lanes of %tmp1; indices 16-31 select lanes of %tmp2.
63*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
64*9880d681SAndroid Build Coastguard Worker	%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
	; Both shuffle results feed the add, so both are used by the returned value.
65*9880d681SAndroid Build Coastguard Worker        %tmp5 = add <16 x i8> %tmp3, %tmp4
66*9880d681SAndroid Build Coastguard Worker	ret <16 x i8> %tmp5
67*9880d681SAndroid Build Coastguard Worker}
68*9880d681SAndroid Build Coastguard Worker
; 128-bit <8 x i16> transpose pair. %tmp3 interleaves the even-numbered lanes
; of the two loaded vectors and %tmp4 interleaves their odd-numbered lanes. The
; FileCheck directives below expect trn1.8h and trn2.8h, with add.8h as the
; instruction immediately following trn2.8h.
69*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
70*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vtrnQi16:
71*9880d681SAndroid Build Coastguard Worker;CHECK: trn1.8h
72*9880d681SAndroid Build Coastguard Worker;CHECK: trn2.8h
73*9880d681SAndroid Build Coastguard Worker;CHECK-NEXT: add.8h
74*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <8 x i16>, <8 x i16>* %A
75*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <8 x i16>, <8 x i16>* %B
	; Mask indices 0-7 select lanes of %tmp1; indices 8-15 select lanes of %tmp2.
76*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
77*9880d681SAndroid Build Coastguard Worker	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
	; Both shuffle results feed the add, so both are used by the returned value.
78*9880d681SAndroid Build Coastguard Worker        %tmp5 = add <8 x i16> %tmp3, %tmp4
79*9880d681SAndroid Build Coastguard Worker	ret <8 x i16> %tmp5
80*9880d681SAndroid Build Coastguard Worker}
81*9880d681SAndroid Build Coastguard Worker
; 128-bit <4 x i32> transpose pair. %tmp3 interleaves the even-numbered lanes
; of the two loaded vectors and %tmp4 interleaves their odd-numbered lanes. The
; FileCheck directives below expect trn1.4s and trn2.4s, with add.4s as the
; instruction immediately following trn2.4s.
82*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
83*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vtrnQi32:
84*9880d681SAndroid Build Coastguard Worker;CHECK: trn1.4s
85*9880d681SAndroid Build Coastguard Worker;CHECK: trn2.4s
86*9880d681SAndroid Build Coastguard Worker;CHECK-NEXT: add.4s
87*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <4 x i32>, <4 x i32>* %A
88*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <4 x i32>, <4 x i32>* %B
	; Mask indices 0-3 select lanes of %tmp1; indices 4-7 select lanes of %tmp2.
89*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
90*9880d681SAndroid Build Coastguard Worker	%tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
	; Both shuffle results feed the add, so both are used by the returned value.
91*9880d681SAndroid Build Coastguard Worker        %tmp5 = add <4 x i32> %tmp3, %tmp4
92*9880d681SAndroid Build Coastguard Worker	ret <4 x i32> %tmp5
93*9880d681SAndroid Build Coastguard Worker}
94*9880d681SAndroid Build Coastguard Worker
; 128-bit <4 x float> transpose pair. %tmp3 interleaves the even-numbered lanes
; of the two loaded vectors and %tmp4 interleaves their odd-numbered lanes. The
; FileCheck directives below expect trn1.4s and trn2.4s, with fadd.4s as the
; instruction immediately following trn2.4s.
95*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
96*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vtrnQf:
97*9880d681SAndroid Build Coastguard Worker;CHECK: trn1.4s
98*9880d681SAndroid Build Coastguard Worker;CHECK: trn2.4s
99*9880d681SAndroid Build Coastguard Worker;CHECK-NEXT: fadd.4s
100*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <4 x float>, <4 x float>* %A
101*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <4 x float>, <4 x float>* %B
	; Mask indices 0-3 select lanes of %tmp1; indices 4-7 select lanes of %tmp2.
102*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
103*9880d681SAndroid Build Coastguard Worker	%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
	; Both shuffle results feed the fadd, so both are used by the returned value.
104*9880d681SAndroid Build Coastguard Worker        %tmp5 = fadd <4 x float> %tmp3, %tmp4
105*9880d681SAndroid Build Coastguard Worker	ret <4 x float> %tmp5
106*9880d681SAndroid Build Coastguard Worker}
107*9880d681SAndroid Build Coastguard Worker
108*9880d681SAndroid Build Coastguard Worker; Undef shuffle indices should not prevent matching to TRN1/TRN2 (VTRN on 32-bit ARM):
109*9880d681SAndroid Build Coastguard Worker
; <8 x i8> transpose pair where some mask lanes are undef. Every defined index
; still matches the even-lane (%tmp3) or odd-lane (%tmp4) interleave pattern.
; The FileCheck directives below expect trn1.8b and trn2.8b regardless, with
; add.8b as the instruction immediately following trn2.8b.
110*9880d681SAndroid Build Coastguard Workerdefine <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
111*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vtrni8_undef:
112*9880d681SAndroid Build Coastguard Worker;CHECK: trn1.8b
113*9880d681SAndroid Build Coastguard Worker;CHECK: trn2.8b
114*9880d681SAndroid Build Coastguard Worker;CHECK-NEXT: add.8b
115*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <8 x i8>, <8 x i8>* %A
116*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <8 x i8>, <8 x i8>* %B
	; Even-lane mask with positions 1 and 4 left undef (they would be 8 and 4).
117*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14>
	; Odd-lane mask with positions 5 and 6 left undef (they would be 13 and 7).
118*9880d681SAndroid Build Coastguard Worker	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
119*9880d681SAndroid Build Coastguard Worker        %tmp5 = add <8 x i8> %tmp3, %tmp4
120*9880d681SAndroid Build Coastguard Worker	ret <8 x i8> %tmp5
121*9880d681SAndroid Build Coastguard Worker}
122*9880d681SAndroid Build Coastguard Worker
; <8 x i16> transpose pair where some mask lanes are undef. Every defined index
; still matches the even-lane (%tmp3) or odd-lane (%tmp4) interleave pattern.
; The FileCheck directives below expect trn1.8h and trn2.8h regardless, with
; add.8h as the instruction immediately following trn2.8h.
123*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
124*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: vtrnQi16_undef:
125*9880d681SAndroid Build Coastguard Worker;CHECK: trn1.8h
126*9880d681SAndroid Build Coastguard Worker;CHECK: trn2.8h
127*9880d681SAndroid Build Coastguard Worker;CHECK-NEXT: add.8h
128*9880d681SAndroid Build Coastguard Worker	%tmp1 = load <8 x i16>, <8 x i16>* %A
129*9880d681SAndroid Build Coastguard Worker	%tmp2 = load <8 x i16>, <8 x i16>* %B
	; Even-lane mask with positions 2 and 3 left undef (they would be 2 and 10).
130*9880d681SAndroid Build Coastguard Worker	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
	; Odd-lane mask with positions 1, 6 and 7 left undef (they would be 9, 7 and 15).
131*9880d681SAndroid Build Coastguard Worker	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
132*9880d681SAndroid Build Coastguard Worker        %tmp5 = add <8 x i16> %tmp3, %tmp4
133*9880d681SAndroid Build Coastguard Worker	ret <8 x i16> %tmp5
134*9880d681SAndroid Build Coastguard Worker}
135