; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s

; Codegen test for transpose-style shufflevector masks on AArch64.
;
; Every function below follows the same shape:
;   1. load two vectors of N lanes from %A and %B,
;   2. shuffle them with the "even" mask <0, N, 2, N+2, ...>,
;   3. shuffle them with the "odd" mask  <1, N+1, 3, N+3, ...>,
;   4. add the two shuffle results and return the sum.
; The expected output is a trn1/trn2 pair (zip1/zip2 for 2-lane vectors)
; with the vector add emitted on the line directly after the second
; permute instruction.

; 64-bit vector, 8 x i8 lanes.
define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vtrni8:
;CHECK: trn1.8b
;CHECK: trn2.8b
;CHECK-NEXT: add.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

; 64-bit vector, 4 x i16 lanes.
define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vtrni16:
;CHECK: trn1.4h
;CHECK: trn2.4h
;CHECK-NEXT: add.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

; 2xi32 TRN is redundant with ZIP
; (with only two lanes the masks <0, 2> and <1, 3> are expected to be
; emitted as zip1/zip2 rather than trn1/trn2).
define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vtrni32:
;CHECK: zip1.2s
;CHECK: zip2.2s
;CHECK-NEXT: add.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
  %tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 3>
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

; 64-bit vector, 2 x float lanes; same zip1/zip2 expectation as vtrni32,
; combined with a floating-point add.
define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vtrnf:
;CHECK: zip1.2s
;CHECK: zip2.2s
;CHECK-NEXT: fadd.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2>
  %tmp4 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 1, i32 3>
  %tmp5 = fadd <2 x float> %tmp3, %tmp4
  ret <2 x float> %tmp5
}

; 128-bit vector, 16 x i8 lanes.
define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vtrnQi8:
;CHECK: trn1.16b
;CHECK: trn2.16b
;CHECK-NEXT: add.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
  %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

; 128-bit vector, 8 x i16 lanes.
define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vtrnQi16:
;CHECK: trn1.8h
;CHECK: trn2.8h
;CHECK-NEXT: add.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

; 128-bit vector, 4 x i32 lanes.
define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vtrnQi32:
;CHECK: trn1.4s
;CHECK: trn2.4s
;CHECK-NEXT: add.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

; 128-bit vector, 4 x float lanes, combined with a floating-point add.
define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vtrnQf:
;CHECK: trn1.4s
;CHECK: trn2.4s
;CHECK-NEXT: fadd.4s
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %tmp5 = fadd <4 x float> %tmp3, %tmp4
  ret <4 x float> %tmp5
}

; Undef shuffle indices should not prevent matching to TRN1/TRN2:
; the masks below are the same even/odd patterns as above with some
; entries replaced by undef.

; 8 x i8 with undef entries in both masks.
define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vtrni8_undef:
;CHECK: trn1.8b
;CHECK: trn2.8b
;CHECK-NEXT: add.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14>
  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

; 8 x i16 with undef entries in both masks, including adjacent undef pairs.
define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vtrnQi16_undef:
;CHECK: trn1.8h
;CHECK: trn2.8h
;CHECK-NEXT: add.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
  %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}