; RUN: llc -asm-verbose=false < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s

; Codegen test for the AArch64 NEON multiply intrinsics.
; Every function loads its operands through the pointer arguments, calls one
; llvm.aarch64.neon.* intrinsic and returns the result. The FileCheck
; directives inside each function name the instruction (Apple NEON syntax, as
; requested on the run line) that llc is expected to emit for that function.

; smull: operand lanes i8/i16/i32, result lanes twice as wide.
define <8 x i16> @smull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: smull8h:
;CHECK: smull.8h
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @smull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: smull4s:
;CHECK: smull.4s
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @smull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: smull2d:
;CHECK: smull.2d
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i64> %tmp3
}

declare <8 x i16>  @llvm.aarch64.neon.smull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone

; umull: same operand/result shapes as the smull tests above.
define <8 x i16> @umull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: umull8h:
;CHECK: umull.8h
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @umull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: umull4s:
;CHECK: umull.4s
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @umull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: umull2d:
;CHECK: umull.2d
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i64> %tmp3
}

declare <8 x i16>  @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone

; sqdmull: i16/i32 operand lanes, result lanes twice as wide.
define <4 x i32> @sqdmull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqdmull4s:
;CHECK: sqdmull.4s
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @sqdmull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqdmull2d:
;CHECK: sqdmull.2d
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i64> %tmp3
}

; The sqdmull2_* variants load full 128-bit vectors and pass only the upper
; half of each (lanes 4-7 / 2-3, picked by shufflevector) to the intrinsic;
; the expected instruction is the sqdmull2 form.
define <4 x i32> @sqdmull2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqdmull2_4s:
;CHECK: sqdmull2.4s
  %load1 = load <8 x i16>, <8 x i16>* %A
  %load2 = load <8 x i16>, <8 x i16>* %B
  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @sqdmull2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqdmull2_2d:
;CHECK: sqdmull2.2d
  %load1 = load <4 x i32>, <4 x i32>* %A
  %load2 = load <4 x i32>, <4 x i32>* %B
  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i64> %tmp3
}


declare <4 x i32>  @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i64>  @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone

; pmull: <8 x i8> operands, <8 x i16> result.
define <8 x i16> @pmull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: pmull8h:
;CHECK: pmull.8h
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i16> %tmp3
}

declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone

; sqdmulh: result lanes have the same width as the operand lanes. The _1s
; variant works on plain i32 values and expects the scalar s-register form.
define <4 x i16> @sqdmulh_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqdmulh_4h:
;CHECK: sqdmulh.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @sqdmulh_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqdmulh_8h:
;CHECK: sqdmulh.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @sqdmulh_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqdmulh_2s:
;CHECK: sqdmulh.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @sqdmulh_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqdmulh_4s:
;CHECK: sqdmulh.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define i32 @sqdmulh_1s(i32* %A, i32* %B) nounwind {
;CHECK-LABEL: sqdmulh_1s:
;CHECK: sqdmulh s0, {{s[0-9]+}}, {{s[0-9]+}}
  %tmp1 = load i32, i32* %A
  %tmp2 = load i32, i32* %B
  %tmp3 = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %tmp1, i32 %tmp2)
  ret i32 %tmp3
}

declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare i32 @llvm.aarch64.neon.sqdmulh.i32(i32, i32) nounwind readnone

; sqrdmulh: same set of shapes as the sqdmulh tests above.
define <4 x i16> @sqrdmulh_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqrdmulh_4h:
;CHECK: sqrdmulh.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @sqrdmulh_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqrdmulh_8h:
;CHECK: sqrdmulh.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @sqrdmulh_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqrdmulh_2s:
;CHECK: sqrdmulh.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @sqrdmulh_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqrdmulh_4s:
;CHECK: sqrdmulh.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define i32 @sqrdmulh_1s(i32* %A, i32* %B) nounwind {
;CHECK-LABEL: sqrdmulh_1s:
;CHECK: sqrdmulh s0, {{s[0-9]+}}, {{s[0-9]+}}
  %tmp1 = load i32, i32* %A
  %tmp2 = load i32, i32* %B
  %tmp3 = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %tmp1, i32 %tmp2)
  ret i32 %tmp3
}

declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
; Scalar i32 form of the sqrdmulh intrinsic, called by @sqrdmulh_1s.
declare i32 @llvm.aarch64.neon.sqrdmulh.i32(i32, i32) nounwind readnone

; fmulx: float/double vector operands, result of the same type.
define <2 x float> @fmulx_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fmulx_2s:
;CHECK: fmulx.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fmulx_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fmulx_4s:
;CHECK: fmulx.4s
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fmulx_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fmulx_2d:
;CHECK: fmulx.2d
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>) nounwind readnone

; smlal / smlsl: the smull intrinsic result is combined with an accumulator
; loaded from %C by a plain IR add (or sub); the expected output is the single
; accumulate instruction named in the FileCheck directive.
define <4 x i32> @smlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: smlal4s:
;CHECK: smlal.4s
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = load <4 x i32>, <4 x i32>* %C
  %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <2 x i64> @smlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: smlal2d:
;CHECK: smlal.2d
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = load <2 x i64>, <2 x i64>* %C
  %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp5 = add <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}

define <4 x i32> @smlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: smlsl4s:
;CHECK: smlsl.4s
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = load <4 x i32>, <4 x i32>* %C
  %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp5 = sub <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <2 x i64> @smlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: smlsl2d:
;CHECK: smlsl.2d
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = load <2 x i64>, <2 x i64>* %C
  %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp5 = sub <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}

declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)

; sqdmlal / sqdmlsl: the sqdmull intrinsic result is fed, together with the
; accumulator loaded from %C, into the sqadd (or sqsub) intrinsic. The *2_
; variants take the upper half of 128-bit inputs via shufflevector and expect
; the sqdmlal2 form.
define <4 x i32> @sqdmlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: sqdmlal4s:
;CHECK: sqdmlal.4s
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = load <4 x i32>, <4 x i32>* %C
  %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
  ret <4 x i32> %tmp5
}

define <2 x i64> @sqdmlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: sqdmlal2d:
;CHECK: sqdmlal.2d
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = load <2 x i64>, <2 x i64>* %C
  %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
  ret <2 x i64> %tmp5
}

define <4 x i32> @sqdmlal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: sqdmlal2_4s:
;CHECK: sqdmlal2.4s
  %load1 = load <8 x i16>, <8 x i16>* %A
  %load2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = load <4 x i32>, <4 x i32>* %C
  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
  ret <4 x i32> %tmp5
}

define <2 x i64> @sqdmlal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: sqdmlal2_2d:
;CHECK: sqdmlal2.2d
  %load1 = load <4 x i32>, <4 x i32>* %A
  %load2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = load <2 x i64>, <2 x i64>* %C
  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
  ret <2 x i64> %tmp5
}

define <4 x i32> @sqdmlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: sqdmlsl4s:
;CHECK: sqdmlsl.4s
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = load <4 x i32>, <4 x i32>* %C
  %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
  ret <4 x i32> %tmp5
}
; sqdmlsl (continued): sqdmull intrinsic result and the accumulator loaded
; from %C go through the sqsub intrinsic. The *2_ variants pass the upper half
; of 128-bit inputs (picked by shufflevector) and expect the sqdmlsl2 form.
define <2 x i64> @sqdmlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: sqdmlsl2d:
;CHECK: sqdmlsl.2d
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = load <2 x i64>, <2 x i64>* %C
  %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
  ret <2 x i64> %tmp5
}

define <4 x i32> @sqdmlsl2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: sqdmlsl2_4s:
;CHECK: sqdmlsl2.4s
  %load1 = load <8 x i16>, <8 x i16>* %A
  %load2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = load <4 x i32>, <4 x i32>* %C
  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
  ret <4 x i32> %tmp5
}

define <2 x i64> @sqdmlsl2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: sqdmlsl2_2d:
;CHECK: sqdmlsl2.2d
  %load1 = load <4 x i32>, <4 x i32>* %A
  %load2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = load <2 x i64>, <2 x i64>* %C
  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
  ret <2 x i64> %tmp5
}

; umlal / umlsl: the umull intrinsic result is combined with the accumulator
; loaded from %C by a plain IR add (or sub).
define <4 x i32> @umlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: umlal4s:
;CHECK: umlal.4s
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = load <4 x i32>, <4 x i32>* %C
  %tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <2 x i64> @umlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: umlal2d:
;CHECK: umlal.2d
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = load <2 x i64>, <2 x i64>* %C
  %tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp5 = add <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}

define <4 x i32> @umlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: umlsl4s:
;CHECK: umlsl.4s
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = load <4 x i32>, <4 x i32>* %C
  %tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp5 = sub <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

; umull result subtracted from the loaded accumulator; expects umlsl.2d.
define <2 x i64> @umlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: umlsl2d:
;CHECK: umlsl.2d
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = load <2 x i64>, <2 x i64>* %C
  %tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp5 = sub <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}

; llvm.fma on three loaded vectors; expects fmla.2s.
define <2 x float> @fmla_2s(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
;CHECK-LABEL: fmla_2s:
;CHECK: fmla.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = load <2 x float>, <2 x float>* %C
  %tmp4 = call <2 x float> @llvm.fma.v2f32(<2 x float> %tmp1, <2 x float> %tmp2, <2 x float> %tmp3)
  ret <2 x float> %tmp4
}

; llvm.fma on three loaded vectors; expects fmla.4s.
define <4 x float> @fmla_4s(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
;CHECK-LABEL: fmla_4s:
;CHECK: fmla.4s
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = load <4 x float>, <4 x float>* %C
  %tmp4 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp1, <4 x float> %tmp2, <4 x float> %tmp3)
  ret <4 x float> %tmp4
}

; llvm.fma on three loaded vectors; expects fmla.2d.
define <2 x double> @fmla_2d(<2 x double>* %A, <2 x double>* %B, <2 x double>* %C) nounwind {
;CHECK-LABEL: fmla_2d:
;CHECK: fmla.2d
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = load <2 x double>, <2 x double>* %C
  %tmp4 = call <2 x double> @llvm.fma.v2f64(<2 x double> %tmp1, <2 x double> %tmp2, <2 x double> %tmp3)
  ret <2 x double> %tmp4
}

; Generic fused multiply-add intrinsics used by the fmla/fmls tests in this file.
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

; fma whose second multiplicand is negated via (-0.0 - x); expects fmls.2s.
define <2 x float> @fmls_2s(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
;CHECK-LABEL: fmls_2s:
;CHECK: fmls.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = load <2 x float>, <2 x float>* %C
  %tmp4 = fsub <2 x float> <float -0.0, float -0.0>, %tmp2
  %tmp5 = call <2 x float> @llvm.fma.v2f32(<2 x float> %tmp1, <2 x float> %tmp4, <2 x float> %tmp3)
  ret <2 x float> %tmp5
}

; fma whose second multiplicand is negated via (-0.0 - x); expects fmls.4s.
define <4 x float> @fmls_4s(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
;CHECK-LABEL: fmls_4s:
;CHECK: fmls.4s
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = load <4 x float>, <4 x float>* %C
  %tmp4 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %tmp2
  %tmp5 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp1, <4 x float> %tmp4, <4 x float> %tmp3)
  ret <4 x float> %tmp5
}

; fma whose second multiplicand is negated via (-0.0 - x); expects fmls.2d.
define <2 x double> @fmls_2d(<2 x double>* %A, <2 x double>* %B, <2 x double>* %C) nounwind {
;CHECK-LABEL: fmls_2d:
;CHECK: fmls.2d
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = load <2 x double>, <2 x double>* %C
  %tmp4 = fsub <2 x double> <double -0.0, double -0.0>, %tmp2
  %tmp5 = call <2 x double> @llvm.fma.v2f64(<2 x double> %tmp1, <2 x double> %tmp4, <2 x double> %tmp3)
  ret <2 x double> %tmp5
}

; Like fmls_2s, but the negated value is the FIRST fma operand; still expects
; fmls.2s.
define <2 x float> @fmls_commuted_neg_2s(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
;CHECK-LABEL: fmls_commuted_neg_2s:
;CHECK: fmls.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = load <2 x float>, <2 x float>* %C
  %tmp4 = fsub <2 x float> <float -0.0, float -0.0>, %tmp2
  %tmp5 = call <2 x float> @llvm.fma.v2f32(<2 x float> %tmp4, <2 x float> %tmp1, <2 x float> %tmp3)
  ret <2 x float> %tmp5
}

; Like fmls_4s, but the negated value is the first fma operand; expects fmls.4s.
define <4 x float> @fmls_commuted_neg_4s(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
;CHECK-LABEL: fmls_commuted_neg_4s:
;CHECK: fmls.4s
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = load <4 x float>, <4 x float>* %C
  %tmp4 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %tmp2
  %tmp5 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp4, <4 x float> %tmp1, <4 x float> %tmp3)
  ret <4 x float> %tmp5
}

; Like fmls_2d, but the negated value is the first fma operand; expects fmls.2d.
define <2 x double> @fmls_commuted_neg_2d(<2 x double>* %A, <2 x double>* %B, <2 x double>* %C) nounwind {
;CHECK-LABEL: fmls_commuted_neg_2d:
;CHECK: fmls.2d
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = load <2 x double>, <2 x double>* %C
  %tmp4 = fsub <2 x double> <double -0.0, double -0.0>, %tmp2
  %tmp5 = call <2 x double> @llvm.fma.v2f64(<2 x double> %tmp4, <2 x double> %tmp1, <2 x double> %tmp3)
  ret <2 x double> %tmp5
}

; fma of negated %c with a lane-0 splat of %b, accumulating into %a; expects
; fmls.2s.
define <2 x float> @fmls_indexed_2s(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp {
;CHECK-LABEL: fmls_indexed_2s:
;CHECK: fmls.2s
entry:
  %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %c
  %lane = shufflevector <2 x float> %b, <2 x float> undef, <2 x i32> zeroinitializer
  %fmls1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %lane, <2 x float> %a)
  ret <2 x float> %fmls1
}

; fma of negated %c with a lane-0 splat of %b, accumulating into %a; expects
; fmls.4s.
define <4 x float> @fmls_indexed_4s(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone ssp {
;CHECK-LABEL: fmls_indexed_4s:
;CHECK: fmls.4s
entry:
  %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %lane = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
  %fmls1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %lane, <4 x float> %a)
  ret <4 x float> %fmls1
}

; fma of negated %c with a lane-0 splat of %b, accumulating into %a; expects
; fmls.2d.
define <2 x double> @fmls_indexed_2d(<2 x double> %a, <2 x double> %b, <2 x double> %c) nounwind readnone ssp {
;CHECK-LABEL: fmls_indexed_2d:
;CHECK: fmls.2d
entry:
  %0 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
  %lane = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer
  %fmls1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %0, <2 x double> %lane, <2 x double> %a)
  ret <2 x double> %fmls1
}

; fma whose first multiplicand is scalar %c inserted into every lane of a
; <2 x float>; the directives require fmla.2s immediately followed by ret.
define <2 x float> @fmla_indexed_scalar_2s(<2 x float> %a, <2 x float> %b, float %c) nounwind readnone ssp {
entry:
; CHECK-LABEL: fmla_indexed_scalar_2s:
; CHECK-NEXT: fmla.2s
; CHECK-NEXT: ret
  %v1 = insertelement <2 x float> undef, float %c, i32 0
  %v2 = insertelement <2 x float> %v1, float %c, i32 1
  ; Use the fully populated %v2 (both lanes hold %c) rather than %v1, whose
  ; lane 1 is still undef; this matches the 4s and 2d variants below.
  %fmla1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %v2, <2 x float> %b, <2 x float> %a) nounwind
  ret <2 x float> %fmla1
}

; Same as above with %c inserted into all four lanes; expects fmla.4s then ret.
define <4 x float> @fmla_indexed_scalar_4s(<4 x float> %a, <4 x float> %b, float %c) nounwind readnone ssp {
entry:
; CHECK-LABEL: fmla_indexed_scalar_4s:
; CHECK-NEXT: fmla.4s
; CHECK-NEXT: ret
  %v1 = insertelement <4 x float> undef, float %c, i32 0
  %v2 = insertelement <4 x float> %v1, float %c, i32 1
  %v3 = insertelement <4 x float> %v2, float %c, i32 2
  %v4 = insertelement <4 x float> %v3, float %c, i32 3
  %fmla1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %v4, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %fmla1
}

; Same as above with a double %c inserted into both lanes; expects fmla.2d
; then ret.
define <2 x double> @fmla_indexed_scalar_2d(<2 x double> %a, <2 x double> %b, double %c) nounwind readnone ssp {
; CHECK-LABEL: fmla_indexed_scalar_2d:
; CHECK-NEXT: fmla.2d
; CHECK-NEXT: ret
entry:
  %v1 = insertelement <2 x double> undef, double %c, i32 0
  %v2 = insertelement <2 x double> %v1, double %c, i32 1
  %fmla1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %v2, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %fmla1
}

; Multiply by a splat of element 1 of the second loaded vector; the directives
; require that no dup is emitted before mul.4h.
define <4 x i16> @mul_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: mul_4h:
;CHECK-NOT: dup
;CHECK: mul.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %tmp4 = mul <4 x i16> %tmp1, %tmp3
  ret <4 x i16> %tmp4
}

; Multiply by a splat of element 1; no dup may precede mul.8h.
define <8 x i16> @mul_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: mul_8h:
;CHECK-NOT: dup
;CHECK: mul.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %tmp4 = mul <8 x i16> %tmp1, %tmp3
  ret <8 x i16> %tmp4
}

; Multiply by a splat of element 1; no dup may precede mul.2s.
define <2 x i32> @mul_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: mul_2s:
;CHECK-NOT: dup
;CHECK: mul.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
  %tmp4 = mul <2 x i32> %tmp1, %tmp3
  ret <2 x i32> %tmp4
}

; Multiply by a splat of element 1; no dup may precede mul.4s.
define <4 x i32> @mul_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: mul_4s:
;CHECK-NOT: dup
;CHECK: mul.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %tmp4 = mul <4 x i32> %tmp1, %tmp3
  ret <4 x i32> %tmp4
}

; Plain <2 x i64> multiply of the two arguments; the directives expect two
; mul instructions in the output.
define <2 x i64> @mul_2d(<2 x i64> %A, <2 x i64> %B) nounwind {
; CHECK-LABEL: mul_2d:
; CHECK: mul
; CHECK: mul
  %tmp1 = mul <2 x i64> %A, %B
  ret <2 x i64> %tmp1
}

; fmul by a splat of element 1 of the second loaded vector; no dup may precede
; fmul.2s.
define <2 x float> @fmul_lane_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fmul_lane_2s:
;CHECK-NOT: dup
;CHECK: fmul.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = shufflevector <2 x float> %tmp2, <2 x float> %tmp2, <2 x i32> <i32 1, i32 1>
  %tmp4 = fmul <2 x float> %tmp1, %tmp3
  ret <2 x float> %tmp4
}

; fmul by a splat of element 1 of the second loaded vector; the directives
; require that no dup is emitted before fmul.4s.
define <4 x float> @fmul_lane_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fmul_lane_4s:
;CHECK-NOT: dup
;CHECK: fmul.4s
  %vec.a = load <4 x float>, <4 x float>* %A
  %vec.b = load <4 x float>, <4 x float>* %B
  %splat = shufflevector <4 x float> %vec.b, <4 x float> %vec.b, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %prod = fmul <4 x float> %vec.a, %splat
  ret <4 x float> %prod
}

; fmul by a splat of element 1; no dup may precede fmul.2d.
define <2 x double> @fmul_lane_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fmul_lane_2d:
;CHECK-NOT: dup
;CHECK: fmul.2d
  %vec.a = load <2 x double>, <2 x double>* %A
  %vec.b = load <2 x double>, <2 x double>* %B
  %splat = shufflevector <2 x double> %vec.b, <2 x double> %vec.b, <2 x i32> <i32 1, i32 1>
  %prod = fmul <2 x double> %vec.a, %splat
  ret <2 x double> %prod
}

; Scalar float multiplied by element 3 extracted from %vec; expects the
; by-element form "fmul.s s0, s0, v1[3]" with no dup beforehand.
define float @fmul_lane_s(float %A, <4 x float> %vec) nounwind {
;CHECK-LABEL: fmul_lane_s:
;CHECK-NOT: dup
;CHECK: fmul.s s0, s0, v1[3]
  %elt = extractelement <4 x float> %vec, i32 3
  %prod = fmul float %A, %elt
  ret float %prod
}

; Scalar double multiplied by element 1 extracted from %vec; expects
; "fmul.d d0, d0, v1[1]" with no dup beforehand.
define double @fmul_lane_d(double %A, <2 x double> %vec) nounwind {
;CHECK-LABEL: fmul_lane_d:
;CHECK-NOT: dup
;CHECK: fmul.d d0, d0, v1[1]
  %elt = extractelement <2 x double> %vec, i32 1
  %prod = fmul double %A, %elt
  ret double %prod
}



; fmulx intrinsic applied to a splat of element 1 of the second loaded vector;
; no dup may precede fmulx.2s.
define <2 x float> @fmulx_lane_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fmulx_lane_2s:
;CHECK-NOT: dup
;CHECK: fmulx.2s
  %vec.a = load <2 x float>, <2 x float>* %A
  %vec.b = load <2 x float>, <2 x float>* %B
  %splat = shufflevector <2 x float> %vec.b, <2 x float> %vec.b, <2 x i32> <i32 1, i32 1>
  %prod = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %vec.a, <2 x float> %splat)
  ret <2 x float> %prod
}

; fmulx intrinsic applied to a splat of element 1 of the second loaded vector;
; the directives require that no dup is emitted before fmulx.4s.
define <4 x float> @fmulx_lane_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fmulx_lane_4s:
;CHECK-NOT: dup
;CHECK: fmulx.4s
  %vec.a = load <4 x float>, <4 x float>* %A
  %vec.b = load <4 x float>, <4 x float>* %B
  %splat = shufflevector <4 x float> %vec.b, <4 x float> %vec.b, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %prod = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %vec.a, <4 x float> %splat)
  ret <4 x float> %prod
}

; fmulx with a splat of element 1; no dup may precede fmulx.2d.
define <2 x double> @fmulx_lane_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fmulx_lane_2d:
;CHECK-NOT: dup
;CHECK: fmulx.2d
  %vec.a = load <2 x double>, <2 x double>* %A
  %vec.b = load <2 x double>, <2 x double>* %B
  %splat = shufflevector <2 x double> %vec.b, <2 x double> %vec.b, <2 x i32> <i32 1, i32 1>
  %prod = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %vec.a, <2 x double> %splat)
  ret <2 x double> %prod
}

; sqdmulh intrinsic with a splat of element 1; no dup may precede sqdmulh.4h.
define <4 x i16> @sqdmulh_lane_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqdmulh_lane_4h:
;CHECK-NOT: dup
;CHECK: sqdmulh.4h
  %vec.a = load <4 x i16>, <4 x i16>* %A
  %vec.b = load <4 x i16>, <4 x i16>* %B
  %splat = shufflevector <4 x i16> %vec.b, <4 x i16> %vec.b, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %prod = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %vec.a, <4 x i16> %splat)
  ret <4 x i16> %prod
}

; sqdmulh intrinsic with a splat of element 1; no dup may precede sqdmulh.8h.
define <8 x i16> @sqdmulh_lane_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqdmulh_lane_8h:
;CHECK-NOT: dup
;CHECK: sqdmulh.8h
  %vec.a = load <8 x i16>, <8 x i16>* %A
  %vec.b = load <8 x i16>, <8 x i16>* %B
  %splat = shufflevector <8 x i16> %vec.b, <8 x i16> %vec.b, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %prod = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %vec.a, <8 x i16> %splat)
  ret <8 x i16> %prod
}

; sqdmulh intrinsic with a splat of element 1; no dup may precede sqdmulh.2s.
define <2 x i32> @sqdmulh_lane_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqdmulh_lane_2s:
;CHECK-NOT: dup
;CHECK: sqdmulh.2s
  %vec.a = load <2 x i32>, <2 x i32>* %A
  %vec.b = load <2 x i32>, <2 x i32>* %B
  %splat = shufflevector <2 x i32> %vec.b, <2 x i32> %vec.b, <2 x i32> <i32 1, i32 1>
  %prod = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %vec.a, <2 x i32> %splat)
  ret <2 x i32> %prod
}

; sqdmulh intrinsic with a splat of element 1; no dup may precede sqdmulh.4s.
define <4 x i32> @sqdmulh_lane_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqdmulh_lane_4s:
;CHECK-NOT: dup
;CHECK: sqdmulh.4s
  %vec.a = load <4 x i32>, <4 x i32>* %A
  %vec.b = load <4 x i32>, <4 x i32>* %B
  %splat = shufflevector <4 x i32> %vec.b, <4 x i32> %vec.b, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %prod = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %vec.a, <4 x i32> %splat)
  ret <4 x i32> %prod
}

; Scalar sqdmulh of %A with element 1 extracted from %B; expects the
; by-element sqdmulh.s form indexing lane [1], with no dup beforehand.
define i32 @sqdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind {
;CHECK-LABEL: sqdmulh_lane_1s:
;CHECK-NOT: dup
;CHECK: sqdmulh.s s0, {{s[0-9]+}}, {{v[0-9]+}}[1]
  %elt = extractelement <4 x i32> %B, i32 1
  %prod = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %A, i32 %elt)
  ret i32 %prod
}

; sqrdmulh intrinsic with a splat of element 1; no dup may precede sqrdmulh.4h.
define <4 x i16> @sqrdmulh_lane_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqrdmulh_lane_4h:
;CHECK-NOT: dup
;CHECK: sqrdmulh.4h
  %vec.a = load <4 x i16>, <4 x i16>* %A
  %vec.b = load <4 x i16>, <4 x i16>* %B
  %splat = shufflevector <4 x i16> %vec.b, <4 x i16> %vec.b, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %vec.a, <4 x i16> %splat)
  ret <4 x i16> %prod
}

; sqrdmulh intrinsic with a splat of element 1; no dup may precede sqrdmulh.8h.
define <8 x i16> @sqrdmulh_lane_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqrdmulh_lane_8h:
;CHECK-NOT: dup
;CHECK: sqrdmulh.8h
  %vec.a = load <8 x i16>, <8 x i16>* %A
  %vec.b = load <8 x i16>, <8 x i16>* %B
  %splat = shufflevector <8 x i16> %vec.b, <8 x i16> %vec.b, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %vec.a, <8 x i16> %splat)
  ret <8 x i16> %prod
}

; sqrdmulh intrinsic with a splat of element 1; no dup may precede sqrdmulh.2s.
define <2 x i32> @sqrdmulh_lane_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqrdmulh_lane_2s:
;CHECK-NOT: dup
;CHECK: sqrdmulh.2s
  %vec.a = load <2 x i32>, <2 x i32>* %A
  %vec.b = load <2 x i32>, <2 x i32>* %B
  %splat = shufflevector <2 x i32> %vec.b, <2 x i32> %vec.b, <2 x i32> <i32 1, i32 1>
  %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %vec.a, <2 x i32> %splat)
  ret <2 x i32> %prod
}

; sqrdmulh intrinsic with a splat of element 1; no dup may precede sqrdmulh.4s.
define <4 x i32> @sqrdmulh_lane_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqrdmulh_lane_4s:
;CHECK-NOT: dup
;CHECK: sqrdmulh.4s
  %vec.a = load <4 x i32>, <4 x i32>* %A
  %vec.b = load <4 x i32>, <4 x i32>* %B
  %splat = shufflevector <4 x i32> %vec.b, <4 x i32> %vec.b, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %vec.a, <4 x i32> %splat)
  ret <4 x i32> %prod
}

Workerdefine i32 @sqrdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind { 844*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqrdmulh_lane_1s: 845*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 846*9880d681SAndroid Build Coastguard Worker;CHECK: sqrdmulh.s s0, {{s[0-9]+}}, {{v[0-9]+}}[1] 847*9880d681SAndroid Build Coastguard Worker %tmp1 = extractelement <4 x i32> %B, i32 1 848*9880d681SAndroid Build Coastguard Worker %tmp2 = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %A, i32 %tmp1) 849*9880d681SAndroid Build Coastguard Worker ret i32 %tmp2 850*9880d681SAndroid Build Coastguard Worker} 851*9880d681SAndroid Build Coastguard Worker 852*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @sqdmull_lane_4s(<4 x i16>* %A, <4 x i16>* %B) nounwind { 853*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmull_lane_4s: 854*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 855*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmull.4s 856*9880d681SAndroid Build Coastguard Worker %tmp1 = load <4 x i16>, <4 x i16>* %A 857*9880d681SAndroid Build Coastguard Worker %tmp2 = load <4 x i16>, <4 x i16>* %B 858*9880d681SAndroid Build Coastguard Worker %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 859*9880d681SAndroid Build Coastguard Worker %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) 860*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %tmp4 861*9880d681SAndroid Build Coastguard Worker} 862*9880d681SAndroid Build Coastguard Worker 863*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @sqdmull_lane_2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { 864*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmull_lane_2d: 865*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 866*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmull.2d 867*9880d681SAndroid Build Coastguard Worker %tmp1 = load <2 x i32>, <2 x i32>* %A 868*9880d681SAndroid Build 
Coastguard Worker %tmp2 = load <2 x i32>, <2 x i32>* %B 869*9880d681SAndroid Build Coastguard Worker %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1> 870*9880d681SAndroid Build Coastguard Worker %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) 871*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %tmp4 872*9880d681SAndroid Build Coastguard Worker} 873*9880d681SAndroid Build Coastguard Worker 874*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @sqdmull2_lane_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind { 875*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmull2_lane_4s: 876*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 877*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmull2.4s 878*9880d681SAndroid Build Coastguard Worker %load1 = load <8 x i16>, <8 x i16>* %A 879*9880d681SAndroid Build Coastguard Worker %load2 = load <8 x i16>, <8 x i16>* %B 880*9880d681SAndroid Build Coastguard Worker %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 881*9880d681SAndroid Build Coastguard Worker %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 882*9880d681SAndroid Build Coastguard Worker %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) 883*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %tmp4 884*9880d681SAndroid Build Coastguard Worker} 885*9880d681SAndroid Build Coastguard Worker 886*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @sqdmull2_lane_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind { 887*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmull2_lane_2d: 888*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 889*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmull2.2d 890*9880d681SAndroid Build Coastguard Worker %load1 = load <4 x i32>, <4 x i32>* %A 891*9880d681SAndroid Build Coastguard Worker %load2 = load <4 
x i32>, <4 x i32>* %B 892*9880d681SAndroid Build Coastguard Worker %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 893*9880d681SAndroid Build Coastguard Worker %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1> 894*9880d681SAndroid Build Coastguard Worker %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) 895*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %tmp4 896*9880d681SAndroid Build Coastguard Worker} 897*9880d681SAndroid Build Coastguard Worker 898*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @umull_lane_4s(<4 x i16>* %A, <4 x i16>* %B) nounwind { 899*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: umull_lane_4s: 900*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 901*9880d681SAndroid Build Coastguard Worker;CHECK: umull.4s 902*9880d681SAndroid Build Coastguard Worker %tmp1 = load <4 x i16>, <4 x i16>* %A 903*9880d681SAndroid Build Coastguard Worker %tmp2 = load <4 x i16>, <4 x i16>* %B 904*9880d681SAndroid Build Coastguard Worker %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 905*9880d681SAndroid Build Coastguard Worker %tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) 906*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %tmp4 907*9880d681SAndroid Build Coastguard Worker} 908*9880d681SAndroid Build Coastguard Worker 909*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @umull_lane_2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { 910*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: umull_lane_2d: 911*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 912*9880d681SAndroid Build Coastguard Worker;CHECK: umull.2d 913*9880d681SAndroid Build Coastguard Worker %tmp1 = load <2 x i32>, <2 x i32>* %A 914*9880d681SAndroid Build Coastguard Worker %tmp2 = load <2 x i32>, <2 x i32>* %B 915*9880d681SAndroid Build Coastguard Worker 
%tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1> 916*9880d681SAndroid Build Coastguard Worker %tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) 917*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %tmp4 918*9880d681SAndroid Build Coastguard Worker} 919*9880d681SAndroid Build Coastguard Worker 920*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @smull_lane_4s(<4 x i16>* %A, <4 x i16>* %B) nounwind { 921*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: smull_lane_4s: 922*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 923*9880d681SAndroid Build Coastguard Worker;CHECK: smull.4s 924*9880d681SAndroid Build Coastguard Worker %tmp1 = load <4 x i16>, <4 x i16>* %A 925*9880d681SAndroid Build Coastguard Worker %tmp2 = load <4 x i16>, <4 x i16>* %B 926*9880d681SAndroid Build Coastguard Worker %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 927*9880d681SAndroid Build Coastguard Worker %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) 928*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %tmp4 929*9880d681SAndroid Build Coastguard Worker} 930*9880d681SAndroid Build Coastguard Worker 931*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @smull_lane_2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { 932*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: smull_lane_2d: 933*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 934*9880d681SAndroid Build Coastguard Worker;CHECK: smull.2d 935*9880d681SAndroid Build Coastguard Worker %tmp1 = load <2 x i32>, <2 x i32>* %A 936*9880d681SAndroid Build Coastguard Worker %tmp2 = load <2 x i32>, <2 x i32>* %B 937*9880d681SAndroid Build Coastguard Worker %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1> 938*9880d681SAndroid Build Coastguard Worker %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x 
i32> %tmp3) 939*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %tmp4 940*9880d681SAndroid Build Coastguard Worker} 941*9880d681SAndroid Build Coastguard Worker 942*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @smlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind { 943*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: smlal_lane_4s: 944*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 945*9880d681SAndroid Build Coastguard Worker;CHECK: smlal.4s 946*9880d681SAndroid Build Coastguard Worker %tmp1 = load <4 x i16>, <4 x i16>* %A 947*9880d681SAndroid Build Coastguard Worker %tmp2 = load <4 x i16>, <4 x i16>* %B 948*9880d681SAndroid Build Coastguard Worker %tmp3 = load <4 x i32>, <4 x i32>* %C 949*9880d681SAndroid Build Coastguard Worker %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 950*9880d681SAndroid Build Coastguard Worker %tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) 951*9880d681SAndroid Build Coastguard Worker %tmp6 = add <4 x i32> %tmp3, %tmp5 952*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %tmp6 953*9880d681SAndroid Build Coastguard Worker} 954*9880d681SAndroid Build Coastguard Worker 955*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @smlal_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind { 956*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: smlal_lane_2d: 957*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 958*9880d681SAndroid Build Coastguard Worker;CHECK: smlal.2d 959*9880d681SAndroid Build Coastguard Worker %tmp1 = load <2 x i32>, <2 x i32>* %A 960*9880d681SAndroid Build Coastguard Worker %tmp2 = load <2 x i32>, <2 x i32>* %B 961*9880d681SAndroid Build Coastguard Worker %tmp3 = load <2 x i64>, <2 x i64>* %C 962*9880d681SAndroid Build Coastguard Worker %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1> 963*9880d681SAndroid Build Coastguard Worker 
%tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) 964*9880d681SAndroid Build Coastguard Worker %tmp6 = add <2 x i64> %tmp3, %tmp5 965*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %tmp6 966*9880d681SAndroid Build Coastguard Worker} 967*9880d681SAndroid Build Coastguard Worker 968*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @sqdmlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind { 969*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlal_lane_4s: 970*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 971*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlal.4s 972*9880d681SAndroid Build Coastguard Worker %tmp1 = load <4 x i16>, <4 x i16>* %A 973*9880d681SAndroid Build Coastguard Worker %tmp2 = load <4 x i16>, <4 x i16>* %B 974*9880d681SAndroid Build Coastguard Worker %tmp3 = load <4 x i32>, <4 x i32>* %C 975*9880d681SAndroid Build Coastguard Worker %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 976*9880d681SAndroid Build Coastguard Worker %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) 977*9880d681SAndroid Build Coastguard Worker %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5) 978*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %tmp6 979*9880d681SAndroid Build Coastguard Worker} 980*9880d681SAndroid Build Coastguard Worker 981*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @sqdmlal_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind { 982*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlal_lane_2d: 983*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 984*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlal.2d 985*9880d681SAndroid Build Coastguard Worker %tmp1 = load <2 x i32>, <2 x i32>* %A 986*9880d681SAndroid Build Coastguard Worker %tmp2 = load <2 x i32>, <2 x i32>* %B 987*9880d681SAndroid Build 
Coastguard Worker %tmp3 = load <2 x i64>, <2 x i64>* %C 988*9880d681SAndroid Build Coastguard Worker %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1> 989*9880d681SAndroid Build Coastguard Worker %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) 990*9880d681SAndroid Build Coastguard Worker %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5) 991*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %tmp6 992*9880d681SAndroid Build Coastguard Worker} 993*9880d681SAndroid Build Coastguard Worker 994*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @sqdmlal2_lane_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind { 995*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlal2_lane_4s: 996*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 997*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlal2.4s 998*9880d681SAndroid Build Coastguard Worker %load1 = load <8 x i16>, <8 x i16>* %A 999*9880d681SAndroid Build Coastguard Worker %load2 = load <8 x i16>, <8 x i16>* %B 1000*9880d681SAndroid Build Coastguard Worker %tmp3 = load <4 x i32>, <4 x i32>* %C 1001*9880d681SAndroid Build Coastguard Worker %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1002*9880d681SAndroid Build Coastguard Worker %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1003*9880d681SAndroid Build Coastguard Worker %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) 1004*9880d681SAndroid Build Coastguard Worker %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5) 1005*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %tmp6 1006*9880d681SAndroid Build Coastguard Worker} 1007*9880d681SAndroid Build Coastguard Worker 1008*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @sqdmlal2_lane_2d(<4 x i32>* 
%A, <4 x i32>* %B, <2 x i64>* %C) nounwind { 1009*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlal2_lane_2d: 1010*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 1011*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlal2.2d 1012*9880d681SAndroid Build Coastguard Worker %load1 = load <4 x i32>, <4 x i32>* %A 1013*9880d681SAndroid Build Coastguard Worker %load2 = load <4 x i32>, <4 x i32>* %B 1014*9880d681SAndroid Build Coastguard Worker %tmp3 = load <2 x i64>, <2 x i64>* %C 1015*9880d681SAndroid Build Coastguard Worker %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1016*9880d681SAndroid Build Coastguard Worker %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1> 1017*9880d681SAndroid Build Coastguard Worker %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) 1018*9880d681SAndroid Build Coastguard Worker %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5) 1019*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %tmp6 1020*9880d681SAndroid Build Coastguard Worker} 1021*9880d681SAndroid Build Coastguard Worker 1022*9880d681SAndroid Build Coastguard Workerdefine i32 @sqdmlal_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind { 1023*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlal_lane_1s: 1024*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlal.4s 1025*9880d681SAndroid Build Coastguard Worker %lhs = insertelement <4 x i16> undef, i16 %B, i32 0 1026*9880d681SAndroid Build Coastguard Worker %rhs = shufflevector <4 x i16> %C, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 1027*9880d681SAndroid Build Coastguard Worker %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs) 1028*9880d681SAndroid Build Coastguard Worker %prod = extractelement <4 x i32> %prod.vec, i32 0 1029*9880d681SAndroid Build Coastguard Worker %res = call i32 
@llvm.aarch64.neon.sqadd.i32(i32 %A, i32 %prod) 1030*9880d681SAndroid Build Coastguard Worker ret i32 %res 1031*9880d681SAndroid Build Coastguard Worker} 1032*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32) 1033*9880d681SAndroid Build Coastguard Worker 1034*9880d681SAndroid Build Coastguard Workerdefine i32 @sqdmlsl_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind { 1035*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlsl_lane_1s: 1036*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlsl.4s 1037*9880d681SAndroid Build Coastguard Worker %lhs = insertelement <4 x i16> undef, i16 %B, i32 0 1038*9880d681SAndroid Build Coastguard Worker %rhs = shufflevector <4 x i16> %C, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 1039*9880d681SAndroid Build Coastguard Worker %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs) 1040*9880d681SAndroid Build Coastguard Worker %prod = extractelement <4 x i32> %prod.vec, i32 0 1041*9880d681SAndroid Build Coastguard Worker %res = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %A, i32 %prod) 1042*9880d681SAndroid Build Coastguard Worker ret i32 %res 1043*9880d681SAndroid Build Coastguard Worker} 1044*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32) 1045*9880d681SAndroid Build Coastguard Worker 1046*9880d681SAndroid Build Coastguard Workerdefine i64 @sqdmlal_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind { 1047*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlal_lane_1d: 1048*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlal.s 1049*9880d681SAndroid Build Coastguard Worker %rhs = extractelement <2 x i32> %C, i32 1 1050*9880d681SAndroid Build Coastguard Worker %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs) 1051*9880d681SAndroid Build Coastguard Worker %res = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %A, i64 %prod) 1052*9880d681SAndroid Build Coastguard 
Worker ret i64 %res 1053*9880d681SAndroid Build Coastguard Worker} 1054*9880d681SAndroid Build Coastguard Workerdeclare i64 @llvm.aarch64.neon.sqdmulls.scalar(i32, i32) 1055*9880d681SAndroid Build Coastguard Workerdeclare i64 @llvm.aarch64.neon.sqadd.i64(i64, i64) 1056*9880d681SAndroid Build Coastguard Worker 1057*9880d681SAndroid Build Coastguard Workerdefine i64 @sqdmlsl_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind { 1058*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlsl_lane_1d: 1059*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlsl.s 1060*9880d681SAndroid Build Coastguard Worker %rhs = extractelement <2 x i32> %C, i32 1 1061*9880d681SAndroid Build Coastguard Worker %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs) 1062*9880d681SAndroid Build Coastguard Worker %res = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %A, i64 %prod) 1063*9880d681SAndroid Build Coastguard Worker ret i64 %res 1064*9880d681SAndroid Build Coastguard Worker} 1065*9880d681SAndroid Build Coastguard Workerdeclare i64 @llvm.aarch64.neon.sqsub.i64(i64, i64) 1066*9880d681SAndroid Build Coastguard Worker 1067*9880d681SAndroid Build Coastguard Worker 1068*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @umlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind { 1069*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: umlal_lane_4s: 1070*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 1071*9880d681SAndroid Build Coastguard Worker;CHECK: umlal.4s 1072*9880d681SAndroid Build Coastguard Worker %tmp1 = load <4 x i16>, <4 x i16>* %A 1073*9880d681SAndroid Build Coastguard Worker %tmp2 = load <4 x i16>, <4 x i16>* %B 1074*9880d681SAndroid Build Coastguard Worker %tmp3 = load <4 x i32>, <4 x i32>* %C 1075*9880d681SAndroid Build Coastguard Worker %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1076*9880d681SAndroid Build Coastguard Worker %tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x 
i16> %tmp1, <4 x i16> %tmp4) 1077*9880d681SAndroid Build Coastguard Worker %tmp6 = add <4 x i32> %tmp3, %tmp5 1078*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %tmp6 1079*9880d681SAndroid Build Coastguard Worker} 1080*9880d681SAndroid Build Coastguard Worker 1081*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @umlal_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind { 1082*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: umlal_lane_2d: 1083*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 1084*9880d681SAndroid Build Coastguard Worker;CHECK: umlal.2d 1085*9880d681SAndroid Build Coastguard Worker %tmp1 = load <2 x i32>, <2 x i32>* %A 1086*9880d681SAndroid Build Coastguard Worker %tmp2 = load <2 x i32>, <2 x i32>* %B 1087*9880d681SAndroid Build Coastguard Worker %tmp3 = load <2 x i64>, <2 x i64>* %C 1088*9880d681SAndroid Build Coastguard Worker %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1> 1089*9880d681SAndroid Build Coastguard Worker %tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) 1090*9880d681SAndroid Build Coastguard Worker %tmp6 = add <2 x i64> %tmp3, %tmp5 1091*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %tmp6 1092*9880d681SAndroid Build Coastguard Worker} 1093*9880d681SAndroid Build Coastguard Worker 1094*9880d681SAndroid Build Coastguard Worker 1095*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @smlsl_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind { 1096*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: smlsl_lane_4s: 1097*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 1098*9880d681SAndroid Build Coastguard Worker;CHECK: smlsl.4s 1099*9880d681SAndroid Build Coastguard Worker %tmp1 = load <4 x i16>, <4 x i16>* %A 1100*9880d681SAndroid Build Coastguard Worker %tmp2 = load <4 x i16>, <4 x i16>* %B 1101*9880d681SAndroid Build Coastguard Worker %tmp3 = load <4 x i32>, <4 x i32>* %C 
1102*9880d681SAndroid Build Coastguard Worker %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1103*9880d681SAndroid Build Coastguard Worker %tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) 1104*9880d681SAndroid Build Coastguard Worker %tmp6 = sub <4 x i32> %tmp3, %tmp5 1105*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %tmp6 1106*9880d681SAndroid Build Coastguard Worker} 1107*9880d681SAndroid Build Coastguard Worker 1108*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @smlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind { 1109*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: smlsl_lane_2d: 1110*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 1111*9880d681SAndroid Build Coastguard Worker;CHECK: smlsl.2d 1112*9880d681SAndroid Build Coastguard Worker %tmp1 = load <2 x i32>, <2 x i32>* %A 1113*9880d681SAndroid Build Coastguard Worker %tmp2 = load <2 x i32>, <2 x i32>* %B 1114*9880d681SAndroid Build Coastguard Worker %tmp3 = load <2 x i64>, <2 x i64>* %C 1115*9880d681SAndroid Build Coastguard Worker %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1> 1116*9880d681SAndroid Build Coastguard Worker %tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) 1117*9880d681SAndroid Build Coastguard Worker %tmp6 = sub <2 x i64> %tmp3, %tmp5 1118*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %tmp6 1119*9880d681SAndroid Build Coastguard Worker} 1120*9880d681SAndroid Build Coastguard Worker 1121*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @sqdmlsl_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind { 1122*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlsl_lane_4s: 1123*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 1124*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlsl.4s 1125*9880d681SAndroid Build Coastguard Worker %tmp1 = load <4 x 
i16>, <4 x i16>* %A 1126*9880d681SAndroid Build Coastguard Worker %tmp2 = load <4 x i16>, <4 x i16>* %B 1127*9880d681SAndroid Build Coastguard Worker %tmp3 = load <4 x i32>, <4 x i32>* %C 1128*9880d681SAndroid Build Coastguard Worker %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1129*9880d681SAndroid Build Coastguard Worker %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) 1130*9880d681SAndroid Build Coastguard Worker %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5) 1131*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %tmp6 1132*9880d681SAndroid Build Coastguard Worker} 1133*9880d681SAndroid Build Coastguard Worker 1134*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @sqdmlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind { 1135*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlsl_lane_2d: 1136*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 1137*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlsl.2d 1138*9880d681SAndroid Build Coastguard Worker %tmp1 = load <2 x i32>, <2 x i32>* %A 1139*9880d681SAndroid Build Coastguard Worker %tmp2 = load <2 x i32>, <2 x i32>* %B 1140*9880d681SAndroid Build Coastguard Worker %tmp3 = load <2 x i64>, <2 x i64>* %C 1141*9880d681SAndroid Build Coastguard Worker %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1> 1142*9880d681SAndroid Build Coastguard Worker %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) 1143*9880d681SAndroid Build Coastguard Worker %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5) 1144*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %tmp6 1145*9880d681SAndroid Build Coastguard Worker} 1146*9880d681SAndroid Build Coastguard Worker 1147*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @sqdmlsl2_lane_4s(<8 x i16>* %A, <8 
x i16>* %B, <4 x i32>* %C) nounwind { 1148*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlsl2_lane_4s: 1149*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 1150*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlsl2.4s 1151*9880d681SAndroid Build Coastguard Worker %load1 = load <8 x i16>, <8 x i16>* %A 1152*9880d681SAndroid Build Coastguard Worker %load2 = load <8 x i16>, <8 x i16>* %B 1153*9880d681SAndroid Build Coastguard Worker %tmp3 = load <4 x i32>, <4 x i32>* %C 1154*9880d681SAndroid Build Coastguard Worker %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1155*9880d681SAndroid Build Coastguard Worker %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1156*9880d681SAndroid Build Coastguard Worker %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) 1157*9880d681SAndroid Build Coastguard Worker %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5) 1158*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %tmp6 1159*9880d681SAndroid Build Coastguard Worker} 1160*9880d681SAndroid Build Coastguard Worker 1161*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @sqdmlsl2_lane_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind { 1162*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: sqdmlsl2_lane_2d: 1163*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 1164*9880d681SAndroid Build Coastguard Worker;CHECK: sqdmlsl2.2d 1165*9880d681SAndroid Build Coastguard Worker %load1 = load <4 x i32>, <4 x i32>* %A 1166*9880d681SAndroid Build Coastguard Worker %load2 = load <4 x i32>, <4 x i32>* %B 1167*9880d681SAndroid Build Coastguard Worker %tmp3 = load <2 x i64>, <2 x i64>* %C 1168*9880d681SAndroid Build Coastguard Worker %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1169*9880d681SAndroid Build Coastguard Worker %tmp2 = shufflevector <4 
x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1> 1170*9880d681SAndroid Build Coastguard Worker %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) 1171*9880d681SAndroid Build Coastguard Worker %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5) 1172*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %tmp6 1173*9880d681SAndroid Build Coastguard Worker} 1174*9880d681SAndroid Build Coastguard Worker 1175*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @umlsl_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind { 1176*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: umlsl_lane_4s: 1177*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 1178*9880d681SAndroid Build Coastguard Worker;CHECK: umlsl.4s 1179*9880d681SAndroid Build Coastguard Worker %tmp1 = load <4 x i16>, <4 x i16>* %A 1180*9880d681SAndroid Build Coastguard Worker %tmp2 = load <4 x i16>, <4 x i16>* %B 1181*9880d681SAndroid Build Coastguard Worker %tmp3 = load <4 x i32>, <4 x i32>* %C 1182*9880d681SAndroid Build Coastguard Worker %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1183*9880d681SAndroid Build Coastguard Worker %tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4) 1184*9880d681SAndroid Build Coastguard Worker %tmp6 = sub <4 x i32> %tmp3, %tmp5 1185*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %tmp6 1186*9880d681SAndroid Build Coastguard Worker} 1187*9880d681SAndroid Build Coastguard Worker 1188*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @umlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind { 1189*9880d681SAndroid Build Coastguard Worker;CHECK-LABEL: umlsl_lane_2d: 1190*9880d681SAndroid Build Coastguard Worker;CHECK-NOT: dup 1191*9880d681SAndroid Build Coastguard Worker;CHECK: umlsl.2d 1192*9880d681SAndroid Build Coastguard Worker %tmp1 = load <2 x i32>, <2 x i32>* %A 
1193*9880d681SAndroid Build Coastguard Worker %tmp2 = load <2 x i32>, <2 x i32>* %B 1194*9880d681SAndroid Build Coastguard Worker %tmp3 = load <2 x i64>, <2 x i64>* %C 1195*9880d681SAndroid Build Coastguard Worker %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1> 1196*9880d681SAndroid Build Coastguard Worker %tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4) 1197*9880d681SAndroid Build Coastguard Worker %tmp6 = sub <2 x i64> %tmp3, %tmp5 1198*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %tmp6 1199*9880d681SAndroid Build Coastguard Worker} 1200*9880d681SAndroid Build Coastguard Worker

; Scalar FMULX
;
; Each test in this group expects a single fmulx as the first instruction
; after the function label, followed directly by ret. These expectations were
; previously spelled "CHECKNEXT", which FileCheck does not treat as a
; directive, so nothing after the label was being verified.

; Register-register form, single precision.
define float @fmulxs(float %a, float %b) nounwind {
; CHECK-LABEL: fmulxs:
; CHECK-NEXT: fmulx s0, s0, s1
  %fmulx.i = tail call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) nounwind
; CHECK-NEXT: ret
  ret float %fmulx.i
}

; Register-register form, double precision.
define double @fmulxd(double %a, double %b) nounwind {
; CHECK-LABEL: fmulxd:
; CHECK-NEXT: fmulx d0, d0, d1
  %fmulx.i = tail call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) nounwind
; CHECK-NEXT: ret
  ret double %fmulx.i
}

; By-element form: the extract of lane 3 from %vec is expected to be folded
; into the indexed operand of fmulx instead of being materialized separately.
define float @fmulxs_lane(float %a, <4 x float> %vec) nounwind {
; CHECK-LABEL: fmulxs_lane:
; CHECK-NEXT: fmulx.s s0, s0, v1[3]
  %b = extractelement <4 x float> %vec, i32 3
  %fmulx.i = tail call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) nounwind
; CHECK-NEXT: ret
  ret float %fmulx.i
}

; By-element form, double precision: lane 1 of %vec is expected to be folded
; into the indexed operand.
; NOTE(review): the expectation used to read "fmulx d0, d0, v1[1]" while the
; directive was inert. It now carries the ".d" suffix to match the Apple-syntax
; by-element spelling used by fmulxs_lane above (the RUN line selects
; -aarch64-neon-syntax=apple) -- confirm against actual llc output.
define double @fmulxd_lane(double %a, <2 x double> %vec) nounwind {
; CHECK-LABEL: fmulxd_lane:
; CHECK-NEXT: fmulx.d d0, d0, v1[1]
  %b = extractelement <2 x double> %vec, i32 1
  %fmulx.i = tail call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) nounwind
; CHECK-NEXT: ret
  ret double %fmulx.i
}

declare double @llvm.aarch64.neon.fmulx.f64(double, double) nounwind readnone
declare float @llvm.aarch64.neon.fmulx.f32(float, float) nounwind readnone

1238*9880d681SAndroid Build Coastguard Worker 1239*9880d681SAndroid Build Coastguard Worker 1240*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @smull2_8h_simple(<16 x i8> %a, <16 x i8> %b) nounwind { 1241*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: smull2_8h_simple: 1242*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: smull2.8h v0, v0, v1 1243*9880d681SAndroid Build Coastguard Worker; 
CHECK-NEXT: ret 1244*9880d681SAndroid Build Coastguard Worker %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1245*9880d681SAndroid Build Coastguard Worker %2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1246*9880d681SAndroid Build Coastguard Worker %3 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %1, <8 x i8> %2) #2 1247*9880d681SAndroid Build Coastguard Worker ret <8 x i16> %3 1248*9880d681SAndroid Build Coastguard Worker} 1249*9880d681SAndroid Build Coastguard Worker 1250*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @foo0(<16 x i8> %a, <16 x i8> %b) nounwind { 1251*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: foo0: 1252*9880d681SAndroid Build Coastguard Worker; CHECK: smull2.8h v0, v0, v1 1253*9880d681SAndroid Build Coastguard Worker %tmp = bitcast <16 x i8> %a to <2 x i64> 1254*9880d681SAndroid Build Coastguard Worker %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> 1255*9880d681SAndroid Build Coastguard Worker %tmp1 = bitcast <1 x i64> %shuffle.i.i to <8 x i8> 1256*9880d681SAndroid Build Coastguard Worker %tmp2 = bitcast <16 x i8> %b to <2 x i64> 1257*9880d681SAndroid Build Coastguard Worker %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1> 1258*9880d681SAndroid Build Coastguard Worker %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <8 x i8> 1259*9880d681SAndroid Build Coastguard Worker %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind 1260*9880d681SAndroid Build Coastguard Worker ret <8 x i16> %vmull.i.i 1261*9880d681SAndroid Build Coastguard Worker} 1262*9880d681SAndroid Build Coastguard Worker 1263*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @foo1(<8 x i16> %a, <8 x i16> %b) nounwind { 1264*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: 
foo1: 1265*9880d681SAndroid Build Coastguard Worker; CHECK: smull2.4s v0, v0, v1 1266*9880d681SAndroid Build Coastguard Worker %tmp = bitcast <8 x i16> %a to <2 x i64> 1267*9880d681SAndroid Build Coastguard Worker %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> 1268*9880d681SAndroid Build Coastguard Worker %tmp1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16> 1269*9880d681SAndroid Build Coastguard Worker %tmp2 = bitcast <8 x i16> %b to <2 x i64> 1270*9880d681SAndroid Build Coastguard Worker %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1> 1271*9880d681SAndroid Build Coastguard Worker %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16> 1272*9880d681SAndroid Build Coastguard Worker %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind 1273*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %vmull2.i.i 1274*9880d681SAndroid Build Coastguard Worker} 1275*9880d681SAndroid Build Coastguard Worker 1276*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @foo2(<4 x i32> %a, <4 x i32> %b) nounwind { 1277*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: foo2: 1278*9880d681SAndroid Build Coastguard Worker; CHECK: smull2.2d v0, v0, v1 1279*9880d681SAndroid Build Coastguard Worker %tmp = bitcast <4 x i32> %a to <2 x i64> 1280*9880d681SAndroid Build Coastguard Worker %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> 1281*9880d681SAndroid Build Coastguard Worker %tmp1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32> 1282*9880d681SAndroid Build Coastguard Worker %tmp2 = bitcast <4 x i32> %b to <2 x i64> 1283*9880d681SAndroid Build Coastguard Worker %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1> 1284*9880d681SAndroid Build Coastguard Worker %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32> 1285*9880d681SAndroid Build Coastguard Worker %vmull2.i.i = tail call <2 x i64> 
@llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind 1286*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %vmull2.i.i 1287*9880d681SAndroid Build Coastguard Worker} 1288*9880d681SAndroid Build Coastguard Worker 1289*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @foo3(<16 x i8> %a, <16 x i8> %b) nounwind { 1290*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: foo3: 1291*9880d681SAndroid Build Coastguard Worker; CHECK: umull2.8h v0, v0, v1 1292*9880d681SAndroid Build Coastguard Worker %tmp = bitcast <16 x i8> %a to <2 x i64> 1293*9880d681SAndroid Build Coastguard Worker %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> 1294*9880d681SAndroid Build Coastguard Worker %tmp1 = bitcast <1 x i64> %shuffle.i.i to <8 x i8> 1295*9880d681SAndroid Build Coastguard Worker %tmp2 = bitcast <16 x i8> %b to <2 x i64> 1296*9880d681SAndroid Build Coastguard Worker %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1> 1297*9880d681SAndroid Build Coastguard Worker %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <8 x i8> 1298*9880d681SAndroid Build Coastguard Worker %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind 1299*9880d681SAndroid Build Coastguard Worker ret <8 x i16> %vmull.i.i 1300*9880d681SAndroid Build Coastguard Worker} 1301*9880d681SAndroid Build Coastguard Worker 1302*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @foo4(<8 x i16> %a, <8 x i16> %b) nounwind { 1303*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: foo4: 1304*9880d681SAndroid Build Coastguard Worker; CHECK: umull2.4s v0, v0, v1 1305*9880d681SAndroid Build Coastguard Worker %tmp = bitcast <8 x i16> %a to <2 x i64> 1306*9880d681SAndroid Build Coastguard Worker %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> 1307*9880d681SAndroid Build Coastguard Worker %tmp1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16> 
1308*9880d681SAndroid Build Coastguard Worker %tmp2 = bitcast <8 x i16> %b to <2 x i64> 1309*9880d681SAndroid Build Coastguard Worker %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1> 1310*9880d681SAndroid Build Coastguard Worker %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16> 1311*9880d681SAndroid Build Coastguard Worker %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind 1312*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %vmull2.i.i 1313*9880d681SAndroid Build Coastguard Worker} 1314*9880d681SAndroid Build Coastguard Worker 1315*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @foo5(<4 x i32> %a, <4 x i32> %b) nounwind { 1316*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: foo5: 1317*9880d681SAndroid Build Coastguard Worker; CHECK: umull2.2d v0, v0, v1 1318*9880d681SAndroid Build Coastguard Worker %tmp = bitcast <4 x i32> %a to <2 x i64> 1319*9880d681SAndroid Build Coastguard Worker %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> 1320*9880d681SAndroid Build Coastguard Worker %tmp1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32> 1321*9880d681SAndroid Build Coastguard Worker %tmp2 = bitcast <4 x i32> %b to <2 x i64> 1322*9880d681SAndroid Build Coastguard Worker %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1> 1323*9880d681SAndroid Build Coastguard Worker %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32> 1324*9880d681SAndroid Build Coastguard Worker %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind 1325*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %vmull2.i.i 1326*9880d681SAndroid Build Coastguard Worker} 1327*9880d681SAndroid Build Coastguard Worker 1328*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @foo6(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp { 1329*9880d681SAndroid Build 
Coastguard Worker; CHECK-LABEL: foo6: 1330*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: smull2.4s v0, v1, v2[1] 1331*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1332*9880d681SAndroid Build Coastguard Workerentry: 1333*9880d681SAndroid Build Coastguard Worker %0 = bitcast <8 x i16> %b to <2 x i64> 1334*9880d681SAndroid Build Coastguard Worker %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1> 1335*9880d681SAndroid Build Coastguard Worker %1 = bitcast <1 x i64> %shuffle.i to <4 x i16> 1336*9880d681SAndroid Build Coastguard Worker %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1337*9880d681SAndroid Build Coastguard Worker %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind 1338*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %vmull2.i 1339*9880d681SAndroid Build Coastguard Worker} 1340*9880d681SAndroid Build Coastguard Worker 1341*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @foo7(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp { 1342*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: foo7: 1343*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: smull2.2d v0, v1, v2[1] 1344*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1345*9880d681SAndroid Build Coastguard Workerentry: 1346*9880d681SAndroid Build Coastguard Worker %0 = bitcast <4 x i32> %b to <2 x i64> 1347*9880d681SAndroid Build Coastguard Worker %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1> 1348*9880d681SAndroid Build Coastguard Worker %1 = bitcast <1 x i64> %shuffle.i to <2 x i32> 1349*9880d681SAndroid Build Coastguard Worker %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1350*9880d681SAndroid Build Coastguard Worker %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind 1351*9880d681SAndroid 
Build Coastguard Worker ret <2 x i64> %vmull2.i 1352*9880d681SAndroid Build Coastguard Worker} 1353*9880d681SAndroid Build Coastguard Worker 1354*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @foo8(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp { 1355*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: foo8: 1356*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: umull2.4s v0, v1, v2[1] 1357*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1358*9880d681SAndroid Build Coastguard Workerentry: 1359*9880d681SAndroid Build Coastguard Worker %0 = bitcast <8 x i16> %b to <2 x i64> 1360*9880d681SAndroid Build Coastguard Worker %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1> 1361*9880d681SAndroid Build Coastguard Worker %1 = bitcast <1 x i64> %shuffle.i to <4 x i16> 1362*9880d681SAndroid Build Coastguard Worker %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1363*9880d681SAndroid Build Coastguard Worker %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind 1364*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %vmull2.i 1365*9880d681SAndroid Build Coastguard Worker} 1366*9880d681SAndroid Build Coastguard Worker 1367*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @foo9(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp { 1368*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: foo9: 1369*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: umull2.2d v0, v1, v2[1] 1370*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1371*9880d681SAndroid Build Coastguard Workerentry: 1372*9880d681SAndroid Build Coastguard Worker %0 = bitcast <4 x i32> %b to <2 x i64> 1373*9880d681SAndroid Build Coastguard Worker %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1> 1374*9880d681SAndroid Build Coastguard Worker %1 = bitcast <1 x i64> %shuffle.i to 
<2 x i32> 1375*9880d681SAndroid Build Coastguard Worker %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1376*9880d681SAndroid Build Coastguard Worker %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind 1377*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %vmull2.i 1378*9880d681SAndroid Build Coastguard Worker} 1379*9880d681SAndroid Build Coastguard Worker 1380*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @bar0(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) nounwind { 1381*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: bar0: 1382*9880d681SAndroid Build Coastguard Worker; CHECK: smlal2.8h v0, v1, v2 1383*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1384*9880d681SAndroid Build Coastguard Worker 1385*9880d681SAndroid Build Coastguard Worker %tmp = bitcast <16 x i8> %b to <2 x i64> 1386*9880d681SAndroid Build Coastguard Worker %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> 1387*9880d681SAndroid Build Coastguard Worker %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <8 x i8> 1388*9880d681SAndroid Build Coastguard Worker %tmp2 = bitcast <16 x i8> %c to <2 x i64> 1389*9880d681SAndroid Build Coastguard Worker %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1> 1390*9880d681SAndroid Build Coastguard Worker %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <8 x i8> 1391*9880d681SAndroid Build Coastguard Worker %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind 1392*9880d681SAndroid Build Coastguard Worker %add.i = add <8 x i16> %vmull.i.i.i, %a 1393*9880d681SAndroid Build Coastguard Worker ret <8 x i16> %add.i 1394*9880d681SAndroid Build Coastguard Worker} 1395*9880d681SAndroid Build Coastguard Worker 1396*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @bar1(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) nounwind { 1397*9880d681SAndroid 
Build Coastguard Worker; CHECK-LABEL: bar1: 1398*9880d681SAndroid Build Coastguard Worker; CHECK: smlal2.4s v0, v1, v2 1399*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1400*9880d681SAndroid Build Coastguard Worker 1401*9880d681SAndroid Build Coastguard Worker %tmp = bitcast <8 x i16> %b to <2 x i64> 1402*9880d681SAndroid Build Coastguard Worker %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> 1403*9880d681SAndroid Build Coastguard Worker %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <4 x i16> 1404*9880d681SAndroid Build Coastguard Worker %tmp2 = bitcast <8 x i16> %c to <2 x i64> 1405*9880d681SAndroid Build Coastguard Worker %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1> 1406*9880d681SAndroid Build Coastguard Worker %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <4 x i16> 1407*9880d681SAndroid Build Coastguard Worker %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind 1408*9880d681SAndroid Build Coastguard Worker %add.i = add <4 x i32> %vmull2.i.i.i, %a 1409*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %add.i 1410*9880d681SAndroid Build Coastguard Worker} 1411*9880d681SAndroid Build Coastguard Worker 1412*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @bar2(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) nounwind { 1413*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: bar2: 1414*9880d681SAndroid Build Coastguard Worker; CHECK: smlal2.2d v0, v1, v2 1415*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1416*9880d681SAndroid Build Coastguard Worker 1417*9880d681SAndroid Build Coastguard Worker %tmp = bitcast <4 x i32> %b to <2 x i64> 1418*9880d681SAndroid Build Coastguard Worker %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> 1419*9880d681SAndroid Build Coastguard Worker %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <2 x i32> 1420*9880d681SAndroid Build Coastguard Worker 
%tmp2 = bitcast <4 x i32> %c to <2 x i64> 1421*9880d681SAndroid Build Coastguard Worker %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1> 1422*9880d681SAndroid Build Coastguard Worker %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <2 x i32> 1423*9880d681SAndroid Build Coastguard Worker %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind 1424*9880d681SAndroid Build Coastguard Worker %add.i = add <2 x i64> %vmull2.i.i.i, %a 1425*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %add.i 1426*9880d681SAndroid Build Coastguard Worker} 1427*9880d681SAndroid Build Coastguard Worker 1428*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @bar3(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) nounwind { 1429*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: bar3: 1430*9880d681SAndroid Build Coastguard Worker; CHECK: umlal2.8h v0, v1, v2 1431*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1432*9880d681SAndroid Build Coastguard Worker 1433*9880d681SAndroid Build Coastguard Worker %tmp = bitcast <16 x i8> %b to <2 x i64> 1434*9880d681SAndroid Build Coastguard Worker %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> 1435*9880d681SAndroid Build Coastguard Worker %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <8 x i8> 1436*9880d681SAndroid Build Coastguard Worker %tmp2 = bitcast <16 x i8> %c to <2 x i64> 1437*9880d681SAndroid Build Coastguard Worker %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1> 1438*9880d681SAndroid Build Coastguard Worker %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <8 x i8> 1439*9880d681SAndroid Build Coastguard Worker %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind 1440*9880d681SAndroid Build Coastguard Worker %add.i = add <8 x i16> %vmull.i.i.i, %a 1441*9880d681SAndroid Build Coastguard Worker ret <8 x i16> %add.i 
1442*9880d681SAndroid Build Coastguard Worker} 1443*9880d681SAndroid Build Coastguard Worker 1444*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @bar4(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) nounwind { 1445*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: bar4: 1446*9880d681SAndroid Build Coastguard Worker; CHECK: umlal2.4s v0, v1, v2 1447*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1448*9880d681SAndroid Build Coastguard Worker 1449*9880d681SAndroid Build Coastguard Worker %tmp = bitcast <8 x i16> %b to <2 x i64> 1450*9880d681SAndroid Build Coastguard Worker %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> 1451*9880d681SAndroid Build Coastguard Worker %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <4 x i16> 1452*9880d681SAndroid Build Coastguard Worker %tmp2 = bitcast <8 x i16> %c to <2 x i64> 1453*9880d681SAndroid Build Coastguard Worker %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1> 1454*9880d681SAndroid Build Coastguard Worker %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <4 x i16> 1455*9880d681SAndroid Build Coastguard Worker %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind 1456*9880d681SAndroid Build Coastguard Worker %add.i = add <4 x i32> %vmull2.i.i.i, %a 1457*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %add.i 1458*9880d681SAndroid Build Coastguard Worker} 1459*9880d681SAndroid Build Coastguard Worker 1460*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @bar5(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) nounwind { 1461*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: bar5: 1462*9880d681SAndroid Build Coastguard Worker; CHECK: umlal2.2d v0, v1, v2 1463*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1464*9880d681SAndroid Build Coastguard Worker 1465*9880d681SAndroid Build Coastguard Worker %tmp = bitcast <4 x i32> %b to <2 x i64> 1466*9880d681SAndroid Build Coastguard 
Worker %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> 1467*9880d681SAndroid Build Coastguard Worker %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <2 x i32> 1468*9880d681SAndroid Build Coastguard Worker %tmp2 = bitcast <4 x i32> %c to <2 x i64> 1469*9880d681SAndroid Build Coastguard Worker %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1> 1470*9880d681SAndroid Build Coastguard Worker %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <2 x i32> 1471*9880d681SAndroid Build Coastguard Worker %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind 1472*9880d681SAndroid Build Coastguard Worker %add.i = add <2 x i64> %vmull2.i.i.i, %a 1473*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %add.i 1474*9880d681SAndroid Build Coastguard Worker} 1475*9880d681SAndroid Build Coastguard Worker 1476*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @mlal2_1(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind { 1477*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mlal2_1: 1478*9880d681SAndroid Build Coastguard Worker; CHECK: smlal2.4s v0, v1, v2[3] 1479*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1480*9880d681SAndroid Build Coastguard Worker %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 1481*9880d681SAndroid Build Coastguard Worker %tmp = bitcast <8 x i16> %b to <2 x i64> 1482*9880d681SAndroid Build Coastguard Worker %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> 1483*9880d681SAndroid Build Coastguard Worker %tmp1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16> 1484*9880d681SAndroid Build Coastguard Worker %tmp2 = bitcast <8 x i16> %shuffle to <2 x i64> 1485*9880d681SAndroid Build Coastguard Worker %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1> 1486*9880d681SAndroid Build Coastguard Worker %tmp3 
= bitcast <1 x i64> %shuffle.i3.i to <4 x i16> 1487*9880d681SAndroid Build Coastguard Worker %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind 1488*9880d681SAndroid Build Coastguard Worker %add = add <4 x i32> %vmull2.i.i, %a 1489*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %add 1490*9880d681SAndroid Build Coastguard Worker} 1491*9880d681SAndroid Build Coastguard Worker 1492*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @mlal2_2(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind { 1493*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mlal2_2: 1494*9880d681SAndroid Build Coastguard Worker; CHECK: smlal2.2d v0, v1, v2[1] 1495*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1496*9880d681SAndroid Build Coastguard Worker %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1497*9880d681SAndroid Build Coastguard Worker %tmp = bitcast <4 x i32> %b to <2 x i64> 1498*9880d681SAndroid Build Coastguard Worker %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> 1499*9880d681SAndroid Build Coastguard Worker %tmp1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32> 1500*9880d681SAndroid Build Coastguard Worker %tmp2 = bitcast <4 x i32> %shuffle to <2 x i64> 1501*9880d681SAndroid Build Coastguard Worker %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1> 1502*9880d681SAndroid Build Coastguard Worker %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32> 1503*9880d681SAndroid Build Coastguard Worker %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind 1504*9880d681SAndroid Build Coastguard Worker %add = add <2 x i64> %vmull2.i.i, %a 1505*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %add 1506*9880d681SAndroid Build Coastguard Worker} 1507*9880d681SAndroid Build Coastguard Worker 1508*9880d681SAndroid Build Coastguard Workerdefine <4 
x i32> @mlal2_4(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind { 1509*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mlal2_4: 1510*9880d681SAndroid Build Coastguard Worker; CHECK: umlal2.4s v0, v1, v2[2] 1511*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1512*9880d681SAndroid Build Coastguard Worker 1513*9880d681SAndroid Build Coastguard Worker %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> 1514*9880d681SAndroid Build Coastguard Worker %tmp = bitcast <8 x i16> %b to <2 x i64> 1515*9880d681SAndroid Build Coastguard Worker %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> 1516*9880d681SAndroid Build Coastguard Worker %tmp1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16> 1517*9880d681SAndroid Build Coastguard Worker %tmp2 = bitcast <8 x i16> %shuffle to <2 x i64> 1518*9880d681SAndroid Build Coastguard Worker %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1> 1519*9880d681SAndroid Build Coastguard Worker %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16> 1520*9880d681SAndroid Build Coastguard Worker %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind 1521*9880d681SAndroid Build Coastguard Worker %add = add <4 x i32> %vmull2.i.i, %a 1522*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %add 1523*9880d681SAndroid Build Coastguard Worker} 1524*9880d681SAndroid Build Coastguard Worker 1525*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @mlal2_5(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind { 1526*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mlal2_5: 1527*9880d681SAndroid Build Coastguard Worker; CHECK: umlal2.2d v0, v1, v2[0] 1528*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1529*9880d681SAndroid Build Coastguard Worker %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> zeroinitializer 
1530*9880d681SAndroid Build Coastguard Worker %tmp = bitcast <4 x i32> %b to <2 x i64> 1531*9880d681SAndroid Build Coastguard Worker %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1> 1532*9880d681SAndroid Build Coastguard Worker %tmp1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32> 1533*9880d681SAndroid Build Coastguard Worker %tmp2 = bitcast <4 x i32> %shuffle to <2 x i64> 1534*9880d681SAndroid Build Coastguard Worker %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1> 1535*9880d681SAndroid Build Coastguard Worker %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32> 1536*9880d681SAndroid Build Coastguard Worker %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind 1537*9880d681SAndroid Build Coastguard Worker %add = add <2 x i64> %vmull2.i.i, %a 1538*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %add 1539*9880d681SAndroid Build Coastguard Worker} 1540*9880d681SAndroid Build Coastguard Worker 1541*9880d681SAndroid Build Coastguard Worker; rdar://12328502 1542*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @vmulq_n_f64(<2 x double> %x, double %y) nounwind readnone ssp { 1543*9880d681SAndroid Build Coastguard Workerentry: 1544*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmulq_n_f64: 1545*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: dup.2d 1546*9880d681SAndroid Build Coastguard Worker; CHECK: fmul.2d v0, v0, v1[0] 1547*9880d681SAndroid Build Coastguard Worker %vecinit.i = insertelement <2 x double> undef, double %y, i32 0 1548*9880d681SAndroid Build Coastguard Worker %vecinit1.i = insertelement <2 x double> %vecinit.i, double %y, i32 1 1549*9880d681SAndroid Build Coastguard Worker %mul.i = fmul <2 x double> %vecinit1.i, %x 1550*9880d681SAndroid Build Coastguard Worker ret <2 x double> %mul.i 1551*9880d681SAndroid Build Coastguard Worker} 1552*9880d681SAndroid Build Coastguard Worker 1553*9880d681SAndroid Build 
Coastguard Workerdefine <4 x float> @vmulq_n_f32(<4 x float> %x, float %y) nounwind readnone ssp { 1554*9880d681SAndroid Build Coastguard Workerentry: 1555*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmulq_n_f32: 1556*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: dup.4s 1557*9880d681SAndroid Build Coastguard Worker; CHECK: fmul.4s v0, v0, v1[0] 1558*9880d681SAndroid Build Coastguard Worker %vecinit.i = insertelement <4 x float> undef, float %y, i32 0 1559*9880d681SAndroid Build Coastguard Worker %vecinit1.i = insertelement <4 x float> %vecinit.i, float %y, i32 1 1560*9880d681SAndroid Build Coastguard Worker %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %y, i32 2 1561*9880d681SAndroid Build Coastguard Worker %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %y, i32 3 1562*9880d681SAndroid Build Coastguard Worker %mul.i = fmul <4 x float> %vecinit3.i, %x 1563*9880d681SAndroid Build Coastguard Worker ret <4 x float> %mul.i 1564*9880d681SAndroid Build Coastguard Worker} 1565*9880d681SAndroid Build Coastguard Worker 1566*9880d681SAndroid Build Coastguard Workerdefine <2 x float> @vmul_n_f32(<2 x float> %x, float %y) nounwind readnone ssp { 1567*9880d681SAndroid Build Coastguard Workerentry: 1568*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmul_n_f32: 1569*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: dup.2s 1570*9880d681SAndroid Build Coastguard Worker; CHECK: fmul.2s v0, v0, v1[0] 1571*9880d681SAndroid Build Coastguard Worker %vecinit.i = insertelement <2 x float> undef, float %y, i32 0 1572*9880d681SAndroid Build Coastguard Worker %vecinit1.i = insertelement <2 x float> %vecinit.i, float %y, i32 1 1573*9880d681SAndroid Build Coastguard Worker %mul.i = fmul <2 x float> %vecinit1.i, %x 1574*9880d681SAndroid Build Coastguard Worker ret <2 x float> %mul.i 1575*9880d681SAndroid Build Coastguard Worker} 1576*9880d681SAndroid Build Coastguard Worker 1577*9880d681SAndroid Build Coastguard Workerdefine <4 x i16> 
@vmla_laneq_s16_test(<4 x i16> %a, <4 x i16> %b, <8 x i16> %c) nounwind readnone ssp { 1578*9880d681SAndroid Build Coastguard Workerentry: 1579*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmla_laneq_s16_test: 1580*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext 1581*9880d681SAndroid Build Coastguard Worker; CHECK: mla.4h v0, v1, v2[6] 1582*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1583*9880d681SAndroid Build Coastguard Worker %shuffle = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 6, i32 6, i32 6, i32 6> 1584*9880d681SAndroid Build Coastguard Worker %mul = mul <4 x i16> %shuffle, %b 1585*9880d681SAndroid Build Coastguard Worker %add = add <4 x i16> %mul, %a 1586*9880d681SAndroid Build Coastguard Worker ret <4 x i16> %add 1587*9880d681SAndroid Build Coastguard Worker} 1588*9880d681SAndroid Build Coastguard Worker 1589*9880d681SAndroid Build Coastguard Workerdefine <2 x i32> @vmla_laneq_s32_test(<2 x i32> %a, <2 x i32> %b, <4 x i32> %c) nounwind readnone ssp { 1590*9880d681SAndroid Build Coastguard Workerentry: 1591*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmla_laneq_s32_test: 1592*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext 1593*9880d681SAndroid Build Coastguard Worker; CHECK: mla.2s v0, v1, v2[3] 1594*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1595*9880d681SAndroid Build Coastguard Worker %shuffle = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1596*9880d681SAndroid Build Coastguard Worker %mul = mul <2 x i32> %shuffle, %b 1597*9880d681SAndroid Build Coastguard Worker %add = add <2 x i32> %mul, %a 1598*9880d681SAndroid Build Coastguard Worker ret <2 x i32> %add 1599*9880d681SAndroid Build Coastguard Worker} 1600*9880d681SAndroid Build Coastguard Worker 1601*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @not_really_vmlaq_laneq_s16_test(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind readnone ssp { 1602*9880d681SAndroid Build Coastguard Workerentry: 
1603*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: not_really_vmlaq_laneq_s16_test: 1604*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext 1605*9880d681SAndroid Build Coastguard Worker; CHECK: mla.8h v0, v1, v2[5] 1606*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1607*9880d681SAndroid Build Coastguard Worker %shuffle1 = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1608*9880d681SAndroid Build Coastguard Worker %shuffle2 = shufflevector <4 x i16> %shuffle1, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 1609*9880d681SAndroid Build Coastguard Worker %mul = mul <8 x i16> %shuffle2, %b 1610*9880d681SAndroid Build Coastguard Worker %add = add <8 x i16> %mul, %a 1611*9880d681SAndroid Build Coastguard Worker ret <8 x i16> %add 1612*9880d681SAndroid Build Coastguard Worker} 1613*9880d681SAndroid Build Coastguard Worker 1614*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @not_really_vmlaq_laneq_s32_test(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind readnone ssp { 1615*9880d681SAndroid Build Coastguard Workerentry: 1616*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: not_really_vmlaq_laneq_s32_test: 1617*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext 1618*9880d681SAndroid Build Coastguard Worker; CHECK: mla.4s v0, v1, v2[3] 1619*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1620*9880d681SAndroid Build Coastguard Worker %shuffle1 = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1621*9880d681SAndroid Build Coastguard Worker %shuffle2 = shufflevector <2 x i32> %shuffle1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1622*9880d681SAndroid Build Coastguard Worker %mul = mul <4 x i32> %shuffle2, %b 1623*9880d681SAndroid Build Coastguard Worker %add = add <4 x i32> %mul, %a 1624*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %add 1625*9880d681SAndroid Build Coastguard Worker} 1626*9880d681SAndroid Build 
Coastguard Worker 1627*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @vmull_laneq_s16_test(<4 x i16> %a, <8 x i16> %b) nounwind readnone ssp { 1628*9880d681SAndroid Build Coastguard Workerentry: 1629*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmull_laneq_s16_test: 1630*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext 1631*9880d681SAndroid Build Coastguard Worker; CHECK: smull.4s v0, v0, v1[6] 1632*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1633*9880d681SAndroid Build Coastguard Worker %shuffle = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 6, i32 6, i32 6, i32 6> 1634*9880d681SAndroid Build Coastguard Worker %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2 1635*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %vmull2.i 1636*9880d681SAndroid Build Coastguard Worker} 1637*9880d681SAndroid Build Coastguard Worker 1638*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @vmull_laneq_s32_test(<2 x i32> %a, <4 x i32> %b) nounwind readnone ssp { 1639*9880d681SAndroid Build Coastguard Workerentry: 1640*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmull_laneq_s32_test: 1641*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext 1642*9880d681SAndroid Build Coastguard Worker; CHECK: smull.2d v0, v0, v1[2] 1643*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1644*9880d681SAndroid Build Coastguard Worker %shuffle = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 2> 1645*9880d681SAndroid Build Coastguard Worker %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2 1646*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %vmull2.i 1647*9880d681SAndroid Build Coastguard Worker} 1648*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @vmull_laneq_u16_test(<4 x i16> %a, <8 x i16> %b) nounwind readnone ssp { 1649*9880d681SAndroid Build Coastguard Workerentry: 1650*9880d681SAndroid 
Build Coastguard Worker; CHECK-LABEL: vmull_laneq_u16_test: 1651*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext 1652*9880d681SAndroid Build Coastguard Worker; CHECK: umull.4s v0, v0, v1[6] 1653*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1654*9880d681SAndroid Build Coastguard Worker %shuffle = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 6, i32 6, i32 6, i32 6> 1655*9880d681SAndroid Build Coastguard Worker %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2 1656*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %vmull2.i 1657*9880d681SAndroid Build Coastguard Worker} 1658*9880d681SAndroid Build Coastguard Worker 1659*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @vmull_laneq_u32_test(<2 x i32> %a, <4 x i32> %b) nounwind readnone ssp { 1660*9880d681SAndroid Build Coastguard Workerentry: 1661*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmull_laneq_u32_test: 1662*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext 1663*9880d681SAndroid Build Coastguard Worker; CHECK: umull.2d v0, v0, v1[2] 1664*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1665*9880d681SAndroid Build Coastguard Worker %shuffle = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 2> 1666*9880d681SAndroid Build Coastguard Worker %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2 1667*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %vmull2.i 1668*9880d681SAndroid Build Coastguard Worker} 1669*9880d681SAndroid Build Coastguard Worker 1670*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @vmull_high_n_s16_test(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c, i32 %d) nounwind readnone optsize ssp { 1671*9880d681SAndroid Build Coastguard Workerentry: 1672*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmull_high_n_s16_test: 1673*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext 1674*9880d681SAndroid Build 
Coastguard Worker; CHECK: smull2.4s 1675*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1676*9880d681SAndroid Build Coastguard Worker %conv = trunc i32 %d to i16 1677*9880d681SAndroid Build Coastguard Worker %0 = bitcast <8 x i16> %b to <2 x i64> 1678*9880d681SAndroid Build Coastguard Worker %shuffle.i.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1> 1679*9880d681SAndroid Build Coastguard Worker %1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16> 1680*9880d681SAndroid Build Coastguard Worker %vecinit.i = insertelement <4 x i16> undef, i16 %conv, i32 0 1681*9880d681SAndroid Build Coastguard Worker %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %conv, i32 1 1682*9880d681SAndroid Build Coastguard Worker %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %conv, i32 2 1683*9880d681SAndroid Build Coastguard Worker %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %conv, i32 3 1684*9880d681SAndroid Build Coastguard Worker %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %vecinit3.i) nounwind 1685*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %vmull2.i.i 1686*9880d681SAndroid Build Coastguard Worker} 1687*9880d681SAndroid Build Coastguard Worker 1688*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @vmull_high_n_s32_test(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c, i32 %d) nounwind readnone optsize ssp { 1689*9880d681SAndroid Build Coastguard Workerentry: 1690*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmull_high_n_s32_test: 1691*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext 1692*9880d681SAndroid Build Coastguard Worker; CHECK: smull2.2d 1693*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1694*9880d681SAndroid Build Coastguard Worker %0 = bitcast <4 x i32> %b to <2 x i64> 1695*9880d681SAndroid Build Coastguard Worker %shuffle.i.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1> 1696*9880d681SAndroid Build Coastguard Worker %1 = bitcast 
<1 x i64> %shuffle.i.i to <2 x i32> 1697*9880d681SAndroid Build Coastguard Worker %vecinit.i = insertelement <2 x i32> undef, i32 %d, i32 0 1698*9880d681SAndroid Build Coastguard Worker %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %d, i32 1 1699*9880d681SAndroid Build Coastguard Worker %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %vecinit1.i) nounwind 1700*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %vmull2.i.i 1701*9880d681SAndroid Build Coastguard Worker} 1702*9880d681SAndroid Build Coastguard Worker 1703*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @vmull_high_n_u16_test(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c, i32 %d) nounwind readnone optsize ssp { 1704*9880d681SAndroid Build Coastguard Workerentry: 1705*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmull_high_n_u16_test: 1706*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext 1707*9880d681SAndroid Build Coastguard Worker; CHECK: umull2.4s 1708*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1709*9880d681SAndroid Build Coastguard Worker %conv = trunc i32 %d to i16 1710*9880d681SAndroid Build Coastguard Worker %0 = bitcast <8 x i16> %b to <2 x i64> 1711*9880d681SAndroid Build Coastguard Worker %shuffle.i.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1> 1712*9880d681SAndroid Build Coastguard Worker %1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16> 1713*9880d681SAndroid Build Coastguard Worker %vecinit.i = insertelement <4 x i16> undef, i16 %conv, i32 0 1714*9880d681SAndroid Build Coastguard Worker %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %conv, i32 1 1715*9880d681SAndroid Build Coastguard Worker %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %conv, i32 2 1716*9880d681SAndroid Build Coastguard Worker %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %conv, i32 3 1717*9880d681SAndroid Build Coastguard Worker %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x 
i16> %1, <4 x i16> %vecinit3.i) nounwind 1718*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %vmull2.i.i 1719*9880d681SAndroid Build Coastguard Worker} 1720*9880d681SAndroid Build Coastguard Worker 1721*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @vmull_high_n_u32_test(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c, i32 %d) nounwind readnone optsize ssp { 1722*9880d681SAndroid Build Coastguard Workerentry: 1723*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmull_high_n_u32_test: 1724*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext 1725*9880d681SAndroid Build Coastguard Worker; CHECK: umull2.2d 1726*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: ret 1727*9880d681SAndroid Build Coastguard Worker %0 = bitcast <4 x i32> %b to <2 x i64> 1728*9880d681SAndroid Build Coastguard Worker %shuffle.i.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1> 1729*9880d681SAndroid Build Coastguard Worker %1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32> 1730*9880d681SAndroid Build Coastguard Worker %vecinit.i = insertelement <2 x i32> undef, i32 %d, i32 0 1731*9880d681SAndroid Build Coastguard Worker %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %d, i32 1 1732*9880d681SAndroid Build Coastguard Worker %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %vecinit1.i) nounwind 1733*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %vmull2.i.i 1734*9880d681SAndroid Build Coastguard Worker} 1735*9880d681SAndroid Build Coastguard Worker 1736*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @vmul_built_dup_test(<4 x i32> %a, <4 x i32> %b) { 1737*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmul_built_dup_test: 1738*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ins 1739*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: dup 1740*9880d681SAndroid Build Coastguard Worker; CHECK: mul.4s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[1] 1741*9880d681SAndroid Build Coastguard Worker 
%vget_lane = extractelement <4 x i32> %b, i32 1 1742*9880d681SAndroid Build Coastguard Worker %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0 1743*9880d681SAndroid Build Coastguard Worker %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1 1744*9880d681SAndroid Build Coastguard Worker %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2 1745*9880d681SAndroid Build Coastguard Worker %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3 1746*9880d681SAndroid Build Coastguard Worker %prod = mul <4 x i32> %a, %vecinit3.i 1747*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %prod 1748*9880d681SAndroid Build Coastguard Worker} 1749*9880d681SAndroid Build Coastguard Worker 1750*9880d681SAndroid Build Coastguard Workerdefine <4 x i16> @vmul_built_dup_fromsmall_test(<4 x i16> %a, <4 x i16> %b) { 1751*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmul_built_dup_fromsmall_test: 1752*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ins 1753*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: dup 1754*9880d681SAndroid Build Coastguard Worker; CHECK: mul.4h {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[3] 1755*9880d681SAndroid Build Coastguard Worker %vget_lane = extractelement <4 x i16> %b, i32 3 1756*9880d681SAndroid Build Coastguard Worker %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 1757*9880d681SAndroid Build Coastguard Worker %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 1758*9880d681SAndroid Build Coastguard Worker %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 1759*9880d681SAndroid Build Coastguard Worker %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1760*9880d681SAndroid Build Coastguard Worker %prod = mul <4 x i16> %a, %vecinit3.i 1761*9880d681SAndroid Build Coastguard Worker ret <4 x i16> %prod 1762*9880d681SAndroid Build Coastguard Worker} 1763*9880d681SAndroid Build Coastguard 
Worker 1764*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @vmulq_built_dup_fromsmall_test(<8 x i16> %a, <4 x i16> %b) { 1765*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: vmulq_built_dup_fromsmall_test: 1766*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ins 1767*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: dup 1768*9880d681SAndroid Build Coastguard Worker; CHECK: mul.8h {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0] 1769*9880d681SAndroid Build Coastguard Worker %vget_lane = extractelement <4 x i16> %b, i32 0 1770*9880d681SAndroid Build Coastguard Worker %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0 1771*9880d681SAndroid Build Coastguard Worker %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1 1772*9880d681SAndroid Build Coastguard Worker %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2 1773*9880d681SAndroid Build Coastguard Worker %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1774*9880d681SAndroid Build Coastguard Worker %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4 1775*9880d681SAndroid Build Coastguard Worker %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5 1776*9880d681SAndroid Build Coastguard Worker %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6 1777*9880d681SAndroid Build Coastguard Worker %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7 1778*9880d681SAndroid Build Coastguard Worker %prod = mul <8 x i16> %a, %vecinit7.i 1779*9880d681SAndroid Build Coastguard Worker ret <8 x i16> %prod 1780*9880d681SAndroid Build Coastguard Worker} 1781*9880d681SAndroid Build Coastguard Worker 1782*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @mull_from_two_extracts(<4 x i32> %lhs, <4 x i32> %rhs) { 1783*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mull_from_two_extracts: 1784*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext 
1785*9880d681SAndroid Build Coastguard Worker; CHECK: sqdmull2.2d 1786*9880d681SAndroid Build Coastguard Worker 1787*9880d681SAndroid Build Coastguard Worker %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1788*9880d681SAndroid Build Coastguard Worker %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1789*9880d681SAndroid Build Coastguard Worker 1790*9880d681SAndroid Build Coastguard Worker %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind 1791*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %res 1792*9880d681SAndroid Build Coastguard Worker} 1793*9880d681SAndroid Build Coastguard Worker 1794*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @mlal_from_two_extracts(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) { 1795*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mlal_from_two_extracts: 1796*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext 1797*9880d681SAndroid Build Coastguard Worker; CHECK: sqdmlal2.2d 1798*9880d681SAndroid Build Coastguard Worker 1799*9880d681SAndroid Build Coastguard Worker %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1800*9880d681SAndroid Build Coastguard Worker %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1801*9880d681SAndroid Build Coastguard Worker 1802*9880d681SAndroid Build Coastguard Worker %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind 1803*9880d681SAndroid Build Coastguard Worker %sum = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res) 1804*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %sum 1805*9880d681SAndroid Build Coastguard Worker} 1806*9880d681SAndroid Build Coastguard Worker 1807*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @mull_from_extract_dup(<4 x i32> %lhs, i32 %rhs) { 
1808*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mull_from_extract_dup: 1809*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext 1810*9880d681SAndroid Build Coastguard Worker; CHECK: sqdmull2.2d 1811*9880d681SAndroid Build Coastguard Worker %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0 1812*9880d681SAndroid Build Coastguard Worker %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1 1813*9880d681SAndroid Build Coastguard Worker 1814*9880d681SAndroid Build Coastguard Worker %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1815*9880d681SAndroid Build Coastguard Worker 1816*9880d681SAndroid Build Coastguard Worker %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind 1817*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %res 1818*9880d681SAndroid Build Coastguard Worker} 1819*9880d681SAndroid Build Coastguard Worker 1820*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @pmull_from_extract_dup(<16 x i8> %lhs, i8 %rhs) { 1821*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: pmull_from_extract_dup: 1822*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext 1823*9880d681SAndroid Build Coastguard Worker; CHECK: pmull2.8h 1824*9880d681SAndroid Build Coastguard Worker %rhsvec.0 = insertelement <8 x i8> undef, i8 %rhs, i32 0 1825*9880d681SAndroid Build Coastguard Worker %rhsvec = shufflevector <8 x i8> %rhsvec.0, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1826*9880d681SAndroid Build Coastguard Worker 1827*9880d681SAndroid Build Coastguard Worker %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1828*9880d681SAndroid Build Coastguard Worker 1829*9880d681SAndroid Build Coastguard Worker %res = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhsvec) nounwind 
1830*9880d681SAndroid Build Coastguard Worker ret <8 x i16> %res 1831*9880d681SAndroid Build Coastguard Worker} 1832*9880d681SAndroid Build Coastguard Worker 1833*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @pmull_from_extract_duplane(<16 x i8> %lhs, <8 x i8> %rhs) { 1834*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: pmull_from_extract_duplane: 1835*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext 1836*9880d681SAndroid Build Coastguard Worker; CHECK: pmull2.8h 1837*9880d681SAndroid Build Coastguard Worker 1838*9880d681SAndroid Build Coastguard Worker %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1839*9880d681SAndroid Build Coastguard Worker %rhs.high = shufflevector <8 x i8> %rhs, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1840*9880d681SAndroid Build Coastguard Worker 1841*9880d681SAndroid Build Coastguard Worker %res = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhs.high) nounwind 1842*9880d681SAndroid Build Coastguard Worker ret <8 x i16> %res 1843*9880d681SAndroid Build Coastguard Worker} 1844*9880d681SAndroid Build Coastguard Worker 1845*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @sqdmull_from_extract_duplane(<4 x i32> %lhs, <4 x i32> %rhs) { 1846*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: sqdmull_from_extract_duplane: 1847*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext 1848*9880d681SAndroid Build Coastguard Worker; CHECK: sqdmull2.2d 1849*9880d681SAndroid Build Coastguard Worker 1850*9880d681SAndroid Build Coastguard Worker %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1851*9880d681SAndroid Build Coastguard Worker %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0> 1852*9880d681SAndroid Build Coastguard Worker 1853*9880d681SAndroid Build Coastguard Worker %res = tail 
call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind 1854*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %res 1855*9880d681SAndroid Build Coastguard Worker} 1856*9880d681SAndroid Build Coastguard Worker 1857*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @sqdmlal_from_extract_duplane(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) { 1858*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: sqdmlal_from_extract_duplane: 1859*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext 1860*9880d681SAndroid Build Coastguard Worker; CHECK: sqdmlal2.2d 1861*9880d681SAndroid Build Coastguard Worker 1862*9880d681SAndroid Build Coastguard Worker %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1863*9880d681SAndroid Build Coastguard Worker %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0> 1864*9880d681SAndroid Build Coastguard Worker 1865*9880d681SAndroid Build Coastguard Worker %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind 1866*9880d681SAndroid Build Coastguard Worker %sum = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res) 1867*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %sum 1868*9880d681SAndroid Build Coastguard Worker} 1869*9880d681SAndroid Build Coastguard Worker 1870*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @umlal_from_extract_duplane(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) { 1871*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: umlal_from_extract_duplane: 1872*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: ext 1873*9880d681SAndroid Build Coastguard Worker; CHECK: umlal2.2d 1874*9880d681SAndroid Build Coastguard Worker 1875*9880d681SAndroid Build Coastguard Worker %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1876*9880d681SAndroid Build Coastguard Worker 
%rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0> 1877*9880d681SAndroid Build Coastguard Worker 1878*9880d681SAndroid Build Coastguard Worker %res = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind 1879*9880d681SAndroid Build Coastguard Worker %sum = add <2 x i64> %accum, %res 1880*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %sum 1881*9880d681SAndroid Build Coastguard Worker} 1882*9880d681SAndroid Build Coastguard Worker 1883*9880d681SAndroid Build Coastguard Workerdefine float @scalar_fmla_from_extract_v4f32(float %accum, float %lhs, <4 x float> %rvec) { 1884*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: scalar_fmla_from_extract_v4f32: 1885*9880d681SAndroid Build Coastguard Worker; CHECK: fmla.s s0, s1, v2[3] 1886*9880d681SAndroid Build Coastguard Worker %rhs = extractelement <4 x float> %rvec, i32 3 1887*9880d681SAndroid Build Coastguard Worker %res = call float @llvm.fma.f32(float %lhs, float %rhs, float %accum) 1888*9880d681SAndroid Build Coastguard Worker ret float %res 1889*9880d681SAndroid Build Coastguard Worker} 1890*9880d681SAndroid Build Coastguard Worker 1891*9880d681SAndroid Build Coastguard Workerdefine float @scalar_fmla_from_extract_v2f32(float %accum, float %lhs, <2 x float> %rvec) { 1892*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: scalar_fmla_from_extract_v2f32: 1893*9880d681SAndroid Build Coastguard Worker; CHECK: fmla.s s0, s1, v2[1] 1894*9880d681SAndroid Build Coastguard Worker %rhs = extractelement <2 x float> %rvec, i32 1 1895*9880d681SAndroid Build Coastguard Worker %res = call float @llvm.fma.f32(float %lhs, float %rhs, float %accum) 1896*9880d681SAndroid Build Coastguard Worker ret float %res 1897*9880d681SAndroid Build Coastguard Worker} 1898*9880d681SAndroid Build Coastguard Worker 1899*9880d681SAndroid Build Coastguard Workerdefine float @scalar_fmls_from_extract_v4f32(float %accum, float %lhs, <4 x float> %rvec) { 
1900*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: scalar_fmls_from_extract_v4f32: 1901*9880d681SAndroid Build Coastguard Worker; CHECK: fmls.s s0, s1, v2[3] 1902*9880d681SAndroid Build Coastguard Worker %rhs.scal = extractelement <4 x float> %rvec, i32 3 1903*9880d681SAndroid Build Coastguard Worker %rhs = fsub float -0.0, %rhs.scal 1904*9880d681SAndroid Build Coastguard Worker %res = call float @llvm.fma.f32(float %lhs, float %rhs, float %accum) 1905*9880d681SAndroid Build Coastguard Worker ret float %res 1906*9880d681SAndroid Build Coastguard Worker} 1907*9880d681SAndroid Build Coastguard Worker 1908*9880d681SAndroid Build Coastguard Workerdefine float @scalar_fmls_from_extract_v2f32(float %accum, float %lhs, <2 x float> %rvec) { 1909*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: scalar_fmls_from_extract_v2f32: 1910*9880d681SAndroid Build Coastguard Worker; CHECK: fmls.s s0, s1, v2[1] 1911*9880d681SAndroid Build Coastguard Worker %rhs.scal = extractelement <2 x float> %rvec, i32 1 1912*9880d681SAndroid Build Coastguard Worker %rhs = fsub float -0.0, %rhs.scal 1913*9880d681SAndroid Build Coastguard Worker %res = call float @llvm.fma.f32(float %lhs, float %rhs, float %accum) 1914*9880d681SAndroid Build Coastguard Worker ret float %res 1915*9880d681SAndroid Build Coastguard Worker} 1916*9880d681SAndroid Build Coastguard Worker 1917*9880d681SAndroid Build Coastguard Workerdeclare float @llvm.fma.f32(float, float, float) 1918*9880d681SAndroid Build Coastguard Worker 1919*9880d681SAndroid Build Coastguard Workerdefine double @scalar_fmla_from_extract_v2f64(double %accum, double %lhs, <2 x double> %rvec) { 1920*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: scalar_fmla_from_extract_v2f64: 1921*9880d681SAndroid Build Coastguard Worker; CHECK: fmla.d d0, d1, v2[1] 1922*9880d681SAndroid Build Coastguard Worker %rhs = extractelement <2 x double> %rvec, i32 1 1923*9880d681SAndroid Build Coastguard Worker %res = call double @llvm.fma.f64(double 
%lhs, double %rhs, double %accum) 1924*9880d681SAndroid Build Coastguard Worker ret double %res 1925*9880d681SAndroid Build Coastguard Worker} 1926*9880d681SAndroid Build Coastguard Worker 1927*9880d681SAndroid Build Coastguard Workerdefine double @scalar_fmls_from_extract_v2f64(double %accum, double %lhs, <2 x double> %rvec) { 1928*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: scalar_fmls_from_extract_v2f64: 1929*9880d681SAndroid Build Coastguard Worker; CHECK: fmls.d d0, d1, v2[1] 1930*9880d681SAndroid Build Coastguard Worker %rhs.scal = extractelement <2 x double> %rvec, i32 1 1931*9880d681SAndroid Build Coastguard Worker %rhs = fsub double -0.0, %rhs.scal 1932*9880d681SAndroid Build Coastguard Worker %res = call double @llvm.fma.f64(double %lhs, double %rhs, double %accum) 1933*9880d681SAndroid Build Coastguard Worker ret double %res 1934*9880d681SAndroid Build Coastguard Worker} 1935*9880d681SAndroid Build Coastguard Worker 1936*9880d681SAndroid Build Coastguard Workerdeclare double @llvm.fma.f64(double, double, double) 1937*9880d681SAndroid Build Coastguard Worker 1938*9880d681SAndroid Build Coastguard Workerdefine <2 x float> @fmls_with_fneg_before_extract_v2f32(<2 x float> %accum, <2 x float> %lhs, <4 x float> %rhs) { 1939*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: fmls_with_fneg_before_extract_v2f32: 1940*9880d681SAndroid Build Coastguard Worker; CHECK: fmls.2s v0, v1, v2[3] 1941*9880d681SAndroid Build Coastguard Worker %rhs_neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %rhs 1942*9880d681SAndroid Build Coastguard Worker %splat = shufflevector <4 x float> %rhs_neg, <4 x float> undef, <2 x i32> <i32 3, i32 3> 1943*9880d681SAndroid Build Coastguard Worker %res = call <2 x float> @llvm.fma.v2f32(<2 x float> %lhs, <2 x float> %splat, <2 x float> %accum) 1944*9880d681SAndroid Build Coastguard Worker ret <2 x float> %res 1945*9880d681SAndroid Build Coastguard Worker} 1946*9880d681SAndroid Build Coastguard Worker 
; The fmls_with_fneg_before_extract_* tests negate the whole %rhs vector
; (fsub from -0.0), splat one lane of the negated vector, and feed the splat
; to llvm.fma. Each expects a single lane-indexed fmls on the un-negated
; register, with the lane number taken from the shuffle mask.

; <2 x float> source, <2 x float> result, lane 1.
define <2 x float> @fmls_with_fneg_before_extract_v2f32_1(<2 x float> %accum, <2 x float> %lhs, <2 x float> %rhs) {
; CHECK-LABEL: fmls_with_fneg_before_extract_v2f32_1:
; CHECK: fmls.2s v0, v1, v2[1]
  %rhs_neg = fsub <2 x float> <float -0.0, float -0.0>, %rhs
  %splat = shufflevector <2 x float> %rhs_neg, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %res = call <2 x float> @llvm.fma.v2f32(<2 x float> %lhs, <2 x float> %splat, <2 x float> %accum)
  ret <2 x float> %res
}

; <4 x float> source, <4 x float> result, lane 3.
define <4 x float> @fmls_with_fneg_before_extract_v4f32(<4 x float> %accum, <4 x float> %lhs, <4 x float> %rhs) {
; CHECK-LABEL: fmls_with_fneg_before_extract_v4f32:
; CHECK: fmls.4s v0, v1, v2[3]
  %rhs_neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %rhs
  %splat = shufflevector <4 x float> %rhs_neg, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %res = call <4 x float> @llvm.fma.v4f32(<4 x float> %lhs, <4 x float> %splat, <4 x float> %accum)
  ret <4 x float> %res
}

; <2 x float> source widened by the splat to a <4 x float> result, lane 1.
define <4 x float> @fmls_with_fneg_before_extract_v4f32_1(<4 x float> %accum, <4 x float> %lhs, <2 x float> %rhs) {
; CHECK-LABEL: fmls_with_fneg_before_extract_v4f32_1:
; CHECK: fmls.4s v0, v1, v2[1]
  %rhs_neg = fsub <2 x float> <float -0.0, float -0.0>, %rhs
  %splat = shufflevector <2 x float> %rhs_neg, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %res = call <4 x float> @llvm.fma.v4f32(<4 x float> %lhs, <4 x float> %splat, <4 x float> %accum)
  ret <4 x float> %res
}

; <2 x double> source, <2 x double> result, lane 1.
define <2 x double> @fmls_with_fneg_before_extract_v2f64(<2 x double> %accum, <2 x double> %lhs, <2 x double> %rhs) {
; CHECK-LABEL: fmls_with_fneg_before_extract_v2f64:
; CHECK: fmls.2d v0, v1, v2[1]
  %rhs_neg = fsub <2 x double> <double -0.0, double -0.0>, %rhs
  %splat = shufflevector <2 x double> %rhs_neg, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %res = call <2 x double> @llvm.fma.v2f64(<2 x double> %lhs, <2 x double> %splat, <2 x double> %accum)
  ret <2 x double> %res
}

; A single-element double vector multiply must be selectable; the test only
; requires an fmul to appear in the output.
define <1 x double> @test_fmul_v1f64(<1 x double> %L, <1 x double> %R) nounwind {
; CHECK-LABEL: test_fmul_v1f64:
; CHECK: fmul
  %prod = fmul <1 x double> %L, %R
  ret <1 x double> %prod
}

; Same as above for division. The instruction match is an ordinary match
; directive (not a label directive) so that it is verified inside this
; function's own labelled block, mirroring test_fmul_v1f64.
define <1 x double> @test_fdiv_v1f64(<1 x double> %L, <1 x double> %R) nounwind {
; CHECK-LABEL: test_fdiv_v1f64:
; CHECK: fdiv
  %quot = fdiv <1 x double> %L, %R
  ret <1 x double> %quot
}

; Scalar sqdmulls whose result is accumulated into %C with a saturating add;
; the pair is expected to be emitted as sqdmlal.
define i64 @sqdmlal_d(i32 %A, i32 %B, i64 %C) nounwind {
;CHECK-LABEL: sqdmlal_d:
;CHECK: sqdmlal
  %tmp4 = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %A, i32 %B)
  %tmp5 = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %C, i64 %tmp4)
  ret i64 %tmp5
}

; Scalar sqdmulls whose result is removed from %C with a saturating subtract;
; the pair is expected to be emitted as sqdmlsl.
define i64 @sqdmlsl_d(i32 %A, i32 %B, i64 %C) nounwind {
;CHECK-LABEL: sqdmlsl_d:
;CHECK: sqdmlsl
  %tmp4 = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %A, i32 %B)
  %tmp5 = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %C, i64 %tmp4)
  ret i64 %tmp5
}

; The pmull64 intrinsic on two plain i64 arguments is expected to produce
; pmull.1q.
define <16 x i8> @test_pmull_64(i64 %l, i64 %r) nounwind {
; CHECK-LABEL: test_pmull_64:
; CHECK: pmull.1q
  %val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r)
  ret <16 x i8> %val
}

; When both pmull64 operands are element 1 of a <2 x i64> vector, the
; high-half form pmull2.1q is expected instead.
define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind {
; CHECK-LABEL: test_pmull_high_64:
; CHECK: pmull2.1q
  %l_hi = extractelement <2 x i64> %l, i32 1
  %r_hi = extractelement <2 x i64> %r, i32 1
  %val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l_hi, i64 %r_hi)
  ret <16 x i8> %val
}

declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64)

; A single-element i64 vector multiply must be selectable; the test only
; requires "mul" to appear in the output.
define <1 x i64> @test_mul_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) nounwind {
; CHECK-LABEL: test_mul_v1i64:
; CHECK: mul
  %prod = mul <1 x i64> %lhs, %rhs
  ret <1 x i64> %prod
}