; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW

; Codegen expectations for logical right shifts (lshr) of 512-bit vectors.
; llc is invoked twice for x86_64-apple-darwin with -mcpu=knl: once with
; +avx512dq and once with +avx512bw. Assertions under the shared prefix are
; verified in both invocations; the per-feature prefixes are verified only in
; the matching invocation.

;
; Variable Shifts
;

; lshr of <8 x i64> where every lane has its own shift amount taken from %b.
define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: var_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlvq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <8 x i64> %a, %b
  ret <8 x i64> %shift
}

; lshr of <16 x i32> where every lane has its own shift amount taken from %b.
define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: var_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <16 x i32> %a, %b
  ret <16 x i32> %shift
}

27*9880d681SAndroid Build Coastguard Workerdefine <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind { 28*9880d681SAndroid Build Coastguard Worker; AVX512DQ-LABEL: var_shift_v32i16: 29*9880d681SAndroid Build Coastguard Worker; AVX512DQ: ## BB#0: 30*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4 31*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15] 32*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm6 = ymm4[4],ymm0[4],ymm4[5],ymm0[5],ymm4[6],ymm0[6],ymm4[7],ymm0[7],ymm4[12],ymm0[12],ymm4[13],ymm0[13],ymm4[14],ymm0[14],ymm4[15],ymm0[15] 33*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlvd %ymm5, %ymm6, %ymm5 34*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrld $16, %ymm5, %ymm5 35*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm2 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11] 36*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm4[0],ymm0[0],ymm4[1],ymm0[1],ymm4[2],ymm0[2],ymm4[3],ymm0[3],ymm4[8],ymm0[8],ymm4[9],ymm0[9],ymm4[10],ymm0[10],ymm4[11],ymm0[11] 37*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlvd %ymm2, %ymm0, %ymm0 38*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrld $16, %ymm0, %ymm0 39*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpackusdw %ymm5, %ymm0, %ymm0 40*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm3[4],ymm4[4],ymm3[5],ymm4[5],ymm3[6],ymm4[6],ymm3[7],ymm4[7],ymm3[12],ymm4[12],ymm3[13],ymm4[13],ymm3[14],ymm4[14],ymm3[15],ymm4[15] 41*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = 
ymm4[4],ymm1[4],ymm4[5],ymm1[5],ymm4[6],ymm1[6],ymm4[7],ymm1[7],ymm4[12],ymm1[12],ymm4[13],ymm1[13],ymm4[14],ymm1[14],ymm4[15],ymm1[15] 42*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlvd %ymm2, %ymm5, %ymm2 43*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrld $16, %ymm2, %ymm2 44*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm4[0],ymm3[1],ymm4[1],ymm3[2],ymm4[2],ymm3[3],ymm4[3],ymm3[8],ymm4[8],ymm3[9],ymm4[9],ymm3[10],ymm4[10],ymm3[11],ymm4[11] 45*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[8],ymm1[8],ymm4[9],ymm1[9],ymm4[10],ymm1[10],ymm4[11],ymm1[11] 46*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlvd %ymm3, %ymm1, %ymm1 47*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrld $16, %ymm1, %ymm1 48*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpackusdw %ymm2, %ymm1, %ymm1 49*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: retq 50*9880d681SAndroid Build Coastguard Worker; 51*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: var_shift_v32i16: 52*9880d681SAndroid Build Coastguard Worker; AVX512BW: ## BB#0: 53*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 54*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 55*9880d681SAndroid Build Coastguard Worker %shift = lshr <32 x i16> %a, %b 56*9880d681SAndroid Build Coastguard Worker ret <32 x i16> %shift 57*9880d681SAndroid Build Coastguard Worker} 58*9880d681SAndroid Build Coastguard Worker 59*9880d681SAndroid Build Coastguard Workerdefine <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind { 60*9880d681SAndroid Build Coastguard Worker; AVX512DQ-LABEL: var_shift_v64i8: 61*9880d681SAndroid Build Coastguard Worker; AVX512DQ: ## BB#0: 62*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm4 
63*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 64*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpand %ymm5, %ymm4, %ymm4 65*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsllw $5, %ymm2, %ymm2 66*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0 67*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw $2, %ymm0, %ymm4 68*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63] 69*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpand %ymm6, %ymm4, %ymm4 70*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2 71*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0 72*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw $1, %ymm0, %ymm4 73*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 74*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpand %ymm7, %ymm4, %ymm4 75*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2 76*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0 77*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm2 78*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpand %ymm5, %ymm2, %ymm2 79*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsllw $5, %ymm3, %ymm3 80*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1 81*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw $2, %ymm1, %ymm2 
82*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpand %ymm6, %ymm2, %ymm2 83*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpaddb %ymm3, %ymm3, %ymm3 84*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1 85*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw $1, %ymm1, %ymm2 86*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpand %ymm7, %ymm2, %ymm2 87*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpaddb %ymm3, %ymm3, %ymm3 88*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1 89*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: retq 90*9880d681SAndroid Build Coastguard Worker 91*9880d681SAndroid Build Coastguard Worker %shift = lshr <64 x i8> %a, %b 92*9880d681SAndroid Build Coastguard Worker ret <64 x i8> %shift 93*9880d681SAndroid Build Coastguard Worker} 94*9880d681SAndroid Build Coastguard Worker 95*9880d681SAndroid Build Coastguard Worker; 96*9880d681SAndroid Build Coastguard Worker; Uniform Variable Shifts 97*9880d681SAndroid Build Coastguard Worker; 98*9880d681SAndroid Build Coastguard Worker 99*9880d681SAndroid Build Coastguard Workerdefine <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind { 100*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: splatvar_shift_v8i64: 101*9880d681SAndroid Build Coastguard Worker; ALL: ## BB#0: 102*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 103*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: retq 104*9880d681SAndroid Build Coastguard Worker %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer 105*9880d681SAndroid Build Coastguard Worker %shift = lshr <8 x i64> %a, %splat 106*9880d681SAndroid Build Coastguard Worker ret <8 x i64> %shift 107*9880d681SAndroid Build Coastguard Worker} 108*9880d681SAndroid Build Coastguard Worker 109*9880d681SAndroid Build Coastguard Workerdefine <16 x i32> 
@splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind { 110*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: splatvar_shift_v16i32: 111*9880d681SAndroid Build Coastguard Worker; ALL: ## BB#0: 112*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: vxorps %xmm2, %xmm2, %xmm2 113*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3] 114*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: vpsrld %xmm1, %zmm0, %zmm0 115*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: retq 116*9880d681SAndroid Build Coastguard Worker %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer 117*9880d681SAndroid Build Coastguard Worker %shift = lshr <16 x i32> %a, %splat 118*9880d681SAndroid Build Coastguard Worker ret <16 x i32> %shift 119*9880d681SAndroid Build Coastguard Worker} 120*9880d681SAndroid Build Coastguard Worker 121*9880d681SAndroid Build Coastguard Workerdefine <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind { 122*9880d681SAndroid Build Coastguard Worker; AVX512DQ-LABEL: splatvar_shift_v32i16: 123*9880d681SAndroid Build Coastguard Worker; AVX512DQ: ## BB#0: 124*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vmovd %xmm2, %eax 125*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: movzwl %ax, %eax 126*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vmovd %eax, %xmm2 127*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw %xmm2, %ymm0, %ymm0 128*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw %xmm2, %ymm1, %ymm1 129*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: retq 130*9880d681SAndroid Build Coastguard Worker; 131*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: splatvar_shift_v32i16: 132*9880d681SAndroid Build Coastguard Worker; AVX512BW: ## BB#0: 133*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovd %xmm1, %eax 134*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: movzwl %ax, 
%eax 135*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovd %eax, %xmm1 136*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 137*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 138*9880d681SAndroid Build Coastguard Worker %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer 139*9880d681SAndroid Build Coastguard Worker %shift = lshr <32 x i16> %a, %splat 140*9880d681SAndroid Build Coastguard Worker ret <32 x i16> %shift 141*9880d681SAndroid Build Coastguard Worker} 142*9880d681SAndroid Build Coastguard Worker 143*9880d681SAndroid Build Coastguard Workerdefine <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind { 144*9880d681SAndroid Build Coastguard Worker; AVX512DQ-LABEL: splatvar_shift_v64i8: 145*9880d681SAndroid Build Coastguard Worker; AVX512DQ: ## BB#0: 146*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpbroadcastb %xmm2, %ymm2 147*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm3 148*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 149*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm3 150*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsllw $5, %ymm2, %ymm2 151*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm3, %ymm0, %ymm0 152*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw $2, %ymm0, %ymm3 153*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63] 154*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpand %ymm5, %ymm3, %ymm3 155*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm6 156*9880d681SAndroid Build Coastguard Worker; 
AVX512DQ-NEXT: vpblendvb %ymm6, %ymm3, %ymm0, %ymm0 157*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw $1, %ymm0, %ymm3 158*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 159*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpand %ymm7, %ymm3, %ymm3 160*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpaddb %ymm6, %ymm6, %ymm8 161*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm3, %ymm0, %ymm0 162*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm3 163*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm3 164*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1 165*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw $2, %ymm1, %ymm2 166*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpand %ymm5, %ymm2, %ymm2 167*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm2, %ymm1, %ymm1 168*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw $1, %ymm1, %ymm2 169*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpand %ymm7, %ymm2, %ymm2 170*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm2, %ymm1, %ymm1 171*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: retq 172*9880d681SAndroid Build Coastguard Worker %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer 173*9880d681SAndroid Build Coastguard Worker %shift = lshr <64 x i8> %a, %splat 174*9880d681SAndroid Build Coastguard Worker ret <64 x i8> %shift 175*9880d681SAndroid Build Coastguard Worker} 176*9880d681SAndroid Build Coastguard Worker 177*9880d681SAndroid Build Coastguard Worker; 178*9880d681SAndroid Build Coastguard Worker; Constant Shifts 179*9880d681SAndroid Build 
Coastguard Worker; 180*9880d681SAndroid Build Coastguard Worker 181*9880d681SAndroid Build Coastguard Workerdefine <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind { 182*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: constant_shift_v8i64: 183*9880d681SAndroid Build Coastguard Worker; ALL: ## BB#0: 184*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: vpsrlvq {{.*}}(%rip), %zmm0, %zmm0 185*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: retq 186*9880d681SAndroid Build Coastguard Worker %shift = lshr <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62> 187*9880d681SAndroid Build Coastguard Worker ret <8 x i64> %shift 188*9880d681SAndroid Build Coastguard Worker} 189*9880d681SAndroid Build Coastguard Worker 190*9880d681SAndroid Build Coastguard Workerdefine <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind { 191*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: constant_shift_v16i32: 192*9880d681SAndroid Build Coastguard Worker; ALL: ## BB#0: 193*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm0 194*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: retq 195*9880d681SAndroid Build Coastguard Worker %shift = lshr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7> 196*9880d681SAndroid Build Coastguard Worker ret <16 x i32> %shift 197*9880d681SAndroid Build Coastguard Worker} 198*9880d681SAndroid Build Coastguard Worker 199*9880d681SAndroid Build Coastguard Workerdefine <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind { 200*9880d681SAndroid Build Coastguard Worker; AVX512DQ-LABEL: constant_shift_v32i16: 201*9880d681SAndroid Build Coastguard Worker; AVX512DQ: ## BB#0: 202*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpxor %ymm2, %ymm2, %ymm2 203*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 204*9880d681SAndroid 
Build Coastguard Worker; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm3[4],ymm2[4],ymm3[5],ymm2[5],ymm3[6],ymm2[6],ymm3[7],ymm2[7],ymm3[12],ymm2[12],ymm3[13],ymm2[13],ymm3[14],ymm2[14],ymm3[15],ymm2[15] 205*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] 206*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlvd %ymm4, %ymm5, %ymm5 207*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrld $16, %ymm5, %ymm5 208*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm2[0],ymm3[1],ymm2[1],ymm3[2],ymm2[2],ymm3[3],ymm2[3],ymm3[8],ymm2[8],ymm3[9],ymm2[9],ymm3[10],ymm2[10],ymm3[11],ymm2[11] 209*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] 210*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlvd %ymm3, %ymm0, %ymm0 211*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrld $16, %ymm0, %ymm0 212*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpackusdw %ymm5, %ymm0, %ymm0 213*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15] 214*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlvd %ymm4, %ymm5, %ymm4 215*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrld $16, %ymm4, %ymm4 216*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11] 217*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlvd 
%ymm3, %ymm1, %ymm1 218*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrld $16, %ymm1, %ymm1 219*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpackusdw %ymm4, %ymm1, %ymm1 220*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: retq 221*9880d681SAndroid Build Coastguard Worker; 222*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: constant_shift_v32i16: 223*9880d681SAndroid Build Coastguard Worker; AVX512BW: ## BB#0: 224*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm0 225*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 226*9880d681SAndroid Build Coastguard Worker %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15> 227*9880d681SAndroid Build Coastguard Worker ret <32 x i16> %shift 228*9880d681SAndroid Build Coastguard Worker} 229*9880d681SAndroid Build Coastguard Worker 230*9880d681SAndroid Build Coastguard Workerdefine <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind { 231*9880d681SAndroid Build Coastguard Worker; AVX512DQ-LABEL: constant_shift_v64i8: 232*9880d681SAndroid Build Coastguard Worker; AVX512DQ: ## BB#0: 233*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm2 234*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 235*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2 236*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] 237*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsllw $5, %ymm4, %ymm4 238*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: 
vpblendvb %ymm4, %ymm2, %ymm0, %ymm0 239*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw $2, %ymm0, %ymm2 240*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63] 241*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpand %ymm5, %ymm2, %ymm2 242*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpaddb %ymm4, %ymm4, %ymm6 243*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm2, %ymm0, %ymm0 244*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw $1, %ymm0, %ymm2 245*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 246*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpand %ymm7, %ymm2, %ymm2 247*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpaddb %ymm6, %ymm6, %ymm8 248*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm2, %ymm0, %ymm0 249*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm2 250*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2 251*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm2, %ymm1, %ymm1 252*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw $2, %ymm1, %ymm2 253*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpand %ymm5, %ymm2, %ymm2 254*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm2, %ymm1, %ymm1 255*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw $1, %ymm1, %ymm2 256*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpand %ymm7, %ymm2, %ymm2 257*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm2, %ymm1, %ymm1 258*9880d681SAndroid Build Coastguard 
Worker; AVX512DQ-NEXT: retq 259*9880d681SAndroid Build Coastguard Worker %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0> 260*9880d681SAndroid Build Coastguard Worker ret <64 x i8> %shift 261*9880d681SAndroid Build Coastguard Worker} 262*9880d681SAndroid Build Coastguard Worker 263*9880d681SAndroid Build Coastguard Worker; 264*9880d681SAndroid Build Coastguard Worker; Uniform Constant Shifts 265*9880d681SAndroid Build Coastguard Worker; 266*9880d681SAndroid Build Coastguard Worker 267*9880d681SAndroid Build Coastguard Workerdefine <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind { 268*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: splatconstant_shift_v8i64: 269*9880d681SAndroid Build Coastguard Worker; ALL: ## BB#0: 270*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: vpsrlq $7, %zmm0, %zmm0 271*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: retq 272*9880d681SAndroid Build Coastguard Worker %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7> 273*9880d681SAndroid Build Coastguard Worker ret <8 x i64> %shift 274*9880d681SAndroid Build Coastguard Worker} 275*9880d681SAndroid Build Coastguard Worker 276*9880d681SAndroid Build Coastguard Workerdefine <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind { 277*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: splatconstant_shift_v16i32: 278*9880d681SAndroid Build Coastguard Worker; ALL: ## BB#0: 279*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: vpsrld $5, %zmm0, %zmm0 280*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: retq 281*9880d681SAndroid Build Coastguard Worker %shift = lshr <16 
x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> 282*9880d681SAndroid Build Coastguard Worker ret <16 x i32> %shift 283*9880d681SAndroid Build Coastguard Worker} 284*9880d681SAndroid Build Coastguard Worker 285*9880d681SAndroid Build Coastguard Workerdefine <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind { 286*9880d681SAndroid Build Coastguard Worker; AVX512DQ-LABEL: splatconstant_shift_v32i16: 287*9880d681SAndroid Build Coastguard Worker; AVX512DQ: ## BB#0: 288*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw $3, %ymm0, %ymm0 289*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw $3, %ymm1, %ymm1 290*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: retq 291*9880d681SAndroid Build Coastguard Worker; 292*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: splatconstant_shift_v32i16: 293*9880d681SAndroid Build Coastguard Worker; AVX512BW: ## BB#0: 294*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm0 295*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 296*9880d681SAndroid Build Coastguard Worker %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> 297*9880d681SAndroid Build Coastguard Worker ret <32 x i16> %shift 298*9880d681SAndroid Build Coastguard Worker} 299*9880d681SAndroid Build Coastguard Worker 300*9880d681SAndroid Build Coastguard Workerdefine <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind { 301*9880d681SAndroid Build Coastguard Worker; AVX512DQ-LABEL: splatconstant_shift_v64i8: 302*9880d681SAndroid Build Coastguard Worker; AVX512DQ: ## BB#0: 303*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw $3, %ymm0, %ymm0 304*9880d681SAndroid Build Coastguard Worker; 
AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31] 305*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0 306*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpsrlw $3, %ymm1, %ymm1 307*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1 308*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT: retq 309*9880d681SAndroid Build Coastguard Worker; 310*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: splatconstant_shift_v64i8: 311*9880d681SAndroid Build Coastguard Worker; AVX512BW: ## BB#0: 312*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm0 313*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 314*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 315*9880d681SAndroid Build Coastguard Worker %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> 316*9880d681SAndroid Build Coastguard Worker ret <64 x i8> %shift 317*9880d681SAndroid Build Coastguard Worker} 318