; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
; Origin: AOSP aosp_15_r20, external/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)

;
; Variable Shifts
;

; Per-element variable logical right shift of <8 x i64>.
; Both llc runs are expected to emit a single vpsrlvq on zmm registers.
define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: var_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlvq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <8 x i64> %a, %b
  ret <8 x i64> %shift
}

; Per-element variable logical right shift of <16 x i32>.
; Both llc runs are expected to emit a single vpsrlvd on zmm registers.
define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: var_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <16 x i32> %a, %b
  ret <16 x i32> %shift
}

; Per-element variable logical right shift of <32 x i16>.
; With +avx512bw a single vpsrlvw on zmm is expected. With +avx512dq the checks
; expect each 256-bit half to be widened to dwords (vpunpckhwd/vpunpcklwd
; against a zeroed register), shifted with vpsrlvd, shifted right by 16 and
; repacked with vpackusdw.
define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15]
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm6 = ymm4[4],ymm0[4],ymm4[5],ymm0[5],ymm4[6],ymm0[6],ymm4[7],ymm0[7],ymm4[12],ymm0[12],ymm4[13],ymm0[13],ymm4[14],ymm0[14],ymm4[15],ymm0[15]
; AVX512DQ-NEXT:    vpsrlvd %ymm5, %ymm6, %ymm5
; AVX512DQ-NEXT:    vpsrld $16, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm2 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11]
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm4[0],ymm0[0],ymm4[1],ymm0[1],ymm4[2],ymm0[2],ymm4[3],ymm0[3],ymm4[8],ymm0[8],ymm4[9],ymm0[9],ymm4[10],ymm0[10],ymm4[11],ymm0[11]
; AVX512DQ-NEXT:    vpsrlvd %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrld $16, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpackusdw %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm2 = ymm3[4],ymm4[4],ymm3[5],ymm4[5],ymm3[6],ymm4[6],ymm3[7],ymm4[7],ymm3[12],ymm4[12],ymm3[13],ymm4[13],ymm3[14],ymm4[14],ymm3[15],ymm4[15]
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm5 = ymm4[4],ymm1[4],ymm4[5],ymm1[5],ymm4[6],ymm1[6],ymm4[7],ymm1[7],ymm4[12],ymm1[12],ymm4[13],ymm1[13],ymm4[14],ymm1[14],ymm4[15],ymm1[15]
; AVX512DQ-NEXT:    vpsrlvd %ymm2, %ymm5, %ymm2
; AVX512DQ-NEXT:    vpsrld $16, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm4[0],ymm3[1],ymm4[1],ymm3[2],ymm4[2],ymm3[3],ymm4[3],ymm3[8],ymm4[8],ymm3[9],ymm4[9],ymm3[10],ymm4[10],ymm3[11],ymm4[11]
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[8],ymm1[8],ymm4[9],ymm1[9],ymm4[10],ymm1[10],ymm4[11],ymm1[11]
; AVX512DQ-NEXT:    vpsrlvd %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrld $16, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpackusdw %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <32 x i16> %a, %b
  ret <32 x i16> %shift
}

; Per-element variable logical right shift of <64 x i8>.
; The +avx512dq checks expect, for each 256-bit half, three vpblendvb stages
; that conditionally apply shifts of 4, 2 and 1 (vpsrlw followed by a byte
; mask). The blend control is the shift amount moved up with vpsllw $5 and then
; doubled with vpaddb between stages.
; NOTE(review): this function carries no assertions for the +avx512bw run;
; confirm whether that omission is intentional when regenerating the checks.
define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v64i8:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $2, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT:    vpand %ymm6, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $1, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT:    vpand %ymm7, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsllw $5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm6, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq

  %shift = lshr <64 x i8> %a, %b
  ret <64 x i8> %shift
}

;
; Uniform Variable Shifts
;

; Logical right shift of <8 x i64> by element 0 of %b splatted to every lane.
; Both llc runs are expected to emit one vpsrlq that takes its count from xmm1.
define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlq %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %shift = lshr <8 x i64> %a, %splat
  ret <8 x i64> %shift
}

; Logical right shift of <16 x i32> by element 0 of %b splatted to every lane.
; Both llc runs expect the count to be isolated in the low dword of xmm1
; (vxorps + vmovss) before a single vpsrld on zmm.
define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; ALL-NEXT:    vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; ALL-NEXT:    vpsrld %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  %shift = lshr <16 x i32> %a, %splat
  ret <16 x i32> %shift
}

; Logical right shift of <32 x i16> by element 0 of %b splatted to every lane.
; Both runs expect the low word of the count to be zero-extended through eax
; (vmovd / movzwl / vmovd). The +avx512dq run then shifts the two ymm halves
; with vpsrlw; the +avx512bw run shifts zmm0 once.
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vmovd %xmm2, %eax
; AVX512DQ-NEXT:    movzwl %ax, %eax
; AVX512DQ-NEXT:    vmovd %eax, %xmm2
; AVX512DQ-NEXT:    vpsrlw %xmm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vmovd %xmm1, %eax
; AVX512BW-NEXT:    movzwl %ax, %eax
; AVX512BW-NEXT:    vmovd %eax, %xmm1
; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
  %shift = lshr <32 x i16> %a, %splat
  ret <32 x i16> %shift
}

; Logical right shift of <64 x i8> by element 0 of %b splatted to every lane.
; The +avx512dq checks expect a vpbroadcastb of the count followed by the
; three-stage vpblendvb sequence (shifts of 4, 2 and 1) on each ymm half; the
; blend controls computed for the first half are reused for the second.
; NOTE(review): this function carries no assertions for the +avx512bw run;
; confirm whether that omission is intentional when regenerating the checks.
define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v64i8:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpbroadcastb %xmm2, %ymm2
; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $2, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $1, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT:    vpand %ymm7, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm8
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm3
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
  %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
  %shift = lshr <64 x i8> %a, %splat
  ret <64 x i8> %shift
}

;
; Constant Shifts
;

; Logical right shift of <8 x i64> by the non-uniform constant
; <1, 7, 31, 62, 1, 7, 31, 62>.
; Both llc runs are expected to emit vpsrlvq with a RIP-relative memory operand.
define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: constant_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlvq {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
  ret <8 x i64> %shift
}

; Logical right shift of <16 x i32> by the non-uniform constant
; <4, 5, 6, 7, 8, 9, 8, 7> repeated twice.
; Both llc runs are expected to emit vpsrlvd with a RIP-relative memory operand.
define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: constant_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <16 x i32> %shift
}

; Logical right shift of <32 x i16> by the constant amounts 0..15, repeated for
; both 16-element halves.
; With +avx512bw a single vpsrlvw with a RIP-relative operand is expected. With
; +avx512dq the checks expect the widen-to-dword / vpsrlvd / vpsrld $16 /
; vpackusdw sequence per ymm half, with the unpacked constant amounts (ymm3,
; ymm4) computed once and reused for the second half.
define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm3[4],ymm2[4],ymm3[5],ymm2[5],ymm3[6],ymm2[6],ymm3[7],ymm2[7],ymm3[12],ymm2[12],ymm3[13],ymm2[13],ymm3[14],ymm2[14],ymm3[15],ymm2[15]
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; AVX512DQ-NEXT:    vpsrlvd %ymm4, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpsrld $16, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm2[0],ymm3[1],ymm2[1],ymm3[2],ymm2[2],ymm3[3],ymm2[3],ymm3[8],ymm2[8],ymm3[9],ymm2[9],ymm3[10],ymm2[10],ymm3[11],ymm2[11]
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; AVX512DQ-NEXT:    vpsrlvd %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrld $16, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpackusdw %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15]
; AVX512DQ-NEXT:    vpsrlvd %ymm4, %ymm5, %ymm4
; AVX512DQ-NEXT:    vpsrld $16, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11]
; AVX512DQ-NEXT:    vpsrlvd %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrld $16, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpackusdw %ymm4, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsrlvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <32 x i16> %shift
}

; Logical right shift of <64 x i8> by the constant pattern
; <0..7, 7..0> repeated across all 64 lanes.
; The +avx512dq checks expect the three-stage vpblendvb sequence (shifts of 4, 2
; and 1) on each ymm half, with the blend controls derived from the constant
; amounts via vpsllw $5 / vpaddb and reused for the second half.
; NOTE(review): this function carries no assertions for the +avx512bw run;
; confirm whether that omission is intentional when regenerating the checks.
define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v64i8:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
; AVX512DQ-NEXT:    vpsllw $5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $2, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm4, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $1, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm8
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
  %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <64 x i8> %shift
}

;
; Uniform Constant Shifts
;

; Logical right shift of every <8 x i64> lane by the constant 7.
; Both llc runs are expected to emit a single immediate-count vpsrlq on zmm0.
define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrlq $7, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
  ret <8 x i64> %shift
}

; Logical right shift of every <16 x i32> lane by the constant 5.
; Both llc runs are expected to emit a single immediate-count vpsrld on zmm0.
define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsrld $5, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i32> %shift
}

; Logical right shift of every <32 x i16> lane by the constant 3.
; The +avx512dq run is expected to shift the two ymm halves separately with
; vpsrlw $3; the +avx512bw run is expected to shift zmm0 once.
define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpsrlw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsrlw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <32 x i16> %shift
}

300*9880d681SAndroid Build Coastguard Workerdefine <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
301*9880d681SAndroid Build Coastguard Worker; AVX512DQ-LABEL: splatconstant_shift_v64i8:
302*9880d681SAndroid Build Coastguard Worker; AVX512DQ:       ## BB#0:
303*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT:    vpsrlw $3, %ymm0, %ymm0
304*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
305*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
306*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT:    vpsrlw $3, %ymm1, %ymm1
307*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
308*9880d681SAndroid Build Coastguard Worker; AVX512DQ-NEXT:    retq
309*9880d681SAndroid Build Coastguard Worker;
310*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: splatconstant_shift_v64i8:
311*9880d681SAndroid Build Coastguard Worker; AVX512BW:       ## BB#0:
312*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpsrlw $3, %zmm0, %zmm0
313*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
314*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
315*9880d681SAndroid Build Coastguard Worker  %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
316*9880d681SAndroid Build Coastguard Worker  ret <64 x i8> %shift
317*9880d681SAndroid Build Coastguard Worker}
318