xref: /aosp_15_r20/external/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker //===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
2*9880d681SAndroid Build Coastguard Worker //
3*9880d681SAndroid Build Coastguard Worker //                     The LLVM Compiler Infrastructure
4*9880d681SAndroid Build Coastguard Worker //
5*9880d681SAndroid Build Coastguard Worker // This file is distributed under the University of Illinois Open Source
6*9880d681SAndroid Build Coastguard Worker // License. See LICENSE.TXT for details.
7*9880d681SAndroid Build Coastguard Worker //
8*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
9*9880d681SAndroid Build Coastguard Worker //
10*9880d681SAndroid Build Coastguard Worker // Define several functions to decode x86 specific shuffle semantics into a
11*9880d681SAndroid Build Coastguard Worker // generic vector mask.
12*9880d681SAndroid Build Coastguard Worker //
13*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
14*9880d681SAndroid Build Coastguard Worker 
15*9880d681SAndroid Build Coastguard Worker #include "X86ShuffleDecode.h"
16*9880d681SAndroid Build Coastguard Worker #include "llvm/ADT/ArrayRef.h"
17*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineValueType.h"
18*9880d681SAndroid Build Coastguard Worker 
19*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
20*9880d681SAndroid Build Coastguard Worker //  Vector Mask Decoding
21*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
22*9880d681SAndroid Build Coastguard Worker 
23*9880d681SAndroid Build Coastguard Worker namespace llvm {
24*9880d681SAndroid Build Coastguard Worker 
DecodeINSERTPSMask(unsigned Imm,SmallVectorImpl<int> & ShuffleMask)25*9880d681SAndroid Build Coastguard Worker void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
26*9880d681SAndroid Build Coastguard Worker   // Defaults the copying the dest value.
27*9880d681SAndroid Build Coastguard Worker   ShuffleMask.push_back(0);
28*9880d681SAndroid Build Coastguard Worker   ShuffleMask.push_back(1);
29*9880d681SAndroid Build Coastguard Worker   ShuffleMask.push_back(2);
30*9880d681SAndroid Build Coastguard Worker   ShuffleMask.push_back(3);
31*9880d681SAndroid Build Coastguard Worker 
32*9880d681SAndroid Build Coastguard Worker   // Decode the immediate.
33*9880d681SAndroid Build Coastguard Worker   unsigned ZMask = Imm & 15;
34*9880d681SAndroid Build Coastguard Worker   unsigned CountD = (Imm >> 4) & 3;
35*9880d681SAndroid Build Coastguard Worker   unsigned CountS = (Imm >> 6) & 3;
36*9880d681SAndroid Build Coastguard Worker 
37*9880d681SAndroid Build Coastguard Worker   // CountS selects which input element to use.
38*9880d681SAndroid Build Coastguard Worker   unsigned InVal = 4 + CountS;
39*9880d681SAndroid Build Coastguard Worker   // CountD specifies which element of destination to update.
40*9880d681SAndroid Build Coastguard Worker   ShuffleMask[CountD] = InVal;
41*9880d681SAndroid Build Coastguard Worker   // ZMask zaps values, potentially overriding the CountD elt.
42*9880d681SAndroid Build Coastguard Worker   if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
43*9880d681SAndroid Build Coastguard Worker   if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
44*9880d681SAndroid Build Coastguard Worker   if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
45*9880d681SAndroid Build Coastguard Worker   if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
46*9880d681SAndroid Build Coastguard Worker }
47*9880d681SAndroid Build Coastguard Worker 
DecodeInsertElementMask(MVT VT,unsigned Idx,unsigned Len,SmallVectorImpl<int> & ShuffleMask)48*9880d681SAndroid Build Coastguard Worker void DecodeInsertElementMask(MVT VT, unsigned Idx, unsigned Len,
49*9880d681SAndroid Build Coastguard Worker                              SmallVectorImpl<int> &ShuffleMask) {
50*9880d681SAndroid Build Coastguard Worker   unsigned NumElts = VT.getVectorNumElements();
51*9880d681SAndroid Build Coastguard Worker   assert((Idx + Len) <= NumElts && "Insertion out of range");
52*9880d681SAndroid Build Coastguard Worker 
53*9880d681SAndroid Build Coastguard Worker   for (unsigned i = 0; i != NumElts; ++i)
54*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back(i);
55*9880d681SAndroid Build Coastguard Worker   for (unsigned i = 0; i != Len; ++i)
56*9880d681SAndroid Build Coastguard Worker     ShuffleMask[Idx + i] = NumElts + i;
57*9880d681SAndroid Build Coastguard Worker }
58*9880d681SAndroid Build Coastguard Worker 
59*9880d681SAndroid Build Coastguard Worker // <3,1> or <6,7,2,3>
DecodeMOVHLPSMask(unsigned NElts,SmallVectorImpl<int> & ShuffleMask)60*9880d681SAndroid Build Coastguard Worker void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
61*9880d681SAndroid Build Coastguard Worker   for (unsigned i = NElts / 2; i != NElts; ++i)
62*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back(NElts + i);
63*9880d681SAndroid Build Coastguard Worker 
64*9880d681SAndroid Build Coastguard Worker   for (unsigned i = NElts / 2; i != NElts; ++i)
65*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back(i);
66*9880d681SAndroid Build Coastguard Worker }
67*9880d681SAndroid Build Coastguard Worker 
68*9880d681SAndroid Build Coastguard Worker // <0,2> or <0,1,4,5>
DecodeMOVLHPSMask(unsigned NElts,SmallVectorImpl<int> & ShuffleMask)69*9880d681SAndroid Build Coastguard Worker void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
70*9880d681SAndroid Build Coastguard Worker   for (unsigned i = 0; i != NElts / 2; ++i)
71*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back(i);
72*9880d681SAndroid Build Coastguard Worker 
73*9880d681SAndroid Build Coastguard Worker   for (unsigned i = 0; i != NElts / 2; ++i)
74*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back(NElts + i);
75*9880d681SAndroid Build Coastguard Worker }
76*9880d681SAndroid Build Coastguard Worker 
DecodeMOVSLDUPMask(MVT VT,SmallVectorImpl<int> & ShuffleMask)77*9880d681SAndroid Build Coastguard Worker void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
78*9880d681SAndroid Build Coastguard Worker   unsigned NumElts = VT.getVectorNumElements();
79*9880d681SAndroid Build Coastguard Worker   for (int i = 0, e = NumElts / 2; i < e; ++i) {
80*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back(2 * i);
81*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back(2 * i);
82*9880d681SAndroid Build Coastguard Worker   }
83*9880d681SAndroid Build Coastguard Worker }
84*9880d681SAndroid Build Coastguard Worker 
DecodeMOVSHDUPMask(MVT VT,SmallVectorImpl<int> & ShuffleMask)85*9880d681SAndroid Build Coastguard Worker void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
86*9880d681SAndroid Build Coastguard Worker   unsigned NumElts = VT.getVectorNumElements();
87*9880d681SAndroid Build Coastguard Worker   for (int i = 0, e = NumElts / 2; i < e; ++i) {
88*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back(2 * i + 1);
89*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back(2 * i + 1);
90*9880d681SAndroid Build Coastguard Worker   }
91*9880d681SAndroid Build Coastguard Worker }
92*9880d681SAndroid Build Coastguard Worker 
DecodeMOVDDUPMask(MVT VT,SmallVectorImpl<int> & ShuffleMask)93*9880d681SAndroid Build Coastguard Worker void DecodeMOVDDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
94*9880d681SAndroid Build Coastguard Worker   unsigned VectorSizeInBits = VT.getSizeInBits();
95*9880d681SAndroid Build Coastguard Worker   unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
96*9880d681SAndroid Build Coastguard Worker   unsigned NumElts = VT.getVectorNumElements();
97*9880d681SAndroid Build Coastguard Worker   unsigned NumLanes = VectorSizeInBits / 128;
98*9880d681SAndroid Build Coastguard Worker   unsigned NumLaneElts = NumElts / NumLanes;
99*9880d681SAndroid Build Coastguard Worker   unsigned NumLaneSubElts = 64 / ScalarSizeInBits;
100*9880d681SAndroid Build Coastguard Worker 
101*9880d681SAndroid Build Coastguard Worker   for (unsigned l = 0; l < NumElts; l += NumLaneElts)
102*9880d681SAndroid Build Coastguard Worker     for (unsigned i = 0; i < NumLaneElts; i += NumLaneSubElts)
103*9880d681SAndroid Build Coastguard Worker       for (unsigned s = 0; s != NumLaneSubElts; s++)
104*9880d681SAndroid Build Coastguard Worker         ShuffleMask.push_back(l + s);
105*9880d681SAndroid Build Coastguard Worker }
106*9880d681SAndroid Build Coastguard Worker 
DecodePSLLDQMask(MVT VT,unsigned Imm,SmallVectorImpl<int> & ShuffleMask)107*9880d681SAndroid Build Coastguard Worker void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
108*9880d681SAndroid Build Coastguard Worker   unsigned VectorSizeInBits = VT.getSizeInBits();
109*9880d681SAndroid Build Coastguard Worker   unsigned NumElts = VectorSizeInBits / 8;
110*9880d681SAndroid Build Coastguard Worker   unsigned NumLanes = VectorSizeInBits / 128;
111*9880d681SAndroid Build Coastguard Worker   unsigned NumLaneElts = NumElts / NumLanes;
112*9880d681SAndroid Build Coastguard Worker 
113*9880d681SAndroid Build Coastguard Worker   for (unsigned l = 0; l < NumElts; l += NumLaneElts)
114*9880d681SAndroid Build Coastguard Worker     for (unsigned i = 0; i < NumLaneElts; ++i) {
115*9880d681SAndroid Build Coastguard Worker       int M = SM_SentinelZero;
116*9880d681SAndroid Build Coastguard Worker       if (i >= Imm) M = i - Imm + l;
117*9880d681SAndroid Build Coastguard Worker       ShuffleMask.push_back(M);
118*9880d681SAndroid Build Coastguard Worker     }
119*9880d681SAndroid Build Coastguard Worker }
120*9880d681SAndroid Build Coastguard Worker 
DecodePSRLDQMask(MVT VT,unsigned Imm,SmallVectorImpl<int> & ShuffleMask)121*9880d681SAndroid Build Coastguard Worker void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
122*9880d681SAndroid Build Coastguard Worker   unsigned VectorSizeInBits = VT.getSizeInBits();
123*9880d681SAndroid Build Coastguard Worker   unsigned NumElts = VectorSizeInBits / 8;
124*9880d681SAndroid Build Coastguard Worker   unsigned NumLanes = VectorSizeInBits / 128;
125*9880d681SAndroid Build Coastguard Worker   unsigned NumLaneElts = NumElts / NumLanes;
126*9880d681SAndroid Build Coastguard Worker 
127*9880d681SAndroid Build Coastguard Worker   for (unsigned l = 0; l < NumElts; l += NumLaneElts)
128*9880d681SAndroid Build Coastguard Worker     for (unsigned i = 0; i < NumLaneElts; ++i) {
129*9880d681SAndroid Build Coastguard Worker       unsigned Base = i + Imm;
130*9880d681SAndroid Build Coastguard Worker       int M = Base + l;
131*9880d681SAndroid Build Coastguard Worker       if (Base >= NumLaneElts) M = SM_SentinelZero;
132*9880d681SAndroid Build Coastguard Worker       ShuffleMask.push_back(M);
133*9880d681SAndroid Build Coastguard Worker     }
134*9880d681SAndroid Build Coastguard Worker }
135*9880d681SAndroid Build Coastguard Worker 
DecodePALIGNRMask(MVT VT,unsigned Imm,SmallVectorImpl<int> & ShuffleMask)136*9880d681SAndroid Build Coastguard Worker void DecodePALIGNRMask(MVT VT, unsigned Imm,
137*9880d681SAndroid Build Coastguard Worker                        SmallVectorImpl<int> &ShuffleMask) {
138*9880d681SAndroid Build Coastguard Worker   unsigned NumElts = VT.getVectorNumElements();
139*9880d681SAndroid Build Coastguard Worker   unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8);
140*9880d681SAndroid Build Coastguard Worker 
141*9880d681SAndroid Build Coastguard Worker   unsigned NumLanes = VT.getSizeInBits() / 128;
142*9880d681SAndroid Build Coastguard Worker   unsigned NumLaneElts = NumElts / NumLanes;
143*9880d681SAndroid Build Coastguard Worker 
144*9880d681SAndroid Build Coastguard Worker   for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
145*9880d681SAndroid Build Coastguard Worker     for (unsigned i = 0; i != NumLaneElts; ++i) {
146*9880d681SAndroid Build Coastguard Worker       unsigned Base = i + Offset;
147*9880d681SAndroid Build Coastguard Worker       // if i+offset is out of this lane then we actually need the other source
148*9880d681SAndroid Build Coastguard Worker       if (Base >= NumLaneElts) Base += NumElts - NumLaneElts;
149*9880d681SAndroid Build Coastguard Worker       ShuffleMask.push_back(Base + l);
150*9880d681SAndroid Build Coastguard Worker     }
151*9880d681SAndroid Build Coastguard Worker   }
152*9880d681SAndroid Build Coastguard Worker }
153*9880d681SAndroid Build Coastguard Worker 
154*9880d681SAndroid Build Coastguard Worker /// DecodePSHUFMask - This decodes the shuffle masks for pshufw, pshufd, and vpermilp*.
155*9880d681SAndroid Build Coastguard Worker /// VT indicates the type of the vector allowing it to handle different
156*9880d681SAndroid Build Coastguard Worker /// datatypes and vector widths.
DecodePSHUFMask(MVT VT,unsigned Imm,SmallVectorImpl<int> & ShuffleMask)157*9880d681SAndroid Build Coastguard Worker void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
158*9880d681SAndroid Build Coastguard Worker   unsigned NumElts = VT.getVectorNumElements();
159*9880d681SAndroid Build Coastguard Worker 
160*9880d681SAndroid Build Coastguard Worker   unsigned NumLanes = VT.getSizeInBits() / 128;
161*9880d681SAndroid Build Coastguard Worker   if (NumLanes == 0) NumLanes = 1;  // Handle MMX
162*9880d681SAndroid Build Coastguard Worker   unsigned NumLaneElts = NumElts / NumLanes;
163*9880d681SAndroid Build Coastguard Worker 
164*9880d681SAndroid Build Coastguard Worker   unsigned NewImm = Imm;
165*9880d681SAndroid Build Coastguard Worker   for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
166*9880d681SAndroid Build Coastguard Worker     for (unsigned i = 0; i != NumLaneElts; ++i) {
167*9880d681SAndroid Build Coastguard Worker       ShuffleMask.push_back(NewImm % NumLaneElts + l);
168*9880d681SAndroid Build Coastguard Worker       NewImm /= NumLaneElts;
169*9880d681SAndroid Build Coastguard Worker     }
170*9880d681SAndroid Build Coastguard Worker     if (NumLaneElts == 4) NewImm = Imm; // reload imm
171*9880d681SAndroid Build Coastguard Worker   }
172*9880d681SAndroid Build Coastguard Worker }
173*9880d681SAndroid Build Coastguard Worker 
DecodePSHUFHWMask(MVT VT,unsigned Imm,SmallVectorImpl<int> & ShuffleMask)174*9880d681SAndroid Build Coastguard Worker void DecodePSHUFHWMask(MVT VT, unsigned Imm,
175*9880d681SAndroid Build Coastguard Worker                        SmallVectorImpl<int> &ShuffleMask) {
176*9880d681SAndroid Build Coastguard Worker   unsigned NumElts = VT.getVectorNumElements();
177*9880d681SAndroid Build Coastguard Worker 
178*9880d681SAndroid Build Coastguard Worker   for (unsigned l = 0; l != NumElts; l += 8) {
179*9880d681SAndroid Build Coastguard Worker     unsigned NewImm = Imm;
180*9880d681SAndroid Build Coastguard Worker     for (unsigned i = 0, e = 4; i != e; ++i) {
181*9880d681SAndroid Build Coastguard Worker       ShuffleMask.push_back(l + i);
182*9880d681SAndroid Build Coastguard Worker     }
183*9880d681SAndroid Build Coastguard Worker     for (unsigned i = 4, e = 8; i != e; ++i) {
184*9880d681SAndroid Build Coastguard Worker       ShuffleMask.push_back(l + 4 + (NewImm & 3));
185*9880d681SAndroid Build Coastguard Worker       NewImm >>= 2;
186*9880d681SAndroid Build Coastguard Worker     }
187*9880d681SAndroid Build Coastguard Worker   }
188*9880d681SAndroid Build Coastguard Worker }
189*9880d681SAndroid Build Coastguard Worker 
DecodePSHUFLWMask(MVT VT,unsigned Imm,SmallVectorImpl<int> & ShuffleMask)190*9880d681SAndroid Build Coastguard Worker void DecodePSHUFLWMask(MVT VT, unsigned Imm,
191*9880d681SAndroid Build Coastguard Worker                        SmallVectorImpl<int> &ShuffleMask) {
192*9880d681SAndroid Build Coastguard Worker   unsigned NumElts = VT.getVectorNumElements();
193*9880d681SAndroid Build Coastguard Worker 
194*9880d681SAndroid Build Coastguard Worker   for (unsigned l = 0; l != NumElts; l += 8) {
195*9880d681SAndroid Build Coastguard Worker     unsigned NewImm = Imm;
196*9880d681SAndroid Build Coastguard Worker     for (unsigned i = 0, e = 4; i != e; ++i) {
197*9880d681SAndroid Build Coastguard Worker       ShuffleMask.push_back(l + (NewImm & 3));
198*9880d681SAndroid Build Coastguard Worker       NewImm >>= 2;
199*9880d681SAndroid Build Coastguard Worker     }
200*9880d681SAndroid Build Coastguard Worker     for (unsigned i = 4, e = 8; i != e; ++i) {
201*9880d681SAndroid Build Coastguard Worker       ShuffleMask.push_back(l + i);
202*9880d681SAndroid Build Coastguard Worker     }
203*9880d681SAndroid Build Coastguard Worker   }
204*9880d681SAndroid Build Coastguard Worker }
205*9880d681SAndroid Build Coastguard Worker 
DecodePSWAPMask(MVT VT,SmallVectorImpl<int> & ShuffleMask)206*9880d681SAndroid Build Coastguard Worker void DecodePSWAPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
207*9880d681SAndroid Build Coastguard Worker   unsigned NumElts = VT.getVectorNumElements();
208*9880d681SAndroid Build Coastguard Worker   unsigned NumHalfElts = NumElts / 2;
209*9880d681SAndroid Build Coastguard Worker 
210*9880d681SAndroid Build Coastguard Worker   for (unsigned l = 0; l != NumHalfElts; ++l)
211*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back(l + NumHalfElts);
212*9880d681SAndroid Build Coastguard Worker   for (unsigned h = 0; h != NumHalfElts; ++h)
213*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back(h);
214*9880d681SAndroid Build Coastguard Worker }
215*9880d681SAndroid Build Coastguard Worker 
216*9880d681SAndroid Build Coastguard Worker /// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
217*9880d681SAndroid Build Coastguard Worker /// the type of the vector allowing it to handle different datatypes and vector
218*9880d681SAndroid Build Coastguard Worker /// widths.
DecodeSHUFPMask(MVT VT,unsigned Imm,SmallVectorImpl<int> & ShuffleMask)219*9880d681SAndroid Build Coastguard Worker void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
220*9880d681SAndroid Build Coastguard Worker   unsigned NumElts = VT.getVectorNumElements();
221*9880d681SAndroid Build Coastguard Worker 
222*9880d681SAndroid Build Coastguard Worker   unsigned NumLanes = VT.getSizeInBits() / 128;
223*9880d681SAndroid Build Coastguard Worker   unsigned NumLaneElts = NumElts / NumLanes;
224*9880d681SAndroid Build Coastguard Worker 
225*9880d681SAndroid Build Coastguard Worker   unsigned NewImm = Imm;
226*9880d681SAndroid Build Coastguard Worker   for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
227*9880d681SAndroid Build Coastguard Worker     // each half of a lane comes from different source
228*9880d681SAndroid Build Coastguard Worker     for (unsigned s = 0; s != NumElts * 2; s += NumElts) {
229*9880d681SAndroid Build Coastguard Worker       for (unsigned i = 0; i != NumLaneElts / 2; ++i) {
230*9880d681SAndroid Build Coastguard Worker         ShuffleMask.push_back(NewImm % NumLaneElts + s + l);
231*9880d681SAndroid Build Coastguard Worker         NewImm /= NumLaneElts;
232*9880d681SAndroid Build Coastguard Worker       }
233*9880d681SAndroid Build Coastguard Worker     }
234*9880d681SAndroid Build Coastguard Worker     if (NumLaneElts == 4) NewImm = Imm; // reload imm
235*9880d681SAndroid Build Coastguard Worker   }
236*9880d681SAndroid Build Coastguard Worker }
237*9880d681SAndroid Build Coastguard Worker 
238*9880d681SAndroid Build Coastguard Worker /// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
239*9880d681SAndroid Build Coastguard Worker /// and punpckh*. VT indicates the type of the vector allowing it to handle
240*9880d681SAndroid Build Coastguard Worker /// different datatypes and vector widths.
DecodeUNPCKHMask(MVT VT,SmallVectorImpl<int> & ShuffleMask)241*9880d681SAndroid Build Coastguard Worker void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
242*9880d681SAndroid Build Coastguard Worker   unsigned NumElts = VT.getVectorNumElements();
243*9880d681SAndroid Build Coastguard Worker 
244*9880d681SAndroid Build Coastguard Worker   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
245*9880d681SAndroid Build Coastguard Worker   // independently on 128-bit lanes.
246*9880d681SAndroid Build Coastguard Worker   unsigned NumLanes = VT.getSizeInBits() / 128;
247*9880d681SAndroid Build Coastguard Worker   if (NumLanes == 0) NumLanes = 1;  // Handle MMX
248*9880d681SAndroid Build Coastguard Worker   unsigned NumLaneElts = NumElts / NumLanes;
249*9880d681SAndroid Build Coastguard Worker 
250*9880d681SAndroid Build Coastguard Worker   for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
251*9880d681SAndroid Build Coastguard Worker     for (unsigned i = l + NumLaneElts / 2, e = l + NumLaneElts; i != e; ++i) {
252*9880d681SAndroid Build Coastguard Worker       ShuffleMask.push_back(i);           // Reads from dest/src1
253*9880d681SAndroid Build Coastguard Worker       ShuffleMask.push_back(i + NumElts); // Reads from src/src2
254*9880d681SAndroid Build Coastguard Worker     }
255*9880d681SAndroid Build Coastguard Worker   }
256*9880d681SAndroid Build Coastguard Worker }
257*9880d681SAndroid Build Coastguard Worker 
258*9880d681SAndroid Build Coastguard Worker /// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
259*9880d681SAndroid Build Coastguard Worker /// and punpckl*. VT indicates the type of the vector allowing it to handle
260*9880d681SAndroid Build Coastguard Worker /// different datatypes and vector widths.
DecodeUNPCKLMask(MVT VT,SmallVectorImpl<int> & ShuffleMask)261*9880d681SAndroid Build Coastguard Worker void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
262*9880d681SAndroid Build Coastguard Worker   unsigned NumElts = VT.getVectorNumElements();
263*9880d681SAndroid Build Coastguard Worker 
264*9880d681SAndroid Build Coastguard Worker   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
265*9880d681SAndroid Build Coastguard Worker   // independently on 128-bit lanes.
266*9880d681SAndroid Build Coastguard Worker   unsigned NumLanes = VT.getSizeInBits() / 128;
267*9880d681SAndroid Build Coastguard Worker   if (NumLanes == 0 ) NumLanes = 1;  // Handle MMX
268*9880d681SAndroid Build Coastguard Worker   unsigned NumLaneElts = NumElts / NumLanes;
269*9880d681SAndroid Build Coastguard Worker 
270*9880d681SAndroid Build Coastguard Worker   for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
271*9880d681SAndroid Build Coastguard Worker     for (unsigned i = l, e = l + NumLaneElts / 2; i != e; ++i) {
272*9880d681SAndroid Build Coastguard Worker       ShuffleMask.push_back(i);           // Reads from dest/src1
273*9880d681SAndroid Build Coastguard Worker       ShuffleMask.push_back(i + NumElts); // Reads from src/src2
274*9880d681SAndroid Build Coastguard Worker     }
275*9880d681SAndroid Build Coastguard Worker   }
276*9880d681SAndroid Build Coastguard Worker }
277*9880d681SAndroid Build Coastguard Worker 
278*9880d681SAndroid Build Coastguard Worker /// Decodes a broadcast of a subvector to a larger vector type.
DecodeSubVectorBroadcast(MVT DstVT,MVT SrcVT,SmallVectorImpl<int> & ShuffleMask)279*9880d681SAndroid Build Coastguard Worker void DecodeSubVectorBroadcast(MVT DstVT, MVT SrcVT,
280*9880d681SAndroid Build Coastguard Worker                               SmallVectorImpl<int> &ShuffleMask) {
281*9880d681SAndroid Build Coastguard Worker   assert(SrcVT.getScalarType() == DstVT.getScalarType() &&
282*9880d681SAndroid Build Coastguard Worker          "Non matching vector element types");
283*9880d681SAndroid Build Coastguard Worker   unsigned NumElts = SrcVT.getVectorNumElements();
284*9880d681SAndroid Build Coastguard Worker   unsigned Scale = DstVT.getSizeInBits() / SrcVT.getSizeInBits();
285*9880d681SAndroid Build Coastguard Worker 
286*9880d681SAndroid Build Coastguard Worker   for (unsigned i = 0; i != Scale; ++i)
287*9880d681SAndroid Build Coastguard Worker     for (unsigned j = 0; j != NumElts; ++j)
288*9880d681SAndroid Build Coastguard Worker       ShuffleMask.push_back(j);
289*9880d681SAndroid Build Coastguard Worker }
290*9880d681SAndroid Build Coastguard Worker 
291*9880d681SAndroid Build Coastguard Worker /// \brief Decode a shuffle packed values at 128-bit granularity
292*9880d681SAndroid Build Coastguard Worker /// (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2)
293*9880d681SAndroid Build Coastguard Worker /// immediate mask into a shuffle mask.
decodeVSHUF64x2FamilyMask(MVT VT,unsigned Imm,SmallVectorImpl<int> & ShuffleMask)294*9880d681SAndroid Build Coastguard Worker void decodeVSHUF64x2FamilyMask(MVT VT, unsigned Imm,
295*9880d681SAndroid Build Coastguard Worker                         SmallVectorImpl<int> &ShuffleMask) {
296*9880d681SAndroid Build Coastguard Worker   unsigned NumLanes = VT.getSizeInBits() / 128;
297*9880d681SAndroid Build Coastguard Worker   unsigned NumElementsInLane = 128 / VT.getScalarSizeInBits();
298*9880d681SAndroid Build Coastguard Worker   unsigned ControlBitsMask = NumLanes - 1;
299*9880d681SAndroid Build Coastguard Worker   unsigned NumControlBits  = NumLanes / 2;
300*9880d681SAndroid Build Coastguard Worker 
301*9880d681SAndroid Build Coastguard Worker   for (unsigned l = 0; l != NumLanes; ++l) {
302*9880d681SAndroid Build Coastguard Worker     unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
303*9880d681SAndroid Build Coastguard Worker     // We actually need the other source.
304*9880d681SAndroid Build Coastguard Worker     if (l >= NumLanes / 2)
305*9880d681SAndroid Build Coastguard Worker       LaneMask += NumLanes;
306*9880d681SAndroid Build Coastguard Worker     for (unsigned i = 0; i != NumElementsInLane; ++i)
307*9880d681SAndroid Build Coastguard Worker       ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
308*9880d681SAndroid Build Coastguard Worker   }
309*9880d681SAndroid Build Coastguard Worker }
310*9880d681SAndroid Build Coastguard Worker 
DecodeVPERM2X128Mask(MVT VT,unsigned Imm,SmallVectorImpl<int> & ShuffleMask)311*9880d681SAndroid Build Coastguard Worker void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
312*9880d681SAndroid Build Coastguard Worker                           SmallVectorImpl<int> &ShuffleMask) {
313*9880d681SAndroid Build Coastguard Worker   unsigned HalfSize = VT.getVectorNumElements() / 2;
314*9880d681SAndroid Build Coastguard Worker 
315*9880d681SAndroid Build Coastguard Worker   for (unsigned l = 0; l != 2; ++l) {
316*9880d681SAndroid Build Coastguard Worker     unsigned HalfMask = Imm >> (l * 4);
317*9880d681SAndroid Build Coastguard Worker     unsigned HalfBegin = (HalfMask & 0x3) * HalfSize;
318*9880d681SAndroid Build Coastguard Worker     for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i)
319*9880d681SAndroid Build Coastguard Worker       ShuffleMask.push_back(HalfMask & 8 ? SM_SentinelZero : (int)i);
320*9880d681SAndroid Build Coastguard Worker   }
321*9880d681SAndroid Build Coastguard Worker }
322*9880d681SAndroid Build Coastguard Worker 
DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,SmallVectorImpl<int> & ShuffleMask)323*9880d681SAndroid Build Coastguard Worker void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
324*9880d681SAndroid Build Coastguard Worker                       SmallVectorImpl<int> &ShuffleMask) {
325*9880d681SAndroid Build Coastguard Worker   for (int i = 0, e = RawMask.size(); i < e; ++i) {
326*9880d681SAndroid Build Coastguard Worker     uint64_t M = RawMask[i];
327*9880d681SAndroid Build Coastguard Worker     if (M == (uint64_t)SM_SentinelUndef) {
328*9880d681SAndroid Build Coastguard Worker       ShuffleMask.push_back(M);
329*9880d681SAndroid Build Coastguard Worker       continue;
330*9880d681SAndroid Build Coastguard Worker     }
331*9880d681SAndroid Build Coastguard Worker     // For 256/512-bit vectors the base of the shuffle is the 128-bit
332*9880d681SAndroid Build Coastguard Worker     // subvector we're inside.
333*9880d681SAndroid Build Coastguard Worker     int Base = (i / 16) * 16;
334*9880d681SAndroid Build Coastguard Worker     // If the high bit (7) of the byte is set, the element is zeroed.
335*9880d681SAndroid Build Coastguard Worker     if (M & (1 << 7))
336*9880d681SAndroid Build Coastguard Worker       ShuffleMask.push_back(SM_SentinelZero);
337*9880d681SAndroid Build Coastguard Worker     else {
338*9880d681SAndroid Build Coastguard Worker       // Only the least significant 4 bits of the byte are used.
339*9880d681SAndroid Build Coastguard Worker       int Index = Base + (M & 0xf);
340*9880d681SAndroid Build Coastguard Worker       ShuffleMask.push_back(Index);
341*9880d681SAndroid Build Coastguard Worker     }
342*9880d681SAndroid Build Coastguard Worker   }
343*9880d681SAndroid Build Coastguard Worker }
344*9880d681SAndroid Build Coastguard Worker 
DecodeBLENDMask(MVT VT,unsigned Imm,SmallVectorImpl<int> & ShuffleMask)345*9880d681SAndroid Build Coastguard Worker void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
346*9880d681SAndroid Build Coastguard Worker   int ElementBits = VT.getScalarSizeInBits();
347*9880d681SAndroid Build Coastguard Worker   int NumElements = VT.getVectorNumElements();
348*9880d681SAndroid Build Coastguard Worker   for (int i = 0; i < NumElements; ++i) {
349*9880d681SAndroid Build Coastguard Worker     // If there are more than 8 elements in the vector, then any immediate blend
350*9880d681SAndroid Build Coastguard Worker     // mask applies to each 128-bit lane. There can never be more than
351*9880d681SAndroid Build Coastguard Worker     // 8 elements in a 128-bit lane with an immediate blend.
352*9880d681SAndroid Build Coastguard Worker     int Bit = NumElements > 8 ? i % (128 / ElementBits) : i;
353*9880d681SAndroid Build Coastguard Worker     assert(Bit < 8 &&
354*9880d681SAndroid Build Coastguard Worker            "Immediate blends only operate over 8 elements at a time!");
355*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back(((Imm >> Bit) & 1) ? NumElements + i : i);
356*9880d681SAndroid Build Coastguard Worker   }
357*9880d681SAndroid Build Coastguard Worker }
358*9880d681SAndroid Build Coastguard Worker 
DecodeVPPERMMask(ArrayRef<uint64_t> RawMask,SmallVectorImpl<int> & ShuffleMask)359*9880d681SAndroid Build Coastguard Worker void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask,
360*9880d681SAndroid Build Coastguard Worker                       SmallVectorImpl<int> &ShuffleMask) {
361*9880d681SAndroid Build Coastguard Worker   assert(RawMask.size() == 16 && "Illegal VPPERM shuffle mask size");
362*9880d681SAndroid Build Coastguard Worker 
363*9880d681SAndroid Build Coastguard Worker   // VPPERM Operation
364*9880d681SAndroid Build Coastguard Worker   // Bits[4:0] - Byte Index (0 - 31)
365*9880d681SAndroid Build Coastguard Worker   // Bits[7:5] - Permute Operation
366*9880d681SAndroid Build Coastguard Worker   //
367*9880d681SAndroid Build Coastguard Worker   // Permute Operation:
368*9880d681SAndroid Build Coastguard Worker   // 0 - Source byte (no logical operation).
369*9880d681SAndroid Build Coastguard Worker   // 1 - Invert source byte.
370*9880d681SAndroid Build Coastguard Worker   // 2 - Bit reverse of source byte.
371*9880d681SAndroid Build Coastguard Worker   // 3 - Bit reverse of inverted source byte.
372*9880d681SAndroid Build Coastguard Worker   // 4 - 00h (zero - fill).
373*9880d681SAndroid Build Coastguard Worker   // 5 - FFh (ones - fill).
374*9880d681SAndroid Build Coastguard Worker   // 6 - Most significant bit of source byte replicated in all bit positions.
375*9880d681SAndroid Build Coastguard Worker   // 7 - Invert most significant bit of source byte and replicate in all bit positions.
376*9880d681SAndroid Build Coastguard Worker   for (int i = 0, e = RawMask.size(); i < e; ++i) {
377*9880d681SAndroid Build Coastguard Worker     uint64_t M = RawMask[i];
378*9880d681SAndroid Build Coastguard Worker     if (M == (uint64_t)SM_SentinelUndef) {
379*9880d681SAndroid Build Coastguard Worker       ShuffleMask.push_back(M);
380*9880d681SAndroid Build Coastguard Worker       continue;
381*9880d681SAndroid Build Coastguard Worker     }
382*9880d681SAndroid Build Coastguard Worker 
383*9880d681SAndroid Build Coastguard Worker     uint64_t PermuteOp = (M >> 5) & 0x7;
384*9880d681SAndroid Build Coastguard Worker     if (PermuteOp == 4) {
385*9880d681SAndroid Build Coastguard Worker       ShuffleMask.push_back(SM_SentinelZero);
386*9880d681SAndroid Build Coastguard Worker       continue;
387*9880d681SAndroid Build Coastguard Worker     }
388*9880d681SAndroid Build Coastguard Worker     if (PermuteOp != 0) {
389*9880d681SAndroid Build Coastguard Worker       ShuffleMask.clear();
390*9880d681SAndroid Build Coastguard Worker       return;
391*9880d681SAndroid Build Coastguard Worker     }
392*9880d681SAndroid Build Coastguard Worker 
393*9880d681SAndroid Build Coastguard Worker     uint64_t Index = M & 0x1F;
394*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back((int)Index);
395*9880d681SAndroid Build Coastguard Worker   }
396*9880d681SAndroid Build Coastguard Worker }
397*9880d681SAndroid Build Coastguard Worker 
398*9880d681SAndroid Build Coastguard Worker /// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
DecodeVPERMMask(MVT VT,unsigned Imm,SmallVectorImpl<int> & ShuffleMask)399*9880d681SAndroid Build Coastguard Worker void DecodeVPERMMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
400*9880d681SAndroid Build Coastguard Worker   assert((VT.is256BitVector() || VT.is512BitVector()) &&
401*9880d681SAndroid Build Coastguard Worker          (VT.getScalarSizeInBits() == 64) && "Unexpected vector value type");
402*9880d681SAndroid Build Coastguard Worker   unsigned NumElts = VT.getVectorNumElements();
403*9880d681SAndroid Build Coastguard Worker   for (unsigned l = 0; l != NumElts; l += 4)
404*9880d681SAndroid Build Coastguard Worker     for (unsigned i = 0; i != 4; ++i)
405*9880d681SAndroid Build Coastguard Worker       ShuffleMask.push_back(l + ((Imm >> (2 * i)) & 3));
406*9880d681SAndroid Build Coastguard Worker }
407*9880d681SAndroid Build Coastguard Worker 
DecodeZeroExtendMask(MVT SrcScalarVT,MVT DstVT,SmallVectorImpl<int> & Mask)408*9880d681SAndroid Build Coastguard Worker void DecodeZeroExtendMask(MVT SrcScalarVT, MVT DstVT, SmallVectorImpl<int> &Mask) {
409*9880d681SAndroid Build Coastguard Worker   unsigned NumDstElts = DstVT.getVectorNumElements();
410*9880d681SAndroid Build Coastguard Worker   unsigned SrcScalarBits = SrcScalarVT.getSizeInBits();
411*9880d681SAndroid Build Coastguard Worker   unsigned DstScalarBits = DstVT.getScalarSizeInBits();
412*9880d681SAndroid Build Coastguard Worker   unsigned Scale = DstScalarBits / SrcScalarBits;
413*9880d681SAndroid Build Coastguard Worker   assert(SrcScalarBits < DstScalarBits &&
414*9880d681SAndroid Build Coastguard Worker          "Expected zero extension mask to increase scalar size");
415*9880d681SAndroid Build Coastguard Worker 
416*9880d681SAndroid Build Coastguard Worker   for (unsigned i = 0; i != NumDstElts; i++) {
417*9880d681SAndroid Build Coastguard Worker     Mask.push_back(i);
418*9880d681SAndroid Build Coastguard Worker     for (unsigned j = 1; j != Scale; j++)
419*9880d681SAndroid Build Coastguard Worker       Mask.push_back(SM_SentinelZero);
420*9880d681SAndroid Build Coastguard Worker   }
421*9880d681SAndroid Build Coastguard Worker }
422*9880d681SAndroid Build Coastguard Worker 
DecodeZeroMoveLowMask(MVT VT,SmallVectorImpl<int> & ShuffleMask)423*9880d681SAndroid Build Coastguard Worker void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
424*9880d681SAndroid Build Coastguard Worker   unsigned NumElts = VT.getVectorNumElements();
425*9880d681SAndroid Build Coastguard Worker   ShuffleMask.push_back(0);
426*9880d681SAndroid Build Coastguard Worker   for (unsigned i = 1; i < NumElts; i++)
427*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back(SM_SentinelZero);
428*9880d681SAndroid Build Coastguard Worker }
429*9880d681SAndroid Build Coastguard Worker 
DecodeScalarMoveMask(MVT VT,bool IsLoad,SmallVectorImpl<int> & Mask)430*9880d681SAndroid Build Coastguard Worker void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) {
431*9880d681SAndroid Build Coastguard Worker   // First element comes from the first element of second source.
432*9880d681SAndroid Build Coastguard Worker   // Remaining elements: Load zero extends / Move copies from first source.
433*9880d681SAndroid Build Coastguard Worker   unsigned NumElts = VT.getVectorNumElements();
434*9880d681SAndroid Build Coastguard Worker   Mask.push_back(NumElts);
435*9880d681SAndroid Build Coastguard Worker   for (unsigned i = 1; i < NumElts; i++)
436*9880d681SAndroid Build Coastguard Worker     Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
437*9880d681SAndroid Build Coastguard Worker }
438*9880d681SAndroid Build Coastguard Worker 
DecodeEXTRQIMask(int Len,int Idx,SmallVectorImpl<int> & ShuffleMask)439*9880d681SAndroid Build Coastguard Worker void DecodeEXTRQIMask(int Len, int Idx,
440*9880d681SAndroid Build Coastguard Worker                       SmallVectorImpl<int> &ShuffleMask) {
441*9880d681SAndroid Build Coastguard Worker   // Only the bottom 6 bits are valid for each immediate.
442*9880d681SAndroid Build Coastguard Worker   Len &= 0x3F;
443*9880d681SAndroid Build Coastguard Worker   Idx &= 0x3F;
444*9880d681SAndroid Build Coastguard Worker 
445*9880d681SAndroid Build Coastguard Worker   // We can only decode this bit extraction instruction as a shuffle if both the
446*9880d681SAndroid Build Coastguard Worker   // length and index work with whole bytes.
447*9880d681SAndroid Build Coastguard Worker   if (0 != (Len % 8) || 0 != (Idx % 8))
448*9880d681SAndroid Build Coastguard Worker     return;
449*9880d681SAndroid Build Coastguard Worker 
450*9880d681SAndroid Build Coastguard Worker   // A length of zero is equivalent to a bit length of 64.
451*9880d681SAndroid Build Coastguard Worker   if (Len == 0)
452*9880d681SAndroid Build Coastguard Worker     Len = 64;
453*9880d681SAndroid Build Coastguard Worker 
454*9880d681SAndroid Build Coastguard Worker   // If the length + index exceeds the bottom 64 bits the result is undefined.
455*9880d681SAndroid Build Coastguard Worker   if ((Len + Idx) > 64) {
456*9880d681SAndroid Build Coastguard Worker     ShuffleMask.append(16, SM_SentinelUndef);
457*9880d681SAndroid Build Coastguard Worker     return;
458*9880d681SAndroid Build Coastguard Worker   }
459*9880d681SAndroid Build Coastguard Worker 
460*9880d681SAndroid Build Coastguard Worker   // Convert index and index to work with bytes.
461*9880d681SAndroid Build Coastguard Worker   Len /= 8;
462*9880d681SAndroid Build Coastguard Worker   Idx /= 8;
463*9880d681SAndroid Build Coastguard Worker 
464*9880d681SAndroid Build Coastguard Worker   // EXTRQ: Extract Len bytes starting from Idx. Zero pad the remaining bytes
465*9880d681SAndroid Build Coastguard Worker   // of the lower 64-bits. The upper 64-bits are undefined.
466*9880d681SAndroid Build Coastguard Worker   for (int i = 0; i != Len; ++i)
467*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back(i + Idx);
468*9880d681SAndroid Build Coastguard Worker   for (int i = Len; i != 8; ++i)
469*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back(SM_SentinelZero);
470*9880d681SAndroid Build Coastguard Worker   for (int i = 8; i != 16; ++i)
471*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back(SM_SentinelUndef);
472*9880d681SAndroid Build Coastguard Worker }
473*9880d681SAndroid Build Coastguard Worker 
DecodeINSERTQIMask(int Len,int Idx,SmallVectorImpl<int> & ShuffleMask)474*9880d681SAndroid Build Coastguard Worker void DecodeINSERTQIMask(int Len, int Idx,
475*9880d681SAndroid Build Coastguard Worker                         SmallVectorImpl<int> &ShuffleMask) {
476*9880d681SAndroid Build Coastguard Worker   // Only the bottom 6 bits are valid for each immediate.
477*9880d681SAndroid Build Coastguard Worker   Len &= 0x3F;
478*9880d681SAndroid Build Coastguard Worker   Idx &= 0x3F;
479*9880d681SAndroid Build Coastguard Worker 
480*9880d681SAndroid Build Coastguard Worker   // We can only decode this bit insertion instruction as a shuffle if both the
481*9880d681SAndroid Build Coastguard Worker   // length and index work with whole bytes.
482*9880d681SAndroid Build Coastguard Worker   if (0 != (Len % 8) || 0 != (Idx % 8))
483*9880d681SAndroid Build Coastguard Worker     return;
484*9880d681SAndroid Build Coastguard Worker 
485*9880d681SAndroid Build Coastguard Worker   // A length of zero is equivalent to a bit length of 64.
486*9880d681SAndroid Build Coastguard Worker   if (Len == 0)
487*9880d681SAndroid Build Coastguard Worker     Len = 64;
488*9880d681SAndroid Build Coastguard Worker 
489*9880d681SAndroid Build Coastguard Worker   // If the length + index exceeds the bottom 64 bits the result is undefined.
490*9880d681SAndroid Build Coastguard Worker   if ((Len + Idx) > 64) {
491*9880d681SAndroid Build Coastguard Worker     ShuffleMask.append(16, SM_SentinelUndef);
492*9880d681SAndroid Build Coastguard Worker     return;
493*9880d681SAndroid Build Coastguard Worker   }
494*9880d681SAndroid Build Coastguard Worker 
495*9880d681SAndroid Build Coastguard Worker   // Convert index and index to work with bytes.
496*9880d681SAndroid Build Coastguard Worker   Len /= 8;
497*9880d681SAndroid Build Coastguard Worker   Idx /= 8;
498*9880d681SAndroid Build Coastguard Worker 
499*9880d681SAndroid Build Coastguard Worker   // INSERTQ: Extract lowest Len bytes from lower half of second source and
500*9880d681SAndroid Build Coastguard Worker   // insert over first source starting at Idx byte. The upper 64-bits are
501*9880d681SAndroid Build Coastguard Worker   // undefined.
502*9880d681SAndroid Build Coastguard Worker   for (int i = 0; i != Idx; ++i)
503*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back(i);
504*9880d681SAndroid Build Coastguard Worker   for (int i = 0; i != Len; ++i)
505*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back(i + 16);
506*9880d681SAndroid Build Coastguard Worker   for (int i = Idx + Len; i != 8; ++i)
507*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back(i);
508*9880d681SAndroid Build Coastguard Worker   for (int i = 8; i != 16; ++i)
509*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back(SM_SentinelUndef);
510*9880d681SAndroid Build Coastguard Worker }
511*9880d681SAndroid Build Coastguard Worker 
DecodeVPERMILPMask(MVT VT,ArrayRef<uint64_t> RawMask,SmallVectorImpl<int> & ShuffleMask)512*9880d681SAndroid Build Coastguard Worker void DecodeVPERMILPMask(MVT VT, ArrayRef<uint64_t> RawMask,
513*9880d681SAndroid Build Coastguard Worker                         SmallVectorImpl<int> &ShuffleMask) {
514*9880d681SAndroid Build Coastguard Worker   unsigned VecSize = VT.getSizeInBits();
515*9880d681SAndroid Build Coastguard Worker   unsigned EltSize = VT.getScalarSizeInBits();
516*9880d681SAndroid Build Coastguard Worker   unsigned NumLanes = VecSize / 128;
517*9880d681SAndroid Build Coastguard Worker   unsigned NumEltsPerLane = VT.getVectorNumElements() / NumLanes;
518*9880d681SAndroid Build Coastguard Worker   assert((VecSize == 128 || VecSize == 256 || VecSize == 512) &&
519*9880d681SAndroid Build Coastguard Worker          "Unexpected vector size");
520*9880d681SAndroid Build Coastguard Worker   assert((EltSize == 32 || EltSize == 64) && "Unexpected element size");
521*9880d681SAndroid Build Coastguard Worker 
522*9880d681SAndroid Build Coastguard Worker   for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
523*9880d681SAndroid Build Coastguard Worker     uint64_t M = RawMask[i];
524*9880d681SAndroid Build Coastguard Worker     M = (EltSize == 64 ? ((M >> 1) & 0x1) : (M & 0x3));
525*9880d681SAndroid Build Coastguard Worker     unsigned LaneOffset = i & ~(NumEltsPerLane - 1);
526*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back((int)(LaneOffset + M));
527*9880d681SAndroid Build Coastguard Worker   }
528*9880d681SAndroid Build Coastguard Worker }
529*9880d681SAndroid Build Coastguard Worker 
DecodeVPERMIL2PMask(MVT VT,unsigned M2Z,ArrayRef<uint64_t> RawMask,SmallVectorImpl<int> & ShuffleMask)530*9880d681SAndroid Build Coastguard Worker void DecodeVPERMIL2PMask(MVT VT, unsigned M2Z, ArrayRef<uint64_t> RawMask,
531*9880d681SAndroid Build Coastguard Worker                          SmallVectorImpl<int> &ShuffleMask) {
532*9880d681SAndroid Build Coastguard Worker   unsigned VecSize = VT.getSizeInBits();
533*9880d681SAndroid Build Coastguard Worker   unsigned EltSize = VT.getScalarSizeInBits();
534*9880d681SAndroid Build Coastguard Worker   unsigned NumLanes = VecSize / 128;
535*9880d681SAndroid Build Coastguard Worker   unsigned NumEltsPerLane = VT.getVectorNumElements() / NumLanes;
536*9880d681SAndroid Build Coastguard Worker   assert((VecSize == 128 || VecSize == 256) &&
537*9880d681SAndroid Build Coastguard Worker          "Unexpected vector size");
538*9880d681SAndroid Build Coastguard Worker   assert((EltSize == 32 || EltSize == 64) && "Unexpected element size");
539*9880d681SAndroid Build Coastguard Worker 
540*9880d681SAndroid Build Coastguard Worker   for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
541*9880d681SAndroid Build Coastguard Worker     // VPERMIL2 Operation.
542*9880d681SAndroid Build Coastguard Worker     // Bits[3] - Match Bit.
543*9880d681SAndroid Build Coastguard Worker     // Bits[2:1] - (Per Lane) PD Shuffle Mask.
544*9880d681SAndroid Build Coastguard Worker     // Bits[2:0] - (Per Lane) PS Shuffle Mask.
545*9880d681SAndroid Build Coastguard Worker     uint64_t Selector = RawMask[i];
546*9880d681SAndroid Build Coastguard Worker     unsigned MatchBit = (Selector >> 3) & 0x1;
547*9880d681SAndroid Build Coastguard Worker 
548*9880d681SAndroid Build Coastguard Worker     // M2Z[0:1]     MatchBit
549*9880d681SAndroid Build Coastguard Worker     //   0Xb           X        Source selected by Selector index.
550*9880d681SAndroid Build Coastguard Worker     //   10b           0        Source selected by Selector index.
551*9880d681SAndroid Build Coastguard Worker     //   10b           1        Zero.
552*9880d681SAndroid Build Coastguard Worker     //   11b           0        Zero.
553*9880d681SAndroid Build Coastguard Worker     //   11b           1        Source selected by Selector index.
554*9880d681SAndroid Build Coastguard Worker     if ((M2Z & 0x2) != 0 && MatchBit != (M2Z & 0x1)) {
555*9880d681SAndroid Build Coastguard Worker       ShuffleMask.push_back(SM_SentinelZero);
556*9880d681SAndroid Build Coastguard Worker       continue;
557*9880d681SAndroid Build Coastguard Worker     }
558*9880d681SAndroid Build Coastguard Worker 
559*9880d681SAndroid Build Coastguard Worker     unsigned Index = i & ~(NumEltsPerLane - 1);
560*9880d681SAndroid Build Coastguard Worker     if (EltSize == 64)
561*9880d681SAndroid Build Coastguard Worker       Index += (Selector >> 1) & 0x1;
562*9880d681SAndroid Build Coastguard Worker     else
563*9880d681SAndroid Build Coastguard Worker       Index += Selector & 0x3;
564*9880d681SAndroid Build Coastguard Worker 
565*9880d681SAndroid Build Coastguard Worker     unsigned SrcOffset = (Selector >> 2) & 1;
566*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back((int)(SrcOffset + Index));
567*9880d681SAndroid Build Coastguard Worker   }
568*9880d681SAndroid Build Coastguard Worker }
569*9880d681SAndroid Build Coastguard Worker 
DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,SmallVectorImpl<int> & ShuffleMask)570*9880d681SAndroid Build Coastguard Worker void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
571*9880d681SAndroid Build Coastguard Worker                       SmallVectorImpl<int> &ShuffleMask) {
572*9880d681SAndroid Build Coastguard Worker   uint64_t EltMaskSize = RawMask.size() - 1;
573*9880d681SAndroid Build Coastguard Worker   for (auto M : RawMask) {
574*9880d681SAndroid Build Coastguard Worker     M &= EltMaskSize;
575*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back((int)M);
576*9880d681SAndroid Build Coastguard Worker   }
577*9880d681SAndroid Build Coastguard Worker }
578*9880d681SAndroid Build Coastguard Worker 
DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,SmallVectorImpl<int> & ShuffleMask)579*9880d681SAndroid Build Coastguard Worker void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
580*9880d681SAndroid Build Coastguard Worker                       SmallVectorImpl<int> &ShuffleMask) {
581*9880d681SAndroid Build Coastguard Worker   uint64_t EltMaskSize = (RawMask.size() * 2) - 1;
582*9880d681SAndroid Build Coastguard Worker   for (auto M : RawMask) {
583*9880d681SAndroid Build Coastguard Worker     M &= EltMaskSize;
584*9880d681SAndroid Build Coastguard Worker     ShuffleMask.push_back((int)M);
585*9880d681SAndroid Build Coastguard Worker   }
586*9880d681SAndroid Build Coastguard Worker }
587*9880d681SAndroid Build Coastguard Worker 
588*9880d681SAndroid Build Coastguard Worker } // llvm namespace
589