xref: /aosp_15_r20/external/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp (revision 9880d6810fe72a1726cb53787c6711e909410d58)
//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Vector, Reduction, and Cube instructions need to fill the entire instruction
/// group to work correctly.  This pass expands these individual instructions
/// into several instructions that will completely fill the instruction group.
//
//===----------------------------------------------------------------------===//
16*9880d681SAndroid Build Coastguard Worker 
17*9880d681SAndroid Build Coastguard Worker #include "AMDGPU.h"
18*9880d681SAndroid Build Coastguard Worker #include "R600Defines.h"
19*9880d681SAndroid Build Coastguard Worker #include "R600InstrInfo.h"
20*9880d681SAndroid Build Coastguard Worker #include "R600MachineFunctionInfo.h"
21*9880d681SAndroid Build Coastguard Worker #include "R600RegisterInfo.h"
22*9880d681SAndroid Build Coastguard Worker #include "AMDGPUSubtarget.h"
23*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFunctionPass.h"
24*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineInstrBuilder.h"
25*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineRegisterInfo.h"
26*9880d681SAndroid Build Coastguard Worker 
27*9880d681SAndroid Build Coastguard Worker using namespace llvm;
28*9880d681SAndroid Build Coastguard Worker 
29*9880d681SAndroid Build Coastguard Worker namespace {
30*9880d681SAndroid Build Coastguard Worker 
31*9880d681SAndroid Build Coastguard Worker class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
32*9880d681SAndroid Build Coastguard Worker private:
33*9880d681SAndroid Build Coastguard Worker   static char ID;
34*9880d681SAndroid Build Coastguard Worker   const R600InstrInfo *TII;
35*9880d681SAndroid Build Coastguard Worker 
36*9880d681SAndroid Build Coastguard Worker   void SetFlagInNewMI(MachineInstr *NewMI, const MachineInstr *OldMI,
37*9880d681SAndroid Build Coastguard Worker       unsigned Op);
38*9880d681SAndroid Build Coastguard Worker 
39*9880d681SAndroid Build Coastguard Worker public:
R600ExpandSpecialInstrsPass(TargetMachine & tm)40*9880d681SAndroid Build Coastguard Worker   R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
41*9880d681SAndroid Build Coastguard Worker     TII(nullptr) { }
42*9880d681SAndroid Build Coastguard Worker 
43*9880d681SAndroid Build Coastguard Worker   bool runOnMachineFunction(MachineFunction &MF) override;
44*9880d681SAndroid Build Coastguard Worker 
getPassName() const45*9880d681SAndroid Build Coastguard Worker   const char *getPassName() const override {
46*9880d681SAndroid Build Coastguard Worker     return "R600 Expand special instructions pass";
47*9880d681SAndroid Build Coastguard Worker   }
48*9880d681SAndroid Build Coastguard Worker };
49*9880d681SAndroid Build Coastguard Worker 
50*9880d681SAndroid Build Coastguard Worker } // End anonymous namespace
51*9880d681SAndroid Build Coastguard Worker 
52*9880d681SAndroid Build Coastguard Worker char R600ExpandSpecialInstrsPass::ID = 0;
53*9880d681SAndroid Build Coastguard Worker 
createR600ExpandSpecialInstrsPass(TargetMachine & TM)54*9880d681SAndroid Build Coastguard Worker FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
55*9880d681SAndroid Build Coastguard Worker   return new R600ExpandSpecialInstrsPass(TM);
56*9880d681SAndroid Build Coastguard Worker }
57*9880d681SAndroid Build Coastguard Worker 
SetFlagInNewMI(MachineInstr * NewMI,const MachineInstr * OldMI,unsigned Op)58*9880d681SAndroid Build Coastguard Worker void R600ExpandSpecialInstrsPass::SetFlagInNewMI(MachineInstr *NewMI,
59*9880d681SAndroid Build Coastguard Worker     const MachineInstr *OldMI, unsigned Op) {
60*9880d681SAndroid Build Coastguard Worker   int OpIdx = TII->getOperandIdx(*OldMI, Op);
61*9880d681SAndroid Build Coastguard Worker   if (OpIdx > -1) {
62*9880d681SAndroid Build Coastguard Worker     uint64_t Val = OldMI->getOperand(OpIdx).getImm();
63*9880d681SAndroid Build Coastguard Worker     TII->setImmOperand(*NewMI, Op, Val);
64*9880d681SAndroid Build Coastguard Worker   }
65*9880d681SAndroid Build Coastguard Worker }
66*9880d681SAndroid Build Coastguard Worker 
runOnMachineFunction(MachineFunction & MF)67*9880d681SAndroid Build Coastguard Worker bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
68*9880d681SAndroid Build Coastguard Worker   const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
69*9880d681SAndroid Build Coastguard Worker   TII = ST.getInstrInfo();
70*9880d681SAndroid Build Coastguard Worker 
71*9880d681SAndroid Build Coastguard Worker   const R600RegisterInfo &TRI = TII->getRegisterInfo();
72*9880d681SAndroid Build Coastguard Worker 
73*9880d681SAndroid Build Coastguard Worker   for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
74*9880d681SAndroid Build Coastguard Worker                                                   BB != BB_E; ++BB) {
75*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock &MBB = *BB;
76*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock::iterator I = MBB.begin();
77*9880d681SAndroid Build Coastguard Worker     while (I != MBB.end()) {
78*9880d681SAndroid Build Coastguard Worker       MachineInstr &MI = *I;
79*9880d681SAndroid Build Coastguard Worker       I = std::next(I);
80*9880d681SAndroid Build Coastguard Worker 
81*9880d681SAndroid Build Coastguard Worker       // Expand LDS_*_RET instructions
82*9880d681SAndroid Build Coastguard Worker       if (TII->isLDSRetInstr(MI.getOpcode())) {
83*9880d681SAndroid Build Coastguard Worker         int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
84*9880d681SAndroid Build Coastguard Worker         assert(DstIdx != -1);
85*9880d681SAndroid Build Coastguard Worker         MachineOperand &DstOp = MI.getOperand(DstIdx);
86*9880d681SAndroid Build Coastguard Worker         MachineInstr *Mov = TII->buildMovInstr(&MBB, I,
87*9880d681SAndroid Build Coastguard Worker                                                DstOp.getReg(), AMDGPU::OQAP);
88*9880d681SAndroid Build Coastguard Worker         DstOp.setReg(AMDGPU::OQAP);
89*9880d681SAndroid Build Coastguard Worker         int LDSPredSelIdx = TII->getOperandIdx(MI.getOpcode(),
90*9880d681SAndroid Build Coastguard Worker                                            AMDGPU::OpName::pred_sel);
91*9880d681SAndroid Build Coastguard Worker         int MovPredSelIdx = TII->getOperandIdx(Mov->getOpcode(),
92*9880d681SAndroid Build Coastguard Worker                                            AMDGPU::OpName::pred_sel);
93*9880d681SAndroid Build Coastguard Worker         // Copy the pred_sel bit
94*9880d681SAndroid Build Coastguard Worker         Mov->getOperand(MovPredSelIdx).setReg(
95*9880d681SAndroid Build Coastguard Worker             MI.getOperand(LDSPredSelIdx).getReg());
96*9880d681SAndroid Build Coastguard Worker       }
97*9880d681SAndroid Build Coastguard Worker 
98*9880d681SAndroid Build Coastguard Worker       switch (MI.getOpcode()) {
99*9880d681SAndroid Build Coastguard Worker       default: break;
100*9880d681SAndroid Build Coastguard Worker       // Expand PRED_X to one of the PRED_SET instructions.
101*9880d681SAndroid Build Coastguard Worker       case AMDGPU::PRED_X: {
102*9880d681SAndroid Build Coastguard Worker         uint64_t Flags = MI.getOperand(3).getImm();
103*9880d681SAndroid Build Coastguard Worker         // The native opcode used by PRED_X is stored as an immediate in the
104*9880d681SAndroid Build Coastguard Worker         // third operand.
105*9880d681SAndroid Build Coastguard Worker         MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I,
106*9880d681SAndroid Build Coastguard Worker                                             MI.getOperand(2).getImm(), // opcode
107*9880d681SAndroid Build Coastguard Worker                                             MI.getOperand(0).getReg(), // dst
108*9880d681SAndroid Build Coastguard Worker                                             MI.getOperand(1).getReg(), // src0
109*9880d681SAndroid Build Coastguard Worker                                             AMDGPU::ZERO);             // src1
110*9880d681SAndroid Build Coastguard Worker         TII->addFlag(*PredSet, 0, MO_FLAG_MASK);
111*9880d681SAndroid Build Coastguard Worker         if (Flags & MO_FLAG_PUSH) {
112*9880d681SAndroid Build Coastguard Worker           TII->setImmOperand(*PredSet, AMDGPU::OpName::update_exec_mask, 1);
113*9880d681SAndroid Build Coastguard Worker         } else {
114*9880d681SAndroid Build Coastguard Worker           TII->setImmOperand(*PredSet, AMDGPU::OpName::update_pred, 1);
115*9880d681SAndroid Build Coastguard Worker         }
116*9880d681SAndroid Build Coastguard Worker         MI.eraseFromParent();
117*9880d681SAndroid Build Coastguard Worker         continue;
118*9880d681SAndroid Build Coastguard Worker         }
119*9880d681SAndroid Build Coastguard Worker 
120*9880d681SAndroid Build Coastguard Worker       case AMDGPU::INTERP_PAIR_XY: {
121*9880d681SAndroid Build Coastguard Worker         MachineInstr *BMI;
122*9880d681SAndroid Build Coastguard Worker         unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
123*9880d681SAndroid Build Coastguard Worker                 MI.getOperand(2).getImm());
124*9880d681SAndroid Build Coastguard Worker 
125*9880d681SAndroid Build Coastguard Worker         for (unsigned Chan = 0; Chan < 4; ++Chan) {
126*9880d681SAndroid Build Coastguard Worker           unsigned DstReg;
127*9880d681SAndroid Build Coastguard Worker 
128*9880d681SAndroid Build Coastguard Worker           if (Chan < 2)
129*9880d681SAndroid Build Coastguard Worker             DstReg = MI.getOperand(Chan).getReg();
130*9880d681SAndroid Build Coastguard Worker           else
131*9880d681SAndroid Build Coastguard Worker             DstReg = Chan == 2 ? AMDGPU::T0_Z : AMDGPU::T0_W;
132*9880d681SAndroid Build Coastguard Worker 
133*9880d681SAndroid Build Coastguard Worker           BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_XY,
134*9880d681SAndroid Build Coastguard Worker               DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg);
135*9880d681SAndroid Build Coastguard Worker 
136*9880d681SAndroid Build Coastguard Worker           if (Chan > 0) {
137*9880d681SAndroid Build Coastguard Worker             BMI->bundleWithPred();
138*9880d681SAndroid Build Coastguard Worker           }
139*9880d681SAndroid Build Coastguard Worker           if (Chan >= 2)
140*9880d681SAndroid Build Coastguard Worker             TII->addFlag(*BMI, 0, MO_FLAG_MASK);
141*9880d681SAndroid Build Coastguard Worker           if (Chan != 3)
142*9880d681SAndroid Build Coastguard Worker             TII->addFlag(*BMI, 0, MO_FLAG_NOT_LAST);
143*9880d681SAndroid Build Coastguard Worker         }
144*9880d681SAndroid Build Coastguard Worker 
145*9880d681SAndroid Build Coastguard Worker         MI.eraseFromParent();
146*9880d681SAndroid Build Coastguard Worker         continue;
147*9880d681SAndroid Build Coastguard Worker         }
148*9880d681SAndroid Build Coastguard Worker 
149*9880d681SAndroid Build Coastguard Worker       case AMDGPU::INTERP_PAIR_ZW: {
150*9880d681SAndroid Build Coastguard Worker         MachineInstr *BMI;
151*9880d681SAndroid Build Coastguard Worker         unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
152*9880d681SAndroid Build Coastguard Worker                 MI.getOperand(2).getImm());
153*9880d681SAndroid Build Coastguard Worker 
154*9880d681SAndroid Build Coastguard Worker         for (unsigned Chan = 0; Chan < 4; ++Chan) {
155*9880d681SAndroid Build Coastguard Worker           unsigned DstReg;
156*9880d681SAndroid Build Coastguard Worker 
157*9880d681SAndroid Build Coastguard Worker           if (Chan < 2)
158*9880d681SAndroid Build Coastguard Worker             DstReg = Chan == 0 ? AMDGPU::T0_X : AMDGPU::T0_Y;
159*9880d681SAndroid Build Coastguard Worker           else
160*9880d681SAndroid Build Coastguard Worker             DstReg = MI.getOperand(Chan-2).getReg();
161*9880d681SAndroid Build Coastguard Worker 
162*9880d681SAndroid Build Coastguard Worker           BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_ZW,
163*9880d681SAndroid Build Coastguard Worker               DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg);
164*9880d681SAndroid Build Coastguard Worker 
165*9880d681SAndroid Build Coastguard Worker           if (Chan > 0) {
166*9880d681SAndroid Build Coastguard Worker             BMI->bundleWithPred();
167*9880d681SAndroid Build Coastguard Worker           }
168*9880d681SAndroid Build Coastguard Worker           if (Chan < 2)
169*9880d681SAndroid Build Coastguard Worker             TII->addFlag(*BMI, 0, MO_FLAG_MASK);
170*9880d681SAndroid Build Coastguard Worker           if (Chan != 3)
171*9880d681SAndroid Build Coastguard Worker             TII->addFlag(*BMI, 0, MO_FLAG_NOT_LAST);
172*9880d681SAndroid Build Coastguard Worker         }
173*9880d681SAndroid Build Coastguard Worker 
174*9880d681SAndroid Build Coastguard Worker         MI.eraseFromParent();
175*9880d681SAndroid Build Coastguard Worker         continue;
176*9880d681SAndroid Build Coastguard Worker         }
177*9880d681SAndroid Build Coastguard Worker 
178*9880d681SAndroid Build Coastguard Worker       case AMDGPU::INTERP_VEC_LOAD: {
179*9880d681SAndroid Build Coastguard Worker         const R600RegisterInfo &TRI = TII->getRegisterInfo();
180*9880d681SAndroid Build Coastguard Worker         MachineInstr *BMI;
181*9880d681SAndroid Build Coastguard Worker         unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
182*9880d681SAndroid Build Coastguard Worker                 MI.getOperand(1).getImm());
183*9880d681SAndroid Build Coastguard Worker         unsigned DstReg = MI.getOperand(0).getReg();
184*9880d681SAndroid Build Coastguard Worker 
185*9880d681SAndroid Build Coastguard Worker         for (unsigned Chan = 0; Chan < 4; ++Chan) {
186*9880d681SAndroid Build Coastguard Worker           BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_LOAD_P0,
187*9880d681SAndroid Build Coastguard Worker               TRI.getSubReg(DstReg, TRI.getSubRegFromChannel(Chan)), PReg);
188*9880d681SAndroid Build Coastguard Worker           if (Chan > 0) {
189*9880d681SAndroid Build Coastguard Worker             BMI->bundleWithPred();
190*9880d681SAndroid Build Coastguard Worker           }
191*9880d681SAndroid Build Coastguard Worker           if (Chan != 3)
192*9880d681SAndroid Build Coastguard Worker             TII->addFlag(*BMI, 0, MO_FLAG_NOT_LAST);
193*9880d681SAndroid Build Coastguard Worker         }
194*9880d681SAndroid Build Coastguard Worker 
195*9880d681SAndroid Build Coastguard Worker         MI.eraseFromParent();
196*9880d681SAndroid Build Coastguard Worker         continue;
197*9880d681SAndroid Build Coastguard Worker         }
198*9880d681SAndroid Build Coastguard Worker       case AMDGPU::DOT_4: {
199*9880d681SAndroid Build Coastguard Worker 
200*9880d681SAndroid Build Coastguard Worker         const R600RegisterInfo &TRI = TII->getRegisterInfo();
201*9880d681SAndroid Build Coastguard Worker 
202*9880d681SAndroid Build Coastguard Worker         unsigned DstReg = MI.getOperand(0).getReg();
203*9880d681SAndroid Build Coastguard Worker         unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
204*9880d681SAndroid Build Coastguard Worker 
205*9880d681SAndroid Build Coastguard Worker         for (unsigned Chan = 0; Chan < 4; ++Chan) {
206*9880d681SAndroid Build Coastguard Worker           bool Mask = (Chan != TRI.getHWRegChan(DstReg));
207*9880d681SAndroid Build Coastguard Worker           unsigned SubDstReg =
208*9880d681SAndroid Build Coastguard Worker               AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
209*9880d681SAndroid Build Coastguard Worker           MachineInstr *BMI =
210*9880d681SAndroid Build Coastguard Worker               TII->buildSlotOfVectorInstruction(MBB, &MI, Chan, SubDstReg);
211*9880d681SAndroid Build Coastguard Worker           if (Chan > 0) {
212*9880d681SAndroid Build Coastguard Worker             BMI->bundleWithPred();
213*9880d681SAndroid Build Coastguard Worker           }
214*9880d681SAndroid Build Coastguard Worker           if (Mask) {
215*9880d681SAndroid Build Coastguard Worker             TII->addFlag(*BMI, 0, MO_FLAG_MASK);
216*9880d681SAndroid Build Coastguard Worker           }
217*9880d681SAndroid Build Coastguard Worker           if (Chan != 3)
218*9880d681SAndroid Build Coastguard Worker             TII->addFlag(*BMI, 0, MO_FLAG_NOT_LAST);
219*9880d681SAndroid Build Coastguard Worker           unsigned Opcode = BMI->getOpcode();
220*9880d681SAndroid Build Coastguard Worker           // While not strictly necessary from hw point of view, we force
221*9880d681SAndroid Build Coastguard Worker           // all src operands of a dot4 inst to belong to the same slot.
222*9880d681SAndroid Build Coastguard Worker           unsigned Src0 = BMI->getOperand(
223*9880d681SAndroid Build Coastguard Worker               TII->getOperandIdx(Opcode, AMDGPU::OpName::src0))
224*9880d681SAndroid Build Coastguard Worker               .getReg();
225*9880d681SAndroid Build Coastguard Worker           unsigned Src1 = BMI->getOperand(
226*9880d681SAndroid Build Coastguard Worker               TII->getOperandIdx(Opcode, AMDGPU::OpName::src1))
227*9880d681SAndroid Build Coastguard Worker               .getReg();
228*9880d681SAndroid Build Coastguard Worker           (void) Src0;
229*9880d681SAndroid Build Coastguard Worker           (void) Src1;
230*9880d681SAndroid Build Coastguard Worker           if ((TRI.getEncodingValue(Src0) & 0xff) < 127 &&
231*9880d681SAndroid Build Coastguard Worker               (TRI.getEncodingValue(Src1) & 0xff) < 127)
232*9880d681SAndroid Build Coastguard Worker             assert(TRI.getHWRegChan(Src0) == TRI.getHWRegChan(Src1));
233*9880d681SAndroid Build Coastguard Worker         }
234*9880d681SAndroid Build Coastguard Worker         MI.eraseFromParent();
235*9880d681SAndroid Build Coastguard Worker         continue;
236*9880d681SAndroid Build Coastguard Worker       }
237*9880d681SAndroid Build Coastguard Worker       }
238*9880d681SAndroid Build Coastguard Worker 
239*9880d681SAndroid Build Coastguard Worker       bool IsReduction = TII->isReductionOp(MI.getOpcode());
240*9880d681SAndroid Build Coastguard Worker       bool IsVector = TII->isVector(MI);
241*9880d681SAndroid Build Coastguard Worker       bool IsCube = TII->isCubeOp(MI.getOpcode());
242*9880d681SAndroid Build Coastguard Worker       if (!IsReduction && !IsVector && !IsCube) {
243*9880d681SAndroid Build Coastguard Worker         continue;
244*9880d681SAndroid Build Coastguard Worker       }
245*9880d681SAndroid Build Coastguard Worker 
246*9880d681SAndroid Build Coastguard Worker       // Expand the instruction
247*9880d681SAndroid Build Coastguard Worker       //
248*9880d681SAndroid Build Coastguard Worker       // Reduction instructions:
249*9880d681SAndroid Build Coastguard Worker       // T0_X = DP4 T1_XYZW, T2_XYZW
250*9880d681SAndroid Build Coastguard Worker       // becomes:
251*9880d681SAndroid Build Coastguard Worker       // TO_X = DP4 T1_X, T2_X
252*9880d681SAndroid Build Coastguard Worker       // TO_Y (write masked) = DP4 T1_Y, T2_Y
253*9880d681SAndroid Build Coastguard Worker       // TO_Z (write masked) = DP4 T1_Z, T2_Z
254*9880d681SAndroid Build Coastguard Worker       // TO_W (write masked) = DP4 T1_W, T2_W
255*9880d681SAndroid Build Coastguard Worker       //
256*9880d681SAndroid Build Coastguard Worker       // Vector instructions:
257*9880d681SAndroid Build Coastguard Worker       // T0_X = MULLO_INT T1_X, T2_X
258*9880d681SAndroid Build Coastguard Worker       // becomes:
259*9880d681SAndroid Build Coastguard Worker       // T0_X = MULLO_INT T1_X, T2_X
260*9880d681SAndroid Build Coastguard Worker       // T0_Y (write masked) = MULLO_INT T1_X, T2_X
261*9880d681SAndroid Build Coastguard Worker       // T0_Z (write masked) = MULLO_INT T1_X, T2_X
262*9880d681SAndroid Build Coastguard Worker       // T0_W (write masked) = MULLO_INT T1_X, T2_X
263*9880d681SAndroid Build Coastguard Worker       //
264*9880d681SAndroid Build Coastguard Worker       // Cube instructions:
265*9880d681SAndroid Build Coastguard Worker       // T0_XYZW = CUBE T1_XYZW
266*9880d681SAndroid Build Coastguard Worker       // becomes:
267*9880d681SAndroid Build Coastguard Worker       // TO_X = CUBE T1_Z, T1_Y
268*9880d681SAndroid Build Coastguard Worker       // T0_Y = CUBE T1_Z, T1_X
269*9880d681SAndroid Build Coastguard Worker       // T0_Z = CUBE T1_X, T1_Z
270*9880d681SAndroid Build Coastguard Worker       // T0_W = CUBE T1_Y, T1_Z
271*9880d681SAndroid Build Coastguard Worker       for (unsigned Chan = 0; Chan < 4; Chan++) {
272*9880d681SAndroid Build Coastguard Worker         unsigned DstReg = MI.getOperand(
273*9880d681SAndroid Build Coastguard Worker                             TII->getOperandIdx(MI, AMDGPU::OpName::dst)).getReg();
274*9880d681SAndroid Build Coastguard Worker         unsigned Src0 = MI.getOperand(
275*9880d681SAndroid Build Coastguard Worker                            TII->getOperandIdx(MI, AMDGPU::OpName::src0)).getReg();
276*9880d681SAndroid Build Coastguard Worker         unsigned Src1 = 0;
277*9880d681SAndroid Build Coastguard Worker 
278*9880d681SAndroid Build Coastguard Worker         // Determine the correct source registers
279*9880d681SAndroid Build Coastguard Worker         if (!IsCube) {
280*9880d681SAndroid Build Coastguard Worker           int Src1Idx = TII->getOperandIdx(MI, AMDGPU::OpName::src1);
281*9880d681SAndroid Build Coastguard Worker           if (Src1Idx != -1) {
282*9880d681SAndroid Build Coastguard Worker             Src1 = MI.getOperand(Src1Idx).getReg();
283*9880d681SAndroid Build Coastguard Worker           }
284*9880d681SAndroid Build Coastguard Worker         }
285*9880d681SAndroid Build Coastguard Worker         if (IsReduction) {
286*9880d681SAndroid Build Coastguard Worker           unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
287*9880d681SAndroid Build Coastguard Worker           Src0 = TRI.getSubReg(Src0, SubRegIndex);
288*9880d681SAndroid Build Coastguard Worker           Src1 = TRI.getSubReg(Src1, SubRegIndex);
289*9880d681SAndroid Build Coastguard Worker         } else if (IsCube) {
290*9880d681SAndroid Build Coastguard Worker           static const int CubeSrcSwz[] = {2, 2, 0, 1};
291*9880d681SAndroid Build Coastguard Worker           unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
292*9880d681SAndroid Build Coastguard Worker           unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
293*9880d681SAndroid Build Coastguard Worker           Src1 = TRI.getSubReg(Src0, SubRegIndex1);
294*9880d681SAndroid Build Coastguard Worker           Src0 = TRI.getSubReg(Src0, SubRegIndex0);
295*9880d681SAndroid Build Coastguard Worker         }
296*9880d681SAndroid Build Coastguard Worker 
297*9880d681SAndroid Build Coastguard Worker         // Determine the correct destination registers;
298*9880d681SAndroid Build Coastguard Worker         bool Mask = false;
299*9880d681SAndroid Build Coastguard Worker         bool NotLast = true;
300*9880d681SAndroid Build Coastguard Worker         if (IsCube) {
301*9880d681SAndroid Build Coastguard Worker           unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
302*9880d681SAndroid Build Coastguard Worker           DstReg = TRI.getSubReg(DstReg, SubRegIndex);
303*9880d681SAndroid Build Coastguard Worker         } else {
304*9880d681SAndroid Build Coastguard Worker           // Mask the write if the original instruction does not write to
305*9880d681SAndroid Build Coastguard Worker           // the current Channel.
306*9880d681SAndroid Build Coastguard Worker           Mask = (Chan != TRI.getHWRegChan(DstReg));
307*9880d681SAndroid Build Coastguard Worker           unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
308*9880d681SAndroid Build Coastguard Worker           DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
309*9880d681SAndroid Build Coastguard Worker         }
310*9880d681SAndroid Build Coastguard Worker 
311*9880d681SAndroid Build Coastguard Worker         // Set the IsLast bit
312*9880d681SAndroid Build Coastguard Worker         NotLast = (Chan != 3 );
313*9880d681SAndroid Build Coastguard Worker 
314*9880d681SAndroid Build Coastguard Worker         // Add the new instruction
315*9880d681SAndroid Build Coastguard Worker         unsigned Opcode = MI.getOpcode();
316*9880d681SAndroid Build Coastguard Worker         switch (Opcode) {
317*9880d681SAndroid Build Coastguard Worker         case AMDGPU::CUBE_r600_pseudo:
318*9880d681SAndroid Build Coastguard Worker           Opcode = AMDGPU::CUBE_r600_real;
319*9880d681SAndroid Build Coastguard Worker           break;
320*9880d681SAndroid Build Coastguard Worker         case AMDGPU::CUBE_eg_pseudo:
321*9880d681SAndroid Build Coastguard Worker           Opcode = AMDGPU::CUBE_eg_real;
322*9880d681SAndroid Build Coastguard Worker           break;
323*9880d681SAndroid Build Coastguard Worker         default:
324*9880d681SAndroid Build Coastguard Worker           break;
325*9880d681SAndroid Build Coastguard Worker         }
326*9880d681SAndroid Build Coastguard Worker 
327*9880d681SAndroid Build Coastguard Worker         MachineInstr *NewMI =
328*9880d681SAndroid Build Coastguard Worker           TII->buildDefaultInstruction(MBB, I, Opcode, DstReg, Src0, Src1);
329*9880d681SAndroid Build Coastguard Worker 
330*9880d681SAndroid Build Coastguard Worker         if (Chan != 0)
331*9880d681SAndroid Build Coastguard Worker           NewMI->bundleWithPred();
332*9880d681SAndroid Build Coastguard Worker         if (Mask) {
333*9880d681SAndroid Build Coastguard Worker           TII->addFlag(*NewMI, 0, MO_FLAG_MASK);
334*9880d681SAndroid Build Coastguard Worker         }
335*9880d681SAndroid Build Coastguard Worker         if (NotLast) {
336*9880d681SAndroid Build Coastguard Worker           TII->addFlag(*NewMI, 0, MO_FLAG_NOT_LAST);
337*9880d681SAndroid Build Coastguard Worker         }
338*9880d681SAndroid Build Coastguard Worker         SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::clamp);
339*9880d681SAndroid Build Coastguard Worker         SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::literal);
340*9880d681SAndroid Build Coastguard Worker         SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_abs);
341*9880d681SAndroid Build Coastguard Worker         SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_abs);
342*9880d681SAndroid Build Coastguard Worker         SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_neg);
343*9880d681SAndroid Build Coastguard Worker         SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_neg);
344*9880d681SAndroid Build Coastguard Worker       }
345*9880d681SAndroid Build Coastguard Worker       MI.eraseFromParent();
346*9880d681SAndroid Build Coastguard Worker     }
347*9880d681SAndroid Build Coastguard Worker   }
348*9880d681SAndroid Build Coastguard Worker   return false;
349*9880d681SAndroid Build Coastguard Worker }
350