//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Vector, Reduction, and Cube instructions need to fill the entire instruction
/// group to work correctly. This pass expands these individual instructions
/// into several instructions that will completely fill the instruction group.
//
//===----------------------------------------------------------------------===//
16*9880d681SAndroid Build Coastguard Worker
17*9880d681SAndroid Build Coastguard Worker #include "AMDGPU.h"
18*9880d681SAndroid Build Coastguard Worker #include "R600Defines.h"
19*9880d681SAndroid Build Coastguard Worker #include "R600InstrInfo.h"
20*9880d681SAndroid Build Coastguard Worker #include "R600MachineFunctionInfo.h"
21*9880d681SAndroid Build Coastguard Worker #include "R600RegisterInfo.h"
22*9880d681SAndroid Build Coastguard Worker #include "AMDGPUSubtarget.h"
23*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFunctionPass.h"
24*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineInstrBuilder.h"
25*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineRegisterInfo.h"
26*9880d681SAndroid Build Coastguard Worker
27*9880d681SAndroid Build Coastguard Worker using namespace llvm;
28*9880d681SAndroid Build Coastguard Worker
29*9880d681SAndroid Build Coastguard Worker namespace {
30*9880d681SAndroid Build Coastguard Worker
31*9880d681SAndroid Build Coastguard Worker class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
32*9880d681SAndroid Build Coastguard Worker private:
33*9880d681SAndroid Build Coastguard Worker static char ID;
34*9880d681SAndroid Build Coastguard Worker const R600InstrInfo *TII;
35*9880d681SAndroid Build Coastguard Worker
36*9880d681SAndroid Build Coastguard Worker void SetFlagInNewMI(MachineInstr *NewMI, const MachineInstr *OldMI,
37*9880d681SAndroid Build Coastguard Worker unsigned Op);
38*9880d681SAndroid Build Coastguard Worker
39*9880d681SAndroid Build Coastguard Worker public:
R600ExpandSpecialInstrsPass(TargetMachine & tm)40*9880d681SAndroid Build Coastguard Worker R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
41*9880d681SAndroid Build Coastguard Worker TII(nullptr) { }
42*9880d681SAndroid Build Coastguard Worker
43*9880d681SAndroid Build Coastguard Worker bool runOnMachineFunction(MachineFunction &MF) override;
44*9880d681SAndroid Build Coastguard Worker
getPassName() const45*9880d681SAndroid Build Coastguard Worker const char *getPassName() const override {
46*9880d681SAndroid Build Coastguard Worker return "R600 Expand special instructions pass";
47*9880d681SAndroid Build Coastguard Worker }
48*9880d681SAndroid Build Coastguard Worker };
49*9880d681SAndroid Build Coastguard Worker
50*9880d681SAndroid Build Coastguard Worker } // End anonymous namespace
51*9880d681SAndroid Build Coastguard Worker
52*9880d681SAndroid Build Coastguard Worker char R600ExpandSpecialInstrsPass::ID = 0;
53*9880d681SAndroid Build Coastguard Worker
createR600ExpandSpecialInstrsPass(TargetMachine & TM)54*9880d681SAndroid Build Coastguard Worker FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
55*9880d681SAndroid Build Coastguard Worker return new R600ExpandSpecialInstrsPass(TM);
56*9880d681SAndroid Build Coastguard Worker }
57*9880d681SAndroid Build Coastguard Worker
SetFlagInNewMI(MachineInstr * NewMI,const MachineInstr * OldMI,unsigned Op)58*9880d681SAndroid Build Coastguard Worker void R600ExpandSpecialInstrsPass::SetFlagInNewMI(MachineInstr *NewMI,
59*9880d681SAndroid Build Coastguard Worker const MachineInstr *OldMI, unsigned Op) {
60*9880d681SAndroid Build Coastguard Worker int OpIdx = TII->getOperandIdx(*OldMI, Op);
61*9880d681SAndroid Build Coastguard Worker if (OpIdx > -1) {
62*9880d681SAndroid Build Coastguard Worker uint64_t Val = OldMI->getOperand(OpIdx).getImm();
63*9880d681SAndroid Build Coastguard Worker TII->setImmOperand(*NewMI, Op, Val);
64*9880d681SAndroid Build Coastguard Worker }
65*9880d681SAndroid Build Coastguard Worker }
66*9880d681SAndroid Build Coastguard Worker
runOnMachineFunction(MachineFunction & MF)67*9880d681SAndroid Build Coastguard Worker bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
68*9880d681SAndroid Build Coastguard Worker const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
69*9880d681SAndroid Build Coastguard Worker TII = ST.getInstrInfo();
70*9880d681SAndroid Build Coastguard Worker
71*9880d681SAndroid Build Coastguard Worker const R600RegisterInfo &TRI = TII->getRegisterInfo();
72*9880d681SAndroid Build Coastguard Worker
73*9880d681SAndroid Build Coastguard Worker for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
74*9880d681SAndroid Build Coastguard Worker BB != BB_E; ++BB) {
75*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB = *BB;
76*9880d681SAndroid Build Coastguard Worker MachineBasicBlock::iterator I = MBB.begin();
77*9880d681SAndroid Build Coastguard Worker while (I != MBB.end()) {
78*9880d681SAndroid Build Coastguard Worker MachineInstr &MI = *I;
79*9880d681SAndroid Build Coastguard Worker I = std::next(I);
80*9880d681SAndroid Build Coastguard Worker
81*9880d681SAndroid Build Coastguard Worker // Expand LDS_*_RET instructions
82*9880d681SAndroid Build Coastguard Worker if (TII->isLDSRetInstr(MI.getOpcode())) {
83*9880d681SAndroid Build Coastguard Worker int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
84*9880d681SAndroid Build Coastguard Worker assert(DstIdx != -1);
85*9880d681SAndroid Build Coastguard Worker MachineOperand &DstOp = MI.getOperand(DstIdx);
86*9880d681SAndroid Build Coastguard Worker MachineInstr *Mov = TII->buildMovInstr(&MBB, I,
87*9880d681SAndroid Build Coastguard Worker DstOp.getReg(), AMDGPU::OQAP);
88*9880d681SAndroid Build Coastguard Worker DstOp.setReg(AMDGPU::OQAP);
89*9880d681SAndroid Build Coastguard Worker int LDSPredSelIdx = TII->getOperandIdx(MI.getOpcode(),
90*9880d681SAndroid Build Coastguard Worker AMDGPU::OpName::pred_sel);
91*9880d681SAndroid Build Coastguard Worker int MovPredSelIdx = TII->getOperandIdx(Mov->getOpcode(),
92*9880d681SAndroid Build Coastguard Worker AMDGPU::OpName::pred_sel);
93*9880d681SAndroid Build Coastguard Worker // Copy the pred_sel bit
94*9880d681SAndroid Build Coastguard Worker Mov->getOperand(MovPredSelIdx).setReg(
95*9880d681SAndroid Build Coastguard Worker MI.getOperand(LDSPredSelIdx).getReg());
96*9880d681SAndroid Build Coastguard Worker }
97*9880d681SAndroid Build Coastguard Worker
98*9880d681SAndroid Build Coastguard Worker switch (MI.getOpcode()) {
99*9880d681SAndroid Build Coastguard Worker default: break;
100*9880d681SAndroid Build Coastguard Worker // Expand PRED_X to one of the PRED_SET instructions.
101*9880d681SAndroid Build Coastguard Worker case AMDGPU::PRED_X: {
102*9880d681SAndroid Build Coastguard Worker uint64_t Flags = MI.getOperand(3).getImm();
103*9880d681SAndroid Build Coastguard Worker // The native opcode used by PRED_X is stored as an immediate in the
104*9880d681SAndroid Build Coastguard Worker // third operand.
105*9880d681SAndroid Build Coastguard Worker MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I,
106*9880d681SAndroid Build Coastguard Worker MI.getOperand(2).getImm(), // opcode
107*9880d681SAndroid Build Coastguard Worker MI.getOperand(0).getReg(), // dst
108*9880d681SAndroid Build Coastguard Worker MI.getOperand(1).getReg(), // src0
109*9880d681SAndroid Build Coastguard Worker AMDGPU::ZERO); // src1
110*9880d681SAndroid Build Coastguard Worker TII->addFlag(*PredSet, 0, MO_FLAG_MASK);
111*9880d681SAndroid Build Coastguard Worker if (Flags & MO_FLAG_PUSH) {
112*9880d681SAndroid Build Coastguard Worker TII->setImmOperand(*PredSet, AMDGPU::OpName::update_exec_mask, 1);
113*9880d681SAndroid Build Coastguard Worker } else {
114*9880d681SAndroid Build Coastguard Worker TII->setImmOperand(*PredSet, AMDGPU::OpName::update_pred, 1);
115*9880d681SAndroid Build Coastguard Worker }
116*9880d681SAndroid Build Coastguard Worker MI.eraseFromParent();
117*9880d681SAndroid Build Coastguard Worker continue;
118*9880d681SAndroid Build Coastguard Worker }
119*9880d681SAndroid Build Coastguard Worker
120*9880d681SAndroid Build Coastguard Worker case AMDGPU::INTERP_PAIR_XY: {
121*9880d681SAndroid Build Coastguard Worker MachineInstr *BMI;
122*9880d681SAndroid Build Coastguard Worker unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
123*9880d681SAndroid Build Coastguard Worker MI.getOperand(2).getImm());
124*9880d681SAndroid Build Coastguard Worker
125*9880d681SAndroid Build Coastguard Worker for (unsigned Chan = 0; Chan < 4; ++Chan) {
126*9880d681SAndroid Build Coastguard Worker unsigned DstReg;
127*9880d681SAndroid Build Coastguard Worker
128*9880d681SAndroid Build Coastguard Worker if (Chan < 2)
129*9880d681SAndroid Build Coastguard Worker DstReg = MI.getOperand(Chan).getReg();
130*9880d681SAndroid Build Coastguard Worker else
131*9880d681SAndroid Build Coastguard Worker DstReg = Chan == 2 ? AMDGPU::T0_Z : AMDGPU::T0_W;
132*9880d681SAndroid Build Coastguard Worker
133*9880d681SAndroid Build Coastguard Worker BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_XY,
134*9880d681SAndroid Build Coastguard Worker DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg);
135*9880d681SAndroid Build Coastguard Worker
136*9880d681SAndroid Build Coastguard Worker if (Chan > 0) {
137*9880d681SAndroid Build Coastguard Worker BMI->bundleWithPred();
138*9880d681SAndroid Build Coastguard Worker }
139*9880d681SAndroid Build Coastguard Worker if (Chan >= 2)
140*9880d681SAndroid Build Coastguard Worker TII->addFlag(*BMI, 0, MO_FLAG_MASK);
141*9880d681SAndroid Build Coastguard Worker if (Chan != 3)
142*9880d681SAndroid Build Coastguard Worker TII->addFlag(*BMI, 0, MO_FLAG_NOT_LAST);
143*9880d681SAndroid Build Coastguard Worker }
144*9880d681SAndroid Build Coastguard Worker
145*9880d681SAndroid Build Coastguard Worker MI.eraseFromParent();
146*9880d681SAndroid Build Coastguard Worker continue;
147*9880d681SAndroid Build Coastguard Worker }
148*9880d681SAndroid Build Coastguard Worker
149*9880d681SAndroid Build Coastguard Worker case AMDGPU::INTERP_PAIR_ZW: {
150*9880d681SAndroid Build Coastguard Worker MachineInstr *BMI;
151*9880d681SAndroid Build Coastguard Worker unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
152*9880d681SAndroid Build Coastguard Worker MI.getOperand(2).getImm());
153*9880d681SAndroid Build Coastguard Worker
154*9880d681SAndroid Build Coastguard Worker for (unsigned Chan = 0; Chan < 4; ++Chan) {
155*9880d681SAndroid Build Coastguard Worker unsigned DstReg;
156*9880d681SAndroid Build Coastguard Worker
157*9880d681SAndroid Build Coastguard Worker if (Chan < 2)
158*9880d681SAndroid Build Coastguard Worker DstReg = Chan == 0 ? AMDGPU::T0_X : AMDGPU::T0_Y;
159*9880d681SAndroid Build Coastguard Worker else
160*9880d681SAndroid Build Coastguard Worker DstReg = MI.getOperand(Chan-2).getReg();
161*9880d681SAndroid Build Coastguard Worker
162*9880d681SAndroid Build Coastguard Worker BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_ZW,
163*9880d681SAndroid Build Coastguard Worker DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg);
164*9880d681SAndroid Build Coastguard Worker
165*9880d681SAndroid Build Coastguard Worker if (Chan > 0) {
166*9880d681SAndroid Build Coastguard Worker BMI->bundleWithPred();
167*9880d681SAndroid Build Coastguard Worker }
168*9880d681SAndroid Build Coastguard Worker if (Chan < 2)
169*9880d681SAndroid Build Coastguard Worker TII->addFlag(*BMI, 0, MO_FLAG_MASK);
170*9880d681SAndroid Build Coastguard Worker if (Chan != 3)
171*9880d681SAndroid Build Coastguard Worker TII->addFlag(*BMI, 0, MO_FLAG_NOT_LAST);
172*9880d681SAndroid Build Coastguard Worker }
173*9880d681SAndroid Build Coastguard Worker
174*9880d681SAndroid Build Coastguard Worker MI.eraseFromParent();
175*9880d681SAndroid Build Coastguard Worker continue;
176*9880d681SAndroid Build Coastguard Worker }
177*9880d681SAndroid Build Coastguard Worker
178*9880d681SAndroid Build Coastguard Worker case AMDGPU::INTERP_VEC_LOAD: {
179*9880d681SAndroid Build Coastguard Worker const R600RegisterInfo &TRI = TII->getRegisterInfo();
180*9880d681SAndroid Build Coastguard Worker MachineInstr *BMI;
181*9880d681SAndroid Build Coastguard Worker unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
182*9880d681SAndroid Build Coastguard Worker MI.getOperand(1).getImm());
183*9880d681SAndroid Build Coastguard Worker unsigned DstReg = MI.getOperand(0).getReg();
184*9880d681SAndroid Build Coastguard Worker
185*9880d681SAndroid Build Coastguard Worker for (unsigned Chan = 0; Chan < 4; ++Chan) {
186*9880d681SAndroid Build Coastguard Worker BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_LOAD_P0,
187*9880d681SAndroid Build Coastguard Worker TRI.getSubReg(DstReg, TRI.getSubRegFromChannel(Chan)), PReg);
188*9880d681SAndroid Build Coastguard Worker if (Chan > 0) {
189*9880d681SAndroid Build Coastguard Worker BMI->bundleWithPred();
190*9880d681SAndroid Build Coastguard Worker }
191*9880d681SAndroid Build Coastguard Worker if (Chan != 3)
192*9880d681SAndroid Build Coastguard Worker TII->addFlag(*BMI, 0, MO_FLAG_NOT_LAST);
193*9880d681SAndroid Build Coastguard Worker }
194*9880d681SAndroid Build Coastguard Worker
195*9880d681SAndroid Build Coastguard Worker MI.eraseFromParent();
196*9880d681SAndroid Build Coastguard Worker continue;
197*9880d681SAndroid Build Coastguard Worker }
198*9880d681SAndroid Build Coastguard Worker case AMDGPU::DOT_4: {
199*9880d681SAndroid Build Coastguard Worker
200*9880d681SAndroid Build Coastguard Worker const R600RegisterInfo &TRI = TII->getRegisterInfo();
201*9880d681SAndroid Build Coastguard Worker
202*9880d681SAndroid Build Coastguard Worker unsigned DstReg = MI.getOperand(0).getReg();
203*9880d681SAndroid Build Coastguard Worker unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
204*9880d681SAndroid Build Coastguard Worker
205*9880d681SAndroid Build Coastguard Worker for (unsigned Chan = 0; Chan < 4; ++Chan) {
206*9880d681SAndroid Build Coastguard Worker bool Mask = (Chan != TRI.getHWRegChan(DstReg));
207*9880d681SAndroid Build Coastguard Worker unsigned SubDstReg =
208*9880d681SAndroid Build Coastguard Worker AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
209*9880d681SAndroid Build Coastguard Worker MachineInstr *BMI =
210*9880d681SAndroid Build Coastguard Worker TII->buildSlotOfVectorInstruction(MBB, &MI, Chan, SubDstReg);
211*9880d681SAndroid Build Coastguard Worker if (Chan > 0) {
212*9880d681SAndroid Build Coastguard Worker BMI->bundleWithPred();
213*9880d681SAndroid Build Coastguard Worker }
214*9880d681SAndroid Build Coastguard Worker if (Mask) {
215*9880d681SAndroid Build Coastguard Worker TII->addFlag(*BMI, 0, MO_FLAG_MASK);
216*9880d681SAndroid Build Coastguard Worker }
217*9880d681SAndroid Build Coastguard Worker if (Chan != 3)
218*9880d681SAndroid Build Coastguard Worker TII->addFlag(*BMI, 0, MO_FLAG_NOT_LAST);
219*9880d681SAndroid Build Coastguard Worker unsigned Opcode = BMI->getOpcode();
220*9880d681SAndroid Build Coastguard Worker // While not strictly necessary from hw point of view, we force
221*9880d681SAndroid Build Coastguard Worker // all src operands of a dot4 inst to belong to the same slot.
222*9880d681SAndroid Build Coastguard Worker unsigned Src0 = BMI->getOperand(
223*9880d681SAndroid Build Coastguard Worker TII->getOperandIdx(Opcode, AMDGPU::OpName::src0))
224*9880d681SAndroid Build Coastguard Worker .getReg();
225*9880d681SAndroid Build Coastguard Worker unsigned Src1 = BMI->getOperand(
226*9880d681SAndroid Build Coastguard Worker TII->getOperandIdx(Opcode, AMDGPU::OpName::src1))
227*9880d681SAndroid Build Coastguard Worker .getReg();
228*9880d681SAndroid Build Coastguard Worker (void) Src0;
229*9880d681SAndroid Build Coastguard Worker (void) Src1;
230*9880d681SAndroid Build Coastguard Worker if ((TRI.getEncodingValue(Src0) & 0xff) < 127 &&
231*9880d681SAndroid Build Coastguard Worker (TRI.getEncodingValue(Src1) & 0xff) < 127)
232*9880d681SAndroid Build Coastguard Worker assert(TRI.getHWRegChan(Src0) == TRI.getHWRegChan(Src1));
233*9880d681SAndroid Build Coastguard Worker }
234*9880d681SAndroid Build Coastguard Worker MI.eraseFromParent();
235*9880d681SAndroid Build Coastguard Worker continue;
236*9880d681SAndroid Build Coastguard Worker }
237*9880d681SAndroid Build Coastguard Worker }
238*9880d681SAndroid Build Coastguard Worker
239*9880d681SAndroid Build Coastguard Worker bool IsReduction = TII->isReductionOp(MI.getOpcode());
240*9880d681SAndroid Build Coastguard Worker bool IsVector = TII->isVector(MI);
241*9880d681SAndroid Build Coastguard Worker bool IsCube = TII->isCubeOp(MI.getOpcode());
242*9880d681SAndroid Build Coastguard Worker if (!IsReduction && !IsVector && !IsCube) {
243*9880d681SAndroid Build Coastguard Worker continue;
244*9880d681SAndroid Build Coastguard Worker }
245*9880d681SAndroid Build Coastguard Worker
246*9880d681SAndroid Build Coastguard Worker // Expand the instruction
247*9880d681SAndroid Build Coastguard Worker //
248*9880d681SAndroid Build Coastguard Worker // Reduction instructions:
249*9880d681SAndroid Build Coastguard Worker // T0_X = DP4 T1_XYZW, T2_XYZW
250*9880d681SAndroid Build Coastguard Worker // becomes:
251*9880d681SAndroid Build Coastguard Worker // TO_X = DP4 T1_X, T2_X
252*9880d681SAndroid Build Coastguard Worker // TO_Y (write masked) = DP4 T1_Y, T2_Y
253*9880d681SAndroid Build Coastguard Worker // TO_Z (write masked) = DP4 T1_Z, T2_Z
254*9880d681SAndroid Build Coastguard Worker // TO_W (write masked) = DP4 T1_W, T2_W
255*9880d681SAndroid Build Coastguard Worker //
256*9880d681SAndroid Build Coastguard Worker // Vector instructions:
257*9880d681SAndroid Build Coastguard Worker // T0_X = MULLO_INT T1_X, T2_X
258*9880d681SAndroid Build Coastguard Worker // becomes:
259*9880d681SAndroid Build Coastguard Worker // T0_X = MULLO_INT T1_X, T2_X
260*9880d681SAndroid Build Coastguard Worker // T0_Y (write masked) = MULLO_INT T1_X, T2_X
261*9880d681SAndroid Build Coastguard Worker // T0_Z (write masked) = MULLO_INT T1_X, T2_X
262*9880d681SAndroid Build Coastguard Worker // T0_W (write masked) = MULLO_INT T1_X, T2_X
263*9880d681SAndroid Build Coastguard Worker //
264*9880d681SAndroid Build Coastguard Worker // Cube instructions:
265*9880d681SAndroid Build Coastguard Worker // T0_XYZW = CUBE T1_XYZW
266*9880d681SAndroid Build Coastguard Worker // becomes:
267*9880d681SAndroid Build Coastguard Worker // TO_X = CUBE T1_Z, T1_Y
268*9880d681SAndroid Build Coastguard Worker // T0_Y = CUBE T1_Z, T1_X
269*9880d681SAndroid Build Coastguard Worker // T0_Z = CUBE T1_X, T1_Z
270*9880d681SAndroid Build Coastguard Worker // T0_W = CUBE T1_Y, T1_Z
271*9880d681SAndroid Build Coastguard Worker for (unsigned Chan = 0; Chan < 4; Chan++) {
272*9880d681SAndroid Build Coastguard Worker unsigned DstReg = MI.getOperand(
273*9880d681SAndroid Build Coastguard Worker TII->getOperandIdx(MI, AMDGPU::OpName::dst)).getReg();
274*9880d681SAndroid Build Coastguard Worker unsigned Src0 = MI.getOperand(
275*9880d681SAndroid Build Coastguard Worker TII->getOperandIdx(MI, AMDGPU::OpName::src0)).getReg();
276*9880d681SAndroid Build Coastguard Worker unsigned Src1 = 0;
277*9880d681SAndroid Build Coastguard Worker
278*9880d681SAndroid Build Coastguard Worker // Determine the correct source registers
279*9880d681SAndroid Build Coastguard Worker if (!IsCube) {
280*9880d681SAndroid Build Coastguard Worker int Src1Idx = TII->getOperandIdx(MI, AMDGPU::OpName::src1);
281*9880d681SAndroid Build Coastguard Worker if (Src1Idx != -1) {
282*9880d681SAndroid Build Coastguard Worker Src1 = MI.getOperand(Src1Idx).getReg();
283*9880d681SAndroid Build Coastguard Worker }
284*9880d681SAndroid Build Coastguard Worker }
285*9880d681SAndroid Build Coastguard Worker if (IsReduction) {
286*9880d681SAndroid Build Coastguard Worker unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
287*9880d681SAndroid Build Coastguard Worker Src0 = TRI.getSubReg(Src0, SubRegIndex);
288*9880d681SAndroid Build Coastguard Worker Src1 = TRI.getSubReg(Src1, SubRegIndex);
289*9880d681SAndroid Build Coastguard Worker } else if (IsCube) {
290*9880d681SAndroid Build Coastguard Worker static const int CubeSrcSwz[] = {2, 2, 0, 1};
291*9880d681SAndroid Build Coastguard Worker unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
292*9880d681SAndroid Build Coastguard Worker unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
293*9880d681SAndroid Build Coastguard Worker Src1 = TRI.getSubReg(Src0, SubRegIndex1);
294*9880d681SAndroid Build Coastguard Worker Src0 = TRI.getSubReg(Src0, SubRegIndex0);
295*9880d681SAndroid Build Coastguard Worker }
296*9880d681SAndroid Build Coastguard Worker
297*9880d681SAndroid Build Coastguard Worker // Determine the correct destination registers;
298*9880d681SAndroid Build Coastguard Worker bool Mask = false;
299*9880d681SAndroid Build Coastguard Worker bool NotLast = true;
300*9880d681SAndroid Build Coastguard Worker if (IsCube) {
301*9880d681SAndroid Build Coastguard Worker unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
302*9880d681SAndroid Build Coastguard Worker DstReg = TRI.getSubReg(DstReg, SubRegIndex);
303*9880d681SAndroid Build Coastguard Worker } else {
304*9880d681SAndroid Build Coastguard Worker // Mask the write if the original instruction does not write to
305*9880d681SAndroid Build Coastguard Worker // the current Channel.
306*9880d681SAndroid Build Coastguard Worker Mask = (Chan != TRI.getHWRegChan(DstReg));
307*9880d681SAndroid Build Coastguard Worker unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
308*9880d681SAndroid Build Coastguard Worker DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
309*9880d681SAndroid Build Coastguard Worker }
310*9880d681SAndroid Build Coastguard Worker
311*9880d681SAndroid Build Coastguard Worker // Set the IsLast bit
312*9880d681SAndroid Build Coastguard Worker NotLast = (Chan != 3 );
313*9880d681SAndroid Build Coastguard Worker
314*9880d681SAndroid Build Coastguard Worker // Add the new instruction
315*9880d681SAndroid Build Coastguard Worker unsigned Opcode = MI.getOpcode();
316*9880d681SAndroid Build Coastguard Worker switch (Opcode) {
317*9880d681SAndroid Build Coastguard Worker case AMDGPU::CUBE_r600_pseudo:
318*9880d681SAndroid Build Coastguard Worker Opcode = AMDGPU::CUBE_r600_real;
319*9880d681SAndroid Build Coastguard Worker break;
320*9880d681SAndroid Build Coastguard Worker case AMDGPU::CUBE_eg_pseudo:
321*9880d681SAndroid Build Coastguard Worker Opcode = AMDGPU::CUBE_eg_real;
322*9880d681SAndroid Build Coastguard Worker break;
323*9880d681SAndroid Build Coastguard Worker default:
324*9880d681SAndroid Build Coastguard Worker break;
325*9880d681SAndroid Build Coastguard Worker }
326*9880d681SAndroid Build Coastguard Worker
327*9880d681SAndroid Build Coastguard Worker MachineInstr *NewMI =
328*9880d681SAndroid Build Coastguard Worker TII->buildDefaultInstruction(MBB, I, Opcode, DstReg, Src0, Src1);
329*9880d681SAndroid Build Coastguard Worker
330*9880d681SAndroid Build Coastguard Worker if (Chan != 0)
331*9880d681SAndroid Build Coastguard Worker NewMI->bundleWithPred();
332*9880d681SAndroid Build Coastguard Worker if (Mask) {
333*9880d681SAndroid Build Coastguard Worker TII->addFlag(*NewMI, 0, MO_FLAG_MASK);
334*9880d681SAndroid Build Coastguard Worker }
335*9880d681SAndroid Build Coastguard Worker if (NotLast) {
336*9880d681SAndroid Build Coastguard Worker TII->addFlag(*NewMI, 0, MO_FLAG_NOT_LAST);
337*9880d681SAndroid Build Coastguard Worker }
338*9880d681SAndroid Build Coastguard Worker SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::clamp);
339*9880d681SAndroid Build Coastguard Worker SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::literal);
340*9880d681SAndroid Build Coastguard Worker SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_abs);
341*9880d681SAndroid Build Coastguard Worker SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_abs);
342*9880d681SAndroid Build Coastguard Worker SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_neg);
343*9880d681SAndroid Build Coastguard Worker SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_neg);
344*9880d681SAndroid Build Coastguard Worker }
345*9880d681SAndroid Build Coastguard Worker MI.eraseFromParent();
346*9880d681SAndroid Build Coastguard Worker }
347*9880d681SAndroid Build Coastguard Worker }
348*9880d681SAndroid Build Coastguard Worker return false;
349*9880d681SAndroid Build Coastguard Worker }
350