1*9880d681SAndroid Build Coastguard Worker //===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===//
2*9880d681SAndroid Build Coastguard Worker //
3*9880d681SAndroid Build Coastguard Worker // The LLVM Compiler Infrastructure
4*9880d681SAndroid Build Coastguard Worker //
5*9880d681SAndroid Build Coastguard Worker // This file is distributed under the University of Illinois Open Source
6*9880d681SAndroid Build Coastguard Worker // License. See LICENSE.TXT for details.
7*9880d681SAndroid Build Coastguard Worker //
8*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
9*9880d681SAndroid Build Coastguard Worker //
10*9880d681SAndroid Build Coastguard Worker /// \file
11*9880d681SAndroid Build Coastguard Worker /// \brief This pass lowers the pseudo control flow instructions to real
12*9880d681SAndroid Build Coastguard Worker /// machine instructions.
13*9880d681SAndroid Build Coastguard Worker ///
14*9880d681SAndroid Build Coastguard Worker /// All control flow is handled using predicated instructions and
15*9880d681SAndroid Build Coastguard Worker /// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
16*9880d681SAndroid Build Coastguard Worker /// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
/// by writing to the 64-bit EXEC register (each bit corresponds to a
18*9880d681SAndroid Build Coastguard Worker /// single vector ALU). Typically, for predicates, a vector ALU will write
19*9880d681SAndroid Build Coastguard Worker /// to its bit of the VCC register (like EXEC VCC is 64-bits, one for each
20*9880d681SAndroid Build Coastguard Worker /// Vector ALU) and then the ScalarALU will AND the VCC register with the
21*9880d681SAndroid Build Coastguard Worker /// EXEC to update the predicates.
22*9880d681SAndroid Build Coastguard Worker ///
23*9880d681SAndroid Build Coastguard Worker /// For example:
24*9880d681SAndroid Build Coastguard Worker /// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2
25*9880d681SAndroid Build Coastguard Worker /// %SGPR0 = SI_IF %VCC
26*9880d681SAndroid Build Coastguard Worker /// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0
27*9880d681SAndroid Build Coastguard Worker /// %SGPR0 = SI_ELSE %SGPR0
28*9880d681SAndroid Build Coastguard Worker /// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0
29*9880d681SAndroid Build Coastguard Worker /// SI_END_CF %SGPR0
30*9880d681SAndroid Build Coastguard Worker ///
31*9880d681SAndroid Build Coastguard Worker /// becomes:
32*9880d681SAndroid Build Coastguard Worker ///
33*9880d681SAndroid Build Coastguard Worker /// %SGPR0 = S_AND_SAVEEXEC_B64 %VCC // Save and update the exec mask
34*9880d681SAndroid Build Coastguard Worker /// %SGPR0 = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask
35*9880d681SAndroid Build Coastguard Worker /// S_CBRANCH_EXECZ label0 // This instruction is an optional
36*9880d681SAndroid Build Coastguard Worker /// // optimization which allows us to
37*9880d681SAndroid Build Coastguard Worker /// // branch if all the bits of
38*9880d681SAndroid Build Coastguard Worker /// // EXEC are zero.
39*9880d681SAndroid Build Coastguard Worker /// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch
40*9880d681SAndroid Build Coastguard Worker ///
41*9880d681SAndroid Build Coastguard Worker /// label0:
42*9880d681SAndroid Build Coastguard Worker /// %SGPR0 = S_OR_SAVEEXEC_B64 %EXEC // Restore the exec mask for the Then block
43*9880d681SAndroid Build Coastguard Worker /// %EXEC = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask
/// S_CBRANCH_EXECZ label1 // Use our branch optimization
45*9880d681SAndroid Build Coastguard Worker /// // instruction again.
/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0 // Do the THEN block
47*9880d681SAndroid Build Coastguard Worker /// label1:
48*9880d681SAndroid Build Coastguard Worker /// %EXEC = S_OR_B64 %EXEC, %SGPR0 // Re-enable saved exec mask bits
49*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
50*9880d681SAndroid Build Coastguard Worker
51*9880d681SAndroid Build Coastguard Worker #include "AMDGPU.h"
52*9880d681SAndroid Build Coastguard Worker #include "AMDGPUSubtarget.h"
53*9880d681SAndroid Build Coastguard Worker #include "SIInstrInfo.h"
54*9880d681SAndroid Build Coastguard Worker #include "SIMachineFunctionInfo.h"
55*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/LivePhysRegs.h"
56*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFrameInfo.h"
57*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFunction.h"
58*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFunctionPass.h"
59*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineInstrBuilder.h"
60*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineRegisterInfo.h"
61*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/Constants.h"
62*9880d681SAndroid Build Coastguard Worker
63*9880d681SAndroid Build Coastguard Worker using namespace llvm;
64*9880d681SAndroid Build Coastguard Worker
65*9880d681SAndroid Build Coastguard Worker #define DEBUG_TYPE "si-lower-control-flow"
66*9880d681SAndroid Build Coastguard Worker
67*9880d681SAndroid Build Coastguard Worker namespace {
68*9880d681SAndroid Build Coastguard Worker
69*9880d681SAndroid Build Coastguard Worker class SILowerControlFlow : public MachineFunctionPass {
70*9880d681SAndroid Build Coastguard Worker private:
71*9880d681SAndroid Build Coastguard Worker static const unsigned SkipThreshold = 12;
72*9880d681SAndroid Build Coastguard Worker
73*9880d681SAndroid Build Coastguard Worker const SIRegisterInfo *TRI;
74*9880d681SAndroid Build Coastguard Worker const SIInstrInfo *TII;
75*9880d681SAndroid Build Coastguard Worker
76*9880d681SAndroid Build Coastguard Worker bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
77*9880d681SAndroid Build Coastguard Worker
78*9880d681SAndroid Build Coastguard Worker void Skip(MachineInstr &From, MachineOperand &To);
79*9880d681SAndroid Build Coastguard Worker bool skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB);
80*9880d681SAndroid Build Coastguard Worker
81*9880d681SAndroid Build Coastguard Worker void If(MachineInstr &MI);
82*9880d681SAndroid Build Coastguard Worker void Else(MachineInstr &MI, bool ExecModified);
83*9880d681SAndroid Build Coastguard Worker void Break(MachineInstr &MI);
84*9880d681SAndroid Build Coastguard Worker void IfBreak(MachineInstr &MI);
85*9880d681SAndroid Build Coastguard Worker void ElseBreak(MachineInstr &MI);
86*9880d681SAndroid Build Coastguard Worker void Loop(MachineInstr &MI);
87*9880d681SAndroid Build Coastguard Worker void EndCf(MachineInstr &MI);
88*9880d681SAndroid Build Coastguard Worker
89*9880d681SAndroid Build Coastguard Worker void Kill(MachineInstr &MI);
90*9880d681SAndroid Build Coastguard Worker void Branch(MachineInstr &MI);
91*9880d681SAndroid Build Coastguard Worker
92*9880d681SAndroid Build Coastguard Worker MachineBasicBlock *insertSkipBlock(MachineBasicBlock &MBB,
93*9880d681SAndroid Build Coastguard Worker MachineBasicBlock::iterator I) const;
94*9880d681SAndroid Build Coastguard Worker
95*9880d681SAndroid Build Coastguard Worker std::pair<MachineBasicBlock *, MachineBasicBlock *>
96*9880d681SAndroid Build Coastguard Worker splitBlock(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
97*9880d681SAndroid Build Coastguard Worker
98*9880d681SAndroid Build Coastguard Worker void splitLoadM0BlockLiveIns(LivePhysRegs &RemainderLiveRegs,
99*9880d681SAndroid Build Coastguard Worker const MachineRegisterInfo &MRI,
100*9880d681SAndroid Build Coastguard Worker const MachineInstr &MI,
101*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &LoopBB,
102*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &RemainderBB,
103*9880d681SAndroid Build Coastguard Worker unsigned SaveReg,
104*9880d681SAndroid Build Coastguard Worker const MachineOperand &IdxReg);
105*9880d681SAndroid Build Coastguard Worker
106*9880d681SAndroid Build Coastguard Worker void emitLoadM0FromVGPRLoop(MachineBasicBlock &LoopBB, DebugLoc DL,
107*9880d681SAndroid Build Coastguard Worker MachineInstr *MovRel,
108*9880d681SAndroid Build Coastguard Worker const MachineOperand &IdxReg,
109*9880d681SAndroid Build Coastguard Worker int Offset);
110*9880d681SAndroid Build Coastguard Worker
111*9880d681SAndroid Build Coastguard Worker bool loadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset = 0);
112*9880d681SAndroid Build Coastguard Worker std::pair<unsigned, int> computeIndirectRegAndOffset(unsigned VecReg,
113*9880d681SAndroid Build Coastguard Worker int Offset) const;
114*9880d681SAndroid Build Coastguard Worker bool indirectSrc(MachineInstr &MI);
115*9880d681SAndroid Build Coastguard Worker bool indirectDst(MachineInstr &MI);
116*9880d681SAndroid Build Coastguard Worker
117*9880d681SAndroid Build Coastguard Worker public:
118*9880d681SAndroid Build Coastguard Worker static char ID;
119*9880d681SAndroid Build Coastguard Worker
SILowerControlFlow()120*9880d681SAndroid Build Coastguard Worker SILowerControlFlow() :
121*9880d681SAndroid Build Coastguard Worker MachineFunctionPass(ID), TRI(nullptr), TII(nullptr) { }
122*9880d681SAndroid Build Coastguard Worker
123*9880d681SAndroid Build Coastguard Worker bool runOnMachineFunction(MachineFunction &MF) override;
124*9880d681SAndroid Build Coastguard Worker
getPassName() const125*9880d681SAndroid Build Coastguard Worker const char *getPassName() const override {
126*9880d681SAndroid Build Coastguard Worker return "SI Lower control flow pseudo instructions";
127*9880d681SAndroid Build Coastguard Worker }
128*9880d681SAndroid Build Coastguard Worker };
129*9880d681SAndroid Build Coastguard Worker
130*9880d681SAndroid Build Coastguard Worker } // End anonymous namespace
131*9880d681SAndroid Build Coastguard Worker
// Definition of the static pass identifier declared in the class.
char SILowerControlFlow::ID = 0;

// Register the pass under the DEBUG_TYPE name ("si-lower-control-flow").
INITIALIZE_PASS(SILowerControlFlow, DEBUG_TYPE,
                "SI lower control flow", false, false)

// Exported reference to the pass ID so code outside this file can name the
// pass without seeing the class definition.
char &llvm::SILowerControlFlowPassID = SILowerControlFlow::ID;
138*9880d681SAndroid Build Coastguard Worker
139*9880d681SAndroid Build Coastguard Worker
createSILowerControlFlowPass()140*9880d681SAndroid Build Coastguard Worker FunctionPass *llvm::createSILowerControlFlowPass() {
141*9880d681SAndroid Build Coastguard Worker return new SILowerControlFlow();
142*9880d681SAndroid Build Coastguard Worker }
143*9880d681SAndroid Build Coastguard Worker
opcodeEmitsNoInsts(unsigned Opc)144*9880d681SAndroid Build Coastguard Worker static bool opcodeEmitsNoInsts(unsigned Opc) {
145*9880d681SAndroid Build Coastguard Worker switch (Opc) {
146*9880d681SAndroid Build Coastguard Worker case TargetOpcode::IMPLICIT_DEF:
147*9880d681SAndroid Build Coastguard Worker case TargetOpcode::KILL:
148*9880d681SAndroid Build Coastguard Worker case TargetOpcode::BUNDLE:
149*9880d681SAndroid Build Coastguard Worker case TargetOpcode::CFI_INSTRUCTION:
150*9880d681SAndroid Build Coastguard Worker case TargetOpcode::EH_LABEL:
151*9880d681SAndroid Build Coastguard Worker case TargetOpcode::GC_LABEL:
152*9880d681SAndroid Build Coastguard Worker case TargetOpcode::DBG_VALUE:
153*9880d681SAndroid Build Coastguard Worker return true;
154*9880d681SAndroid Build Coastguard Worker default:
155*9880d681SAndroid Build Coastguard Worker return false;
156*9880d681SAndroid Build Coastguard Worker }
157*9880d681SAndroid Build Coastguard Worker }
158*9880d681SAndroid Build Coastguard Worker
shouldSkip(MachineBasicBlock * From,MachineBasicBlock * To)159*9880d681SAndroid Build Coastguard Worker bool SILowerControlFlow::shouldSkip(MachineBasicBlock *From,
160*9880d681SAndroid Build Coastguard Worker MachineBasicBlock *To) {
161*9880d681SAndroid Build Coastguard Worker
162*9880d681SAndroid Build Coastguard Worker unsigned NumInstr = 0;
163*9880d681SAndroid Build Coastguard Worker MachineFunction *MF = From->getParent();
164*9880d681SAndroid Build Coastguard Worker
165*9880d681SAndroid Build Coastguard Worker for (MachineFunction::iterator MBBI(From), ToI(To), End = MF->end();
166*9880d681SAndroid Build Coastguard Worker MBBI != End && MBBI != ToI; ++MBBI) {
167*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB = *MBBI;
168*9880d681SAndroid Build Coastguard Worker
169*9880d681SAndroid Build Coastguard Worker for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
170*9880d681SAndroid Build Coastguard Worker NumInstr < SkipThreshold && I != E; ++I) {
171*9880d681SAndroid Build Coastguard Worker if (opcodeEmitsNoInsts(I->getOpcode()))
172*9880d681SAndroid Build Coastguard Worker continue;
173*9880d681SAndroid Build Coastguard Worker
174*9880d681SAndroid Build Coastguard Worker // When a uniform loop is inside non-uniform control flow, the branch
175*9880d681SAndroid Build Coastguard Worker // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
176*9880d681SAndroid Build Coastguard Worker // when EXEC = 0. We should skip the loop lest it becomes infinite.
177*9880d681SAndroid Build Coastguard Worker if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
178*9880d681SAndroid Build Coastguard Worker I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
179*9880d681SAndroid Build Coastguard Worker return true;
180*9880d681SAndroid Build Coastguard Worker
181*9880d681SAndroid Build Coastguard Worker if (I->isInlineAsm()) {
182*9880d681SAndroid Build Coastguard Worker const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
183*9880d681SAndroid Build Coastguard Worker const char *AsmStr = I->getOperand(0).getSymbolName();
184*9880d681SAndroid Build Coastguard Worker
185*9880d681SAndroid Build Coastguard Worker // inlineasm length estimate is number of bytes assuming the longest
186*9880d681SAndroid Build Coastguard Worker // instruction.
187*9880d681SAndroid Build Coastguard Worker uint64_t MaxAsmSize = TII->getInlineAsmLength(AsmStr, *MAI);
188*9880d681SAndroid Build Coastguard Worker NumInstr += MaxAsmSize / MAI->getMaxInstLength();
189*9880d681SAndroid Build Coastguard Worker } else {
190*9880d681SAndroid Build Coastguard Worker ++NumInstr;
191*9880d681SAndroid Build Coastguard Worker }
192*9880d681SAndroid Build Coastguard Worker
193*9880d681SAndroid Build Coastguard Worker if (NumInstr >= SkipThreshold)
194*9880d681SAndroid Build Coastguard Worker return true;
195*9880d681SAndroid Build Coastguard Worker }
196*9880d681SAndroid Build Coastguard Worker }
197*9880d681SAndroid Build Coastguard Worker
198*9880d681SAndroid Build Coastguard Worker return false;
199*9880d681SAndroid Build Coastguard Worker }
200*9880d681SAndroid Build Coastguard Worker
Skip(MachineInstr & From,MachineOperand & To)201*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::Skip(MachineInstr &From, MachineOperand &To) {
202*9880d681SAndroid Build Coastguard Worker
203*9880d681SAndroid Build Coastguard Worker if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB()))
204*9880d681SAndroid Build Coastguard Worker return;
205*9880d681SAndroid Build Coastguard Worker
206*9880d681SAndroid Build Coastguard Worker DebugLoc DL = From.getDebugLoc();
207*9880d681SAndroid Build Coastguard Worker BuildMI(*From.getParent(), &From, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
208*9880d681SAndroid Build Coastguard Worker .addOperand(To);
209*9880d681SAndroid Build Coastguard Worker }
210*9880d681SAndroid Build Coastguard Worker
skipIfDead(MachineInstr & MI,MachineBasicBlock & NextBB)211*9880d681SAndroid Build Coastguard Worker bool SILowerControlFlow::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) {
212*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB = *MI.getParent();
213*9880d681SAndroid Build Coastguard Worker MachineFunction *MF = MBB.getParent();
214*9880d681SAndroid Build Coastguard Worker
215*9880d681SAndroid Build Coastguard Worker if (MF->getFunction()->getCallingConv() != CallingConv::AMDGPU_PS ||
216*9880d681SAndroid Build Coastguard Worker !shouldSkip(&MBB, &MBB.getParent()->back()))
217*9880d681SAndroid Build Coastguard Worker return false;
218*9880d681SAndroid Build Coastguard Worker
219*9880d681SAndroid Build Coastguard Worker MachineBasicBlock *SkipBB = insertSkipBlock(MBB, MI.getIterator());
220*9880d681SAndroid Build Coastguard Worker SkipBB->addSuccessor(&NextBB);
221*9880d681SAndroid Build Coastguard Worker
222*9880d681SAndroid Build Coastguard Worker const DebugLoc &DL = MI.getDebugLoc();
223*9880d681SAndroid Build Coastguard Worker
224*9880d681SAndroid Build Coastguard Worker // If the exec mask is non-zero, skip the next two instructions
225*9880d681SAndroid Build Coastguard Worker BuildMI(&MBB, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
226*9880d681SAndroid Build Coastguard Worker .addMBB(&NextBB);
227*9880d681SAndroid Build Coastguard Worker
228*9880d681SAndroid Build Coastguard Worker MachineBasicBlock::iterator Insert = SkipBB->begin();
229*9880d681SAndroid Build Coastguard Worker
230*9880d681SAndroid Build Coastguard Worker // Exec mask is zero: Export to NULL target...
231*9880d681SAndroid Build Coastguard Worker BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP))
232*9880d681SAndroid Build Coastguard Worker .addImm(0)
233*9880d681SAndroid Build Coastguard Worker .addImm(0x09) // V_008DFC_SQ_EXP_NULL
234*9880d681SAndroid Build Coastguard Worker .addImm(0)
235*9880d681SAndroid Build Coastguard Worker .addImm(1)
236*9880d681SAndroid Build Coastguard Worker .addImm(1)
237*9880d681SAndroid Build Coastguard Worker .addReg(AMDGPU::VGPR0, RegState::Undef)
238*9880d681SAndroid Build Coastguard Worker .addReg(AMDGPU::VGPR0, RegState::Undef)
239*9880d681SAndroid Build Coastguard Worker .addReg(AMDGPU::VGPR0, RegState::Undef)
240*9880d681SAndroid Build Coastguard Worker .addReg(AMDGPU::VGPR0, RegState::Undef);
241*9880d681SAndroid Build Coastguard Worker
242*9880d681SAndroid Build Coastguard Worker // ... and terminate wavefront.
243*9880d681SAndroid Build Coastguard Worker BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
244*9880d681SAndroid Build Coastguard Worker
245*9880d681SAndroid Build Coastguard Worker return true;
246*9880d681SAndroid Build Coastguard Worker }
247*9880d681SAndroid Build Coastguard Worker
If(MachineInstr & MI)248*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::If(MachineInstr &MI) {
249*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB = *MI.getParent();
250*9880d681SAndroid Build Coastguard Worker DebugLoc DL = MI.getDebugLoc();
251*9880d681SAndroid Build Coastguard Worker unsigned Reg = MI.getOperand(0).getReg();
252*9880d681SAndroid Build Coastguard Worker unsigned Vcc = MI.getOperand(1).getReg();
253*9880d681SAndroid Build Coastguard Worker
254*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), Reg)
255*9880d681SAndroid Build Coastguard Worker .addReg(Vcc);
256*9880d681SAndroid Build Coastguard Worker
257*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), Reg)
258*9880d681SAndroid Build Coastguard Worker .addReg(AMDGPU::EXEC)
259*9880d681SAndroid Build Coastguard Worker .addReg(Reg);
260*9880d681SAndroid Build Coastguard Worker
261*9880d681SAndroid Build Coastguard Worker Skip(MI, MI.getOperand(2));
262*9880d681SAndroid Build Coastguard Worker
263*9880d681SAndroid Build Coastguard Worker // Insert a pseudo terminator to help keep the verifier happy.
264*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, &MI, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
265*9880d681SAndroid Build Coastguard Worker .addOperand(MI.getOperand(2))
266*9880d681SAndroid Build Coastguard Worker .addReg(Reg);
267*9880d681SAndroid Build Coastguard Worker
268*9880d681SAndroid Build Coastguard Worker MI.eraseFromParent();
269*9880d681SAndroid Build Coastguard Worker }
270*9880d681SAndroid Build Coastguard Worker
Else(MachineInstr & MI,bool ExecModified)271*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::Else(MachineInstr &MI, bool ExecModified) {
272*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB = *MI.getParent();
273*9880d681SAndroid Build Coastguard Worker DebugLoc DL = MI.getDebugLoc();
274*9880d681SAndroid Build Coastguard Worker unsigned Dst = MI.getOperand(0).getReg();
275*9880d681SAndroid Build Coastguard Worker unsigned Src = MI.getOperand(1).getReg();
276*9880d681SAndroid Build Coastguard Worker
277*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, MBB.getFirstNonPHI(), DL,
278*9880d681SAndroid Build Coastguard Worker TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst)
279*9880d681SAndroid Build Coastguard Worker .addReg(Src); // Saved EXEC
280*9880d681SAndroid Build Coastguard Worker
281*9880d681SAndroid Build Coastguard Worker if (ExecModified) {
282*9880d681SAndroid Build Coastguard Worker // Adjust the saved exec to account for the modifications during the flow
283*9880d681SAndroid Build Coastguard Worker // block that contains the ELSE. This can happen when WQM mode is switched
284*9880d681SAndroid Build Coastguard Worker // off.
285*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_B64), Dst)
286*9880d681SAndroid Build Coastguard Worker .addReg(AMDGPU::EXEC)
287*9880d681SAndroid Build Coastguard Worker .addReg(Dst);
288*9880d681SAndroid Build Coastguard Worker }
289*9880d681SAndroid Build Coastguard Worker
290*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
291*9880d681SAndroid Build Coastguard Worker .addReg(AMDGPU::EXEC)
292*9880d681SAndroid Build Coastguard Worker .addReg(Dst);
293*9880d681SAndroid Build Coastguard Worker
294*9880d681SAndroid Build Coastguard Worker Skip(MI, MI.getOperand(2));
295*9880d681SAndroid Build Coastguard Worker
296*9880d681SAndroid Build Coastguard Worker // Insert a pseudo terminator to help keep the verifier happy.
297*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, &MI, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
298*9880d681SAndroid Build Coastguard Worker .addOperand(MI.getOperand(2))
299*9880d681SAndroid Build Coastguard Worker .addReg(Dst);
300*9880d681SAndroid Build Coastguard Worker
301*9880d681SAndroid Build Coastguard Worker MI.eraseFromParent();
302*9880d681SAndroid Build Coastguard Worker }
303*9880d681SAndroid Build Coastguard Worker
Break(MachineInstr & MI)304*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::Break(MachineInstr &MI) {
305*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB = *MI.getParent();
306*9880d681SAndroid Build Coastguard Worker DebugLoc DL = MI.getDebugLoc();
307*9880d681SAndroid Build Coastguard Worker
308*9880d681SAndroid Build Coastguard Worker unsigned Dst = MI.getOperand(0).getReg();
309*9880d681SAndroid Build Coastguard Worker unsigned Src = MI.getOperand(1).getReg();
310*9880d681SAndroid Build Coastguard Worker
311*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
312*9880d681SAndroid Build Coastguard Worker .addReg(AMDGPU::EXEC)
313*9880d681SAndroid Build Coastguard Worker .addReg(Src);
314*9880d681SAndroid Build Coastguard Worker
315*9880d681SAndroid Build Coastguard Worker MI.eraseFromParent();
316*9880d681SAndroid Build Coastguard Worker }
317*9880d681SAndroid Build Coastguard Worker
IfBreak(MachineInstr & MI)318*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::IfBreak(MachineInstr &MI) {
319*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB = *MI.getParent();
320*9880d681SAndroid Build Coastguard Worker DebugLoc DL = MI.getDebugLoc();
321*9880d681SAndroid Build Coastguard Worker
322*9880d681SAndroid Build Coastguard Worker unsigned Dst = MI.getOperand(0).getReg();
323*9880d681SAndroid Build Coastguard Worker unsigned Vcc = MI.getOperand(1).getReg();
324*9880d681SAndroid Build Coastguard Worker unsigned Src = MI.getOperand(2).getReg();
325*9880d681SAndroid Build Coastguard Worker
326*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
327*9880d681SAndroid Build Coastguard Worker .addReg(Vcc)
328*9880d681SAndroid Build Coastguard Worker .addReg(Src);
329*9880d681SAndroid Build Coastguard Worker
330*9880d681SAndroid Build Coastguard Worker MI.eraseFromParent();
331*9880d681SAndroid Build Coastguard Worker }
332*9880d681SAndroid Build Coastguard Worker
ElseBreak(MachineInstr & MI)333*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::ElseBreak(MachineInstr &MI) {
334*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB = *MI.getParent();
335*9880d681SAndroid Build Coastguard Worker DebugLoc DL = MI.getDebugLoc();
336*9880d681SAndroid Build Coastguard Worker
337*9880d681SAndroid Build Coastguard Worker unsigned Dst = MI.getOperand(0).getReg();
338*9880d681SAndroid Build Coastguard Worker unsigned Saved = MI.getOperand(1).getReg();
339*9880d681SAndroid Build Coastguard Worker unsigned Src = MI.getOperand(2).getReg();
340*9880d681SAndroid Build Coastguard Worker
341*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
342*9880d681SAndroid Build Coastguard Worker .addReg(Saved)
343*9880d681SAndroid Build Coastguard Worker .addReg(Src);
344*9880d681SAndroid Build Coastguard Worker
345*9880d681SAndroid Build Coastguard Worker MI.eraseFromParent();
346*9880d681SAndroid Build Coastguard Worker }
347*9880d681SAndroid Build Coastguard Worker
Loop(MachineInstr & MI)348*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::Loop(MachineInstr &MI) {
349*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB = *MI.getParent();
350*9880d681SAndroid Build Coastguard Worker DebugLoc DL = MI.getDebugLoc();
351*9880d681SAndroid Build Coastguard Worker unsigned Src = MI.getOperand(0).getReg();
352*9880d681SAndroid Build Coastguard Worker
353*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64), AMDGPU::EXEC)
354*9880d681SAndroid Build Coastguard Worker .addReg(AMDGPU::EXEC)
355*9880d681SAndroid Build Coastguard Worker .addReg(Src);
356*9880d681SAndroid Build Coastguard Worker
357*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
358*9880d681SAndroid Build Coastguard Worker .addOperand(MI.getOperand(1));
359*9880d681SAndroid Build Coastguard Worker
360*9880d681SAndroid Build Coastguard Worker MI.eraseFromParent();
361*9880d681SAndroid Build Coastguard Worker }
362*9880d681SAndroid Build Coastguard Worker
EndCf(MachineInstr & MI)363*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::EndCf(MachineInstr &MI) {
364*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB = *MI.getParent();
365*9880d681SAndroid Build Coastguard Worker DebugLoc DL = MI.getDebugLoc();
366*9880d681SAndroid Build Coastguard Worker unsigned Reg = MI.getOperand(0).getReg();
367*9880d681SAndroid Build Coastguard Worker
368*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, MBB.getFirstNonPHI(), DL,
369*9880d681SAndroid Build Coastguard Worker TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
370*9880d681SAndroid Build Coastguard Worker .addReg(AMDGPU::EXEC)
371*9880d681SAndroid Build Coastguard Worker .addReg(Reg);
372*9880d681SAndroid Build Coastguard Worker
373*9880d681SAndroid Build Coastguard Worker MI.eraseFromParent();
374*9880d681SAndroid Build Coastguard Worker }
375*9880d681SAndroid Build Coastguard Worker
Branch(MachineInstr & MI)376*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::Branch(MachineInstr &MI) {
377*9880d681SAndroid Build Coastguard Worker MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
378*9880d681SAndroid Build Coastguard Worker if (MBB == MI.getParent()->getNextNode())
379*9880d681SAndroid Build Coastguard Worker MI.eraseFromParent();
380*9880d681SAndroid Build Coastguard Worker
381*9880d681SAndroid Build Coastguard Worker // If these aren't equal, this is probably an infinite loop.
382*9880d681SAndroid Build Coastguard Worker }
383*9880d681SAndroid Build Coastguard Worker
Kill(MachineInstr & MI)384*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::Kill(MachineInstr &MI) {
385*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB = *MI.getParent();
386*9880d681SAndroid Build Coastguard Worker DebugLoc DL = MI.getDebugLoc();
387*9880d681SAndroid Build Coastguard Worker const MachineOperand &Op = MI.getOperand(0);
388*9880d681SAndroid Build Coastguard Worker
389*9880d681SAndroid Build Coastguard Worker #ifndef NDEBUG
390*9880d681SAndroid Build Coastguard Worker CallingConv::ID CallConv = MBB.getParent()->getFunction()->getCallingConv();
391*9880d681SAndroid Build Coastguard Worker // Kill is only allowed in pixel / geometry shaders.
392*9880d681SAndroid Build Coastguard Worker assert(CallConv == CallingConv::AMDGPU_PS ||
393*9880d681SAndroid Build Coastguard Worker CallConv == CallingConv::AMDGPU_GS);
394*9880d681SAndroid Build Coastguard Worker #endif
395*9880d681SAndroid Build Coastguard Worker
396*9880d681SAndroid Build Coastguard Worker // Clear this thread from the exec mask if the operand is negative
397*9880d681SAndroid Build Coastguard Worker if ((Op.isImm())) {
398*9880d681SAndroid Build Coastguard Worker // Constant operand: Set exec mask to 0 or do nothing
399*9880d681SAndroid Build Coastguard Worker if (Op.getImm() & 0x80000000) {
400*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
401*9880d681SAndroid Build Coastguard Worker .addImm(0);
402*9880d681SAndroid Build Coastguard Worker }
403*9880d681SAndroid Build Coastguard Worker } else {
404*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32))
405*9880d681SAndroid Build Coastguard Worker .addImm(0)
406*9880d681SAndroid Build Coastguard Worker .addOperand(Op);
407*9880d681SAndroid Build Coastguard Worker }
408*9880d681SAndroid Build Coastguard Worker
409*9880d681SAndroid Build Coastguard Worker MI.eraseFromParent();
410*9880d681SAndroid Build Coastguard Worker }
411*9880d681SAndroid Build Coastguard Worker
412*9880d681SAndroid Build Coastguard Worker // All currently live registers must remain so in the remainder block.
splitLoadM0BlockLiveIns(LivePhysRegs & RemainderLiveRegs,const MachineRegisterInfo & MRI,const MachineInstr & MI,MachineBasicBlock & LoopBB,MachineBasicBlock & RemainderBB,unsigned SaveReg,const MachineOperand & IdxReg)413*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::splitLoadM0BlockLiveIns(LivePhysRegs &RemainderLiveRegs,
414*9880d681SAndroid Build Coastguard Worker const MachineRegisterInfo &MRI,
415*9880d681SAndroid Build Coastguard Worker const MachineInstr &MI,
416*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &LoopBB,
417*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &RemainderBB,
418*9880d681SAndroid Build Coastguard Worker unsigned SaveReg,
419*9880d681SAndroid Build Coastguard Worker const MachineOperand &IdxReg) {
420*9880d681SAndroid Build Coastguard Worker // Add reg defined in loop body.
421*9880d681SAndroid Build Coastguard Worker RemainderLiveRegs.addReg(SaveReg);
422*9880d681SAndroid Build Coastguard Worker
423*9880d681SAndroid Build Coastguard Worker if (const MachineOperand *Val = TII->getNamedOperand(MI, AMDGPU::OpName::val)) {
424*9880d681SAndroid Build Coastguard Worker if (!Val->isUndef()) {
425*9880d681SAndroid Build Coastguard Worker RemainderLiveRegs.addReg(Val->getReg());
426*9880d681SAndroid Build Coastguard Worker LoopBB.addLiveIn(Val->getReg());
427*9880d681SAndroid Build Coastguard Worker }
428*9880d681SAndroid Build Coastguard Worker }
429*9880d681SAndroid Build Coastguard Worker
430*9880d681SAndroid Build Coastguard Worker for (unsigned Reg : RemainderLiveRegs) {
431*9880d681SAndroid Build Coastguard Worker if (MRI.isAllocatable(Reg))
432*9880d681SAndroid Build Coastguard Worker RemainderBB.addLiveIn(Reg);
433*9880d681SAndroid Build Coastguard Worker }
434*9880d681SAndroid Build Coastguard Worker
435*9880d681SAndroid Build Coastguard Worker const MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src);
436*9880d681SAndroid Build Coastguard Worker if (!Src->isUndef())
437*9880d681SAndroid Build Coastguard Worker LoopBB.addLiveIn(Src->getReg());
438*9880d681SAndroid Build Coastguard Worker
439*9880d681SAndroid Build Coastguard Worker if (!IdxReg.isUndef())
440*9880d681SAndroid Build Coastguard Worker LoopBB.addLiveIn(IdxReg.getReg());
441*9880d681SAndroid Build Coastguard Worker LoopBB.sortUniqueLiveIns();
442*9880d681SAndroid Build Coastguard Worker }
443*9880d681SAndroid Build Coastguard Worker
emitLoadM0FromVGPRLoop(MachineBasicBlock & LoopBB,DebugLoc DL,MachineInstr * MovRel,const MachineOperand & IdxReg,int Offset)444*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::emitLoadM0FromVGPRLoop(MachineBasicBlock &LoopBB,
445*9880d681SAndroid Build Coastguard Worker DebugLoc DL,
446*9880d681SAndroid Build Coastguard Worker MachineInstr *MovRel,
447*9880d681SAndroid Build Coastguard Worker const MachineOperand &IdxReg,
448*9880d681SAndroid Build Coastguard Worker int Offset) {
449*9880d681SAndroid Build Coastguard Worker MachineBasicBlock::iterator I = LoopBB.begin();
450*9880d681SAndroid Build Coastguard Worker
451*9880d681SAndroid Build Coastguard Worker // Read the next variant into VCC (lower 32 bits) <- also loop target
452*9880d681SAndroid Build Coastguard Worker BuildMI(LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), AMDGPU::VCC_LO)
453*9880d681SAndroid Build Coastguard Worker .addReg(IdxReg.getReg(), getUndefRegState(IdxReg.isUndef()));
454*9880d681SAndroid Build Coastguard Worker
455*9880d681SAndroid Build Coastguard Worker // Move index from VCC into M0
456*9880d681SAndroid Build Coastguard Worker BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
457*9880d681SAndroid Build Coastguard Worker .addReg(AMDGPU::VCC_LO);
458*9880d681SAndroid Build Coastguard Worker
459*9880d681SAndroid Build Coastguard Worker // Compare the just read M0 value to all possible Idx values
460*9880d681SAndroid Build Coastguard Worker BuildMI(LoopBB, I, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32))
461*9880d681SAndroid Build Coastguard Worker .addReg(AMDGPU::M0)
462*9880d681SAndroid Build Coastguard Worker .addReg(IdxReg.getReg(), getUndefRegState(IdxReg.isUndef()));
463*9880d681SAndroid Build Coastguard Worker
464*9880d681SAndroid Build Coastguard Worker // Update EXEC, save the original EXEC value to VCC
465*9880d681SAndroid Build Coastguard Worker BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
466*9880d681SAndroid Build Coastguard Worker .addReg(AMDGPU::VCC);
467*9880d681SAndroid Build Coastguard Worker
468*9880d681SAndroid Build Coastguard Worker if (Offset != 0) {
469*9880d681SAndroid Build Coastguard Worker BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
470*9880d681SAndroid Build Coastguard Worker .addReg(AMDGPU::M0)
471*9880d681SAndroid Build Coastguard Worker .addImm(Offset);
472*9880d681SAndroid Build Coastguard Worker }
473*9880d681SAndroid Build Coastguard Worker
474*9880d681SAndroid Build Coastguard Worker // Do the actual move
475*9880d681SAndroid Build Coastguard Worker LoopBB.insert(I, MovRel);
476*9880d681SAndroid Build Coastguard Worker
477*9880d681SAndroid Build Coastguard Worker // Update EXEC, switch all done bits to 0 and all todo bits to 1
478*9880d681SAndroid Build Coastguard Worker BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
479*9880d681SAndroid Build Coastguard Worker .addReg(AMDGPU::EXEC)
480*9880d681SAndroid Build Coastguard Worker .addReg(AMDGPU::VCC);
481*9880d681SAndroid Build Coastguard Worker
482*9880d681SAndroid Build Coastguard Worker // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
483*9880d681SAndroid Build Coastguard Worker BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
484*9880d681SAndroid Build Coastguard Worker .addMBB(&LoopBB);
485*9880d681SAndroid Build Coastguard Worker }
486*9880d681SAndroid Build Coastguard Worker
insertSkipBlock(MachineBasicBlock & MBB,MachineBasicBlock::iterator I) const487*9880d681SAndroid Build Coastguard Worker MachineBasicBlock *SILowerControlFlow::insertSkipBlock(
488*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
489*9880d681SAndroid Build Coastguard Worker MachineFunction *MF = MBB.getParent();
490*9880d681SAndroid Build Coastguard Worker
491*9880d681SAndroid Build Coastguard Worker MachineBasicBlock *SkipBB = MF->CreateMachineBasicBlock();
492*9880d681SAndroid Build Coastguard Worker MachineFunction::iterator MBBI(MBB);
493*9880d681SAndroid Build Coastguard Worker ++MBBI;
494*9880d681SAndroid Build Coastguard Worker
495*9880d681SAndroid Build Coastguard Worker MF->insert(MBBI, SkipBB);
496*9880d681SAndroid Build Coastguard Worker MBB.addSuccessor(SkipBB);
497*9880d681SAndroid Build Coastguard Worker
498*9880d681SAndroid Build Coastguard Worker return SkipBB;
499*9880d681SAndroid Build Coastguard Worker }
500*9880d681SAndroid Build Coastguard Worker
501*9880d681SAndroid Build Coastguard Worker std::pair<MachineBasicBlock *, MachineBasicBlock *>
splitBlock(MachineBasicBlock & MBB,MachineBasicBlock::iterator I)502*9880d681SAndroid Build Coastguard Worker SILowerControlFlow::splitBlock(MachineBasicBlock &MBB,
503*9880d681SAndroid Build Coastguard Worker MachineBasicBlock::iterator I) {
504*9880d681SAndroid Build Coastguard Worker MachineFunction *MF = MBB.getParent();
505*9880d681SAndroid Build Coastguard Worker
506*9880d681SAndroid Build Coastguard Worker // To insert the loop we need to split the block. Move everything after this
507*9880d681SAndroid Build Coastguard Worker // point to a new block, and insert a new empty block between the two.
508*9880d681SAndroid Build Coastguard Worker MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
509*9880d681SAndroid Build Coastguard Worker MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
510*9880d681SAndroid Build Coastguard Worker MachineFunction::iterator MBBI(MBB);
511*9880d681SAndroid Build Coastguard Worker ++MBBI;
512*9880d681SAndroid Build Coastguard Worker
513*9880d681SAndroid Build Coastguard Worker MF->insert(MBBI, LoopBB);
514*9880d681SAndroid Build Coastguard Worker MF->insert(MBBI, RemainderBB);
515*9880d681SAndroid Build Coastguard Worker
516*9880d681SAndroid Build Coastguard Worker // Move the rest of the block into a new block.
517*9880d681SAndroid Build Coastguard Worker RemainderBB->transferSuccessors(&MBB);
518*9880d681SAndroid Build Coastguard Worker RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
519*9880d681SAndroid Build Coastguard Worker
520*9880d681SAndroid Build Coastguard Worker MBB.addSuccessor(LoopBB);
521*9880d681SAndroid Build Coastguard Worker
522*9880d681SAndroid Build Coastguard Worker return std::make_pair(LoopBB, RemainderBB);
523*9880d681SAndroid Build Coastguard Worker }
524*9880d681SAndroid Build Coastguard Worker
525*9880d681SAndroid Build Coastguard Worker // Returns true if a new block was inserted.
loadM0(MachineInstr & MI,MachineInstr * MovRel,int Offset)526*9880d681SAndroid Build Coastguard Worker bool SILowerControlFlow::loadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) {
527*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB = *MI.getParent();
528*9880d681SAndroid Build Coastguard Worker DebugLoc DL = MI.getDebugLoc();
529*9880d681SAndroid Build Coastguard Worker MachineBasicBlock::iterator I(&MI);
530*9880d681SAndroid Build Coastguard Worker
531*9880d681SAndroid Build Coastguard Worker const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
532*9880d681SAndroid Build Coastguard Worker
533*9880d681SAndroid Build Coastguard Worker if (AMDGPU::SReg_32RegClass.contains(Idx->getReg())) {
534*9880d681SAndroid Build Coastguard Worker if (Offset != 0) {
535*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
536*9880d681SAndroid Build Coastguard Worker .addReg(Idx->getReg(), getUndefRegState(Idx->isUndef()))
537*9880d681SAndroid Build Coastguard Worker .addImm(Offset);
538*9880d681SAndroid Build Coastguard Worker } else {
539*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
540*9880d681SAndroid Build Coastguard Worker .addReg(Idx->getReg(), getUndefRegState(Idx->isUndef()));
541*9880d681SAndroid Build Coastguard Worker }
542*9880d681SAndroid Build Coastguard Worker
543*9880d681SAndroid Build Coastguard Worker MBB.insert(I, MovRel);
544*9880d681SAndroid Build Coastguard Worker MI.eraseFromParent();
545*9880d681SAndroid Build Coastguard Worker return false;
546*9880d681SAndroid Build Coastguard Worker }
547*9880d681SAndroid Build Coastguard Worker
548*9880d681SAndroid Build Coastguard Worker MachineOperand *SaveOp = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
549*9880d681SAndroid Build Coastguard Worker SaveOp->setIsDead(false);
550*9880d681SAndroid Build Coastguard Worker unsigned Save = SaveOp->getReg();
551*9880d681SAndroid Build Coastguard Worker
552*9880d681SAndroid Build Coastguard Worker // Reading from a VGPR requires looping over all workitems in the wavefront.
553*9880d681SAndroid Build Coastguard Worker assert(AMDGPU::SReg_64RegClass.contains(Save) &&
554*9880d681SAndroid Build Coastguard Worker AMDGPU::VGPR_32RegClass.contains(Idx->getReg()));
555*9880d681SAndroid Build Coastguard Worker
556*9880d681SAndroid Build Coastguard Worker // Save the EXEC mask
557*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64), Save)
558*9880d681SAndroid Build Coastguard Worker .addReg(AMDGPU::EXEC);
559*9880d681SAndroid Build Coastguard Worker
560*9880d681SAndroid Build Coastguard Worker LivePhysRegs RemainderLiveRegs(TRI);
561*9880d681SAndroid Build Coastguard Worker
562*9880d681SAndroid Build Coastguard Worker RemainderLiveRegs.addLiveOuts(MBB);
563*9880d681SAndroid Build Coastguard Worker
564*9880d681SAndroid Build Coastguard Worker MachineBasicBlock *LoopBB;
565*9880d681SAndroid Build Coastguard Worker MachineBasicBlock *RemainderBB;
566*9880d681SAndroid Build Coastguard Worker
567*9880d681SAndroid Build Coastguard Worker std::tie(LoopBB, RemainderBB) = splitBlock(MBB, I);
568*9880d681SAndroid Build Coastguard Worker
569*9880d681SAndroid Build Coastguard Worker for (const MachineInstr &Inst : reverse(*RemainderBB))
570*9880d681SAndroid Build Coastguard Worker RemainderLiveRegs.stepBackward(Inst);
571*9880d681SAndroid Build Coastguard Worker
572*9880d681SAndroid Build Coastguard Worker MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
573*9880d681SAndroid Build Coastguard Worker LoopBB->addSuccessor(RemainderBB);
574*9880d681SAndroid Build Coastguard Worker LoopBB->addSuccessor(LoopBB);
575*9880d681SAndroid Build Coastguard Worker
576*9880d681SAndroid Build Coastguard Worker splitLoadM0BlockLiveIns(RemainderLiveRegs, MRI, MI, *LoopBB,
577*9880d681SAndroid Build Coastguard Worker *RemainderBB, Save, *Idx);
578*9880d681SAndroid Build Coastguard Worker
579*9880d681SAndroid Build Coastguard Worker emitLoadM0FromVGPRLoop(*LoopBB, DL, MovRel, *Idx, Offset);
580*9880d681SAndroid Build Coastguard Worker
581*9880d681SAndroid Build Coastguard Worker MachineBasicBlock::iterator First = RemainderBB->begin();
582*9880d681SAndroid Build Coastguard Worker BuildMI(*RemainderBB, First, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
583*9880d681SAndroid Build Coastguard Worker .addReg(Save);
584*9880d681SAndroid Build Coastguard Worker
585*9880d681SAndroid Build Coastguard Worker MI.eraseFromParent();
586*9880d681SAndroid Build Coastguard Worker return true;
587*9880d681SAndroid Build Coastguard Worker }
588*9880d681SAndroid Build Coastguard Worker
589*9880d681SAndroid Build Coastguard Worker /// \param @VecReg The register which holds element zero of the vector being
590*9880d681SAndroid Build Coastguard Worker /// addressed into.
591*9880d681SAndroid Build Coastguard Worker //
592*9880d681SAndroid Build Coastguard Worker /// \param[in] @Idx The index operand from the movrel instruction. This must be
593*9880d681SAndroid Build Coastguard Worker // a register, but may be NoRegister.
594*9880d681SAndroid Build Coastguard Worker ///
595*9880d681SAndroid Build Coastguard Worker /// \param[in] @Offset As an input, this is the constant offset part of the
596*9880d681SAndroid Build Coastguard Worker // indirect Index. e.g. v0 = v[VecReg + Offset] As an output, this is a constant
597*9880d681SAndroid Build Coastguard Worker // value that needs to be added to the value stored in M0.
598*9880d681SAndroid Build Coastguard Worker std::pair<unsigned, int>
computeIndirectRegAndOffset(unsigned VecReg,int Offset) const599*9880d681SAndroid Build Coastguard Worker SILowerControlFlow::computeIndirectRegAndOffset(unsigned VecReg, int Offset) const {
600*9880d681SAndroid Build Coastguard Worker unsigned SubReg = TRI->getSubReg(VecReg, AMDGPU::sub0);
601*9880d681SAndroid Build Coastguard Worker if (!SubReg)
602*9880d681SAndroid Build Coastguard Worker SubReg = VecReg;
603*9880d681SAndroid Build Coastguard Worker
604*9880d681SAndroid Build Coastguard Worker const TargetRegisterClass *SuperRC = TRI->getPhysRegClass(VecReg);
605*9880d681SAndroid Build Coastguard Worker const TargetRegisterClass *RC = TRI->getPhysRegClass(SubReg);
606*9880d681SAndroid Build Coastguard Worker int NumElts = SuperRC->getSize() / RC->getSize();
607*9880d681SAndroid Build Coastguard Worker
608*9880d681SAndroid Build Coastguard Worker int BaseRegIdx = TRI->getHWRegIndex(SubReg);
609*9880d681SAndroid Build Coastguard Worker
610*9880d681SAndroid Build Coastguard Worker // Skip out of bounds offsets, or else we would end up using an undefined
611*9880d681SAndroid Build Coastguard Worker // register.
612*9880d681SAndroid Build Coastguard Worker if (Offset >= NumElts)
613*9880d681SAndroid Build Coastguard Worker return std::make_pair(RC->getRegister(BaseRegIdx), Offset);
614*9880d681SAndroid Build Coastguard Worker
615*9880d681SAndroid Build Coastguard Worker int RegIdx = BaseRegIdx + Offset;
616*9880d681SAndroid Build Coastguard Worker if (RegIdx < 0) {
617*9880d681SAndroid Build Coastguard Worker Offset = RegIdx;
618*9880d681SAndroid Build Coastguard Worker RegIdx = 0;
619*9880d681SAndroid Build Coastguard Worker } else {
620*9880d681SAndroid Build Coastguard Worker Offset = 0;
621*9880d681SAndroid Build Coastguard Worker }
622*9880d681SAndroid Build Coastguard Worker
623*9880d681SAndroid Build Coastguard Worker unsigned Reg = RC->getRegister(RegIdx);
624*9880d681SAndroid Build Coastguard Worker return std::make_pair(Reg, Offset);
625*9880d681SAndroid Build Coastguard Worker }
626*9880d681SAndroid Build Coastguard Worker
627*9880d681SAndroid Build Coastguard Worker // Return true if a new block was inserted.
indirectSrc(MachineInstr & MI)628*9880d681SAndroid Build Coastguard Worker bool SILowerControlFlow::indirectSrc(MachineInstr &MI) {
629*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB = *MI.getParent();
630*9880d681SAndroid Build Coastguard Worker const DebugLoc &DL = MI.getDebugLoc();
631*9880d681SAndroid Build Coastguard Worker
632*9880d681SAndroid Build Coastguard Worker unsigned Dst = MI.getOperand(0).getReg();
633*9880d681SAndroid Build Coastguard Worker const MachineOperand *SrcVec = TII->getNamedOperand(MI, AMDGPU::OpName::src);
634*9880d681SAndroid Build Coastguard Worker int Offset = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm();
635*9880d681SAndroid Build Coastguard Worker unsigned Reg;
636*9880d681SAndroid Build Coastguard Worker
637*9880d681SAndroid Build Coastguard Worker std::tie(Reg, Offset) = computeIndirectRegAndOffset(SrcVec->getReg(), Offset);
638*9880d681SAndroid Build Coastguard Worker
639*9880d681SAndroid Build Coastguard Worker const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
640*9880d681SAndroid Build Coastguard Worker if (Idx->getReg() == AMDGPU::NoRegister) {
641*9880d681SAndroid Build Coastguard Worker // Only had a constant offset, copy the register directly.
642*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, MI.getIterator(), DL, TII->get(AMDGPU::V_MOV_B32_e32), Dst)
643*9880d681SAndroid Build Coastguard Worker .addReg(Reg, getUndefRegState(SrcVec->isUndef()));
644*9880d681SAndroid Build Coastguard Worker MI.eraseFromParent();
645*9880d681SAndroid Build Coastguard Worker return false;
646*9880d681SAndroid Build Coastguard Worker }
647*9880d681SAndroid Build Coastguard Worker
648*9880d681SAndroid Build Coastguard Worker MachineInstr *MovRel =
649*9880d681SAndroid Build Coastguard Worker BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
650*9880d681SAndroid Build Coastguard Worker .addReg(Reg, getUndefRegState(SrcVec->isUndef()))
651*9880d681SAndroid Build Coastguard Worker .addReg(SrcVec->getReg(), RegState::Implicit);
652*9880d681SAndroid Build Coastguard Worker
653*9880d681SAndroid Build Coastguard Worker return loadM0(MI, MovRel, Offset);
654*9880d681SAndroid Build Coastguard Worker }
655*9880d681SAndroid Build Coastguard Worker
656*9880d681SAndroid Build Coastguard Worker // Return true if a new block was inserted.
indirectDst(MachineInstr & MI)657*9880d681SAndroid Build Coastguard Worker bool SILowerControlFlow::indirectDst(MachineInstr &MI) {
658*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB = *MI.getParent();
659*9880d681SAndroid Build Coastguard Worker const DebugLoc &DL = MI.getDebugLoc();
660*9880d681SAndroid Build Coastguard Worker
661*9880d681SAndroid Build Coastguard Worker unsigned Dst = MI.getOperand(0).getReg();
662*9880d681SAndroid Build Coastguard Worker int Offset = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm();
663*9880d681SAndroid Build Coastguard Worker unsigned Reg;
664*9880d681SAndroid Build Coastguard Worker
665*9880d681SAndroid Build Coastguard Worker const MachineOperand *Val = TII->getNamedOperand(MI, AMDGPU::OpName::val);
666*9880d681SAndroid Build Coastguard Worker std::tie(Reg, Offset) = computeIndirectRegAndOffset(Dst, Offset);
667*9880d681SAndroid Build Coastguard Worker
668*9880d681SAndroid Build Coastguard Worker MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
669*9880d681SAndroid Build Coastguard Worker if (Idx->getReg() == AMDGPU::NoRegister) {
670*9880d681SAndroid Build Coastguard Worker // Only had a constant offset, copy the register directly.
671*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, MI.getIterator(), DL, TII->get(AMDGPU::V_MOV_B32_e32), Reg)
672*9880d681SAndroid Build Coastguard Worker .addOperand(*Val);
673*9880d681SAndroid Build Coastguard Worker MI.eraseFromParent();
674*9880d681SAndroid Build Coastguard Worker return false;
675*9880d681SAndroid Build Coastguard Worker }
676*9880d681SAndroid Build Coastguard Worker
677*9880d681SAndroid Build Coastguard Worker MachineInstr *MovRel =
678*9880d681SAndroid Build Coastguard Worker BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32), Reg)
679*9880d681SAndroid Build Coastguard Worker .addReg(Val->getReg(), getUndefRegState(Val->isUndef()))
680*9880d681SAndroid Build Coastguard Worker .addReg(Dst, RegState::Implicit);
681*9880d681SAndroid Build Coastguard Worker
682*9880d681SAndroid Build Coastguard Worker return loadM0(MI, MovRel, Offset);
683*9880d681SAndroid Build Coastguard Worker }
684*9880d681SAndroid Build Coastguard Worker
runOnMachineFunction(MachineFunction & MF)685*9880d681SAndroid Build Coastguard Worker bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
686*9880d681SAndroid Build Coastguard Worker const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
687*9880d681SAndroid Build Coastguard Worker TII = ST.getInstrInfo();
688*9880d681SAndroid Build Coastguard Worker TRI = &TII->getRegisterInfo();
689*9880d681SAndroid Build Coastguard Worker
690*9880d681SAndroid Build Coastguard Worker SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
691*9880d681SAndroid Build Coastguard Worker
692*9880d681SAndroid Build Coastguard Worker bool HaveKill = false;
693*9880d681SAndroid Build Coastguard Worker bool NeedFlat = false;
694*9880d681SAndroid Build Coastguard Worker unsigned Depth = 0;
695*9880d681SAndroid Build Coastguard Worker
696*9880d681SAndroid Build Coastguard Worker MachineFunction::iterator NextBB;
697*9880d681SAndroid Build Coastguard Worker
698*9880d681SAndroid Build Coastguard Worker for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
699*9880d681SAndroid Build Coastguard Worker BI != BE; BI = NextBB) {
700*9880d681SAndroid Build Coastguard Worker NextBB = std::next(BI);
701*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB = *BI;
702*9880d681SAndroid Build Coastguard Worker
703*9880d681SAndroid Build Coastguard Worker MachineBasicBlock *EmptyMBBAtEnd = nullptr;
704*9880d681SAndroid Build Coastguard Worker MachineBasicBlock::iterator I, Next;
705*9880d681SAndroid Build Coastguard Worker bool ExecModified = false;
706*9880d681SAndroid Build Coastguard Worker
707*9880d681SAndroid Build Coastguard Worker for (I = MBB.begin(); I != MBB.end(); I = Next) {
708*9880d681SAndroid Build Coastguard Worker Next = std::next(I);
709*9880d681SAndroid Build Coastguard Worker
710*9880d681SAndroid Build Coastguard Worker MachineInstr &MI = *I;
711*9880d681SAndroid Build Coastguard Worker
712*9880d681SAndroid Build Coastguard Worker // Flat uses m0 in case it needs to access LDS.
713*9880d681SAndroid Build Coastguard Worker if (TII->isFLAT(MI))
714*9880d681SAndroid Build Coastguard Worker NeedFlat = true;
715*9880d681SAndroid Build Coastguard Worker
716*9880d681SAndroid Build Coastguard Worker if (I->modifiesRegister(AMDGPU::EXEC, TRI))
717*9880d681SAndroid Build Coastguard Worker ExecModified = true;
718*9880d681SAndroid Build Coastguard Worker
719*9880d681SAndroid Build Coastguard Worker switch (MI.getOpcode()) {
720*9880d681SAndroid Build Coastguard Worker default: break;
721*9880d681SAndroid Build Coastguard Worker case AMDGPU::SI_IF:
722*9880d681SAndroid Build Coastguard Worker ++Depth;
723*9880d681SAndroid Build Coastguard Worker If(MI);
724*9880d681SAndroid Build Coastguard Worker break;
725*9880d681SAndroid Build Coastguard Worker
726*9880d681SAndroid Build Coastguard Worker case AMDGPU::SI_ELSE:
727*9880d681SAndroid Build Coastguard Worker Else(MI, ExecModified);
728*9880d681SAndroid Build Coastguard Worker break;
729*9880d681SAndroid Build Coastguard Worker
730*9880d681SAndroid Build Coastguard Worker case AMDGPU::SI_BREAK:
731*9880d681SAndroid Build Coastguard Worker Break(MI);
732*9880d681SAndroid Build Coastguard Worker break;
733*9880d681SAndroid Build Coastguard Worker
734*9880d681SAndroid Build Coastguard Worker case AMDGPU::SI_IF_BREAK:
735*9880d681SAndroid Build Coastguard Worker IfBreak(MI);
736*9880d681SAndroid Build Coastguard Worker break;
737*9880d681SAndroid Build Coastguard Worker
738*9880d681SAndroid Build Coastguard Worker case AMDGPU::SI_ELSE_BREAK:
739*9880d681SAndroid Build Coastguard Worker ElseBreak(MI);
740*9880d681SAndroid Build Coastguard Worker break;
741*9880d681SAndroid Build Coastguard Worker
742*9880d681SAndroid Build Coastguard Worker case AMDGPU::SI_LOOP:
743*9880d681SAndroid Build Coastguard Worker ++Depth;
744*9880d681SAndroid Build Coastguard Worker Loop(MI);
745*9880d681SAndroid Build Coastguard Worker break;
746*9880d681SAndroid Build Coastguard Worker
747*9880d681SAndroid Build Coastguard Worker case AMDGPU::SI_END_CF:
748*9880d681SAndroid Build Coastguard Worker if (--Depth == 0 && HaveKill) {
749*9880d681SAndroid Build Coastguard Worker HaveKill = false;
750*9880d681SAndroid Build Coastguard Worker
751*9880d681SAndroid Build Coastguard Worker if (skipIfDead(MI, *NextBB)) {
752*9880d681SAndroid Build Coastguard Worker NextBB = std::next(BI);
753*9880d681SAndroid Build Coastguard Worker BE = MF.end();
754*9880d681SAndroid Build Coastguard Worker Next = MBB.end();
755*9880d681SAndroid Build Coastguard Worker }
756*9880d681SAndroid Build Coastguard Worker }
757*9880d681SAndroid Build Coastguard Worker EndCf(MI);
758*9880d681SAndroid Build Coastguard Worker break;
759*9880d681SAndroid Build Coastguard Worker
760*9880d681SAndroid Build Coastguard Worker case AMDGPU::SI_KILL_TERMINATOR:
761*9880d681SAndroid Build Coastguard Worker if (Depth == 0) {
762*9880d681SAndroid Build Coastguard Worker if (skipIfDead(MI, *NextBB)) {
763*9880d681SAndroid Build Coastguard Worker NextBB = std::next(BI);
764*9880d681SAndroid Build Coastguard Worker BE = MF.end();
765*9880d681SAndroid Build Coastguard Worker Next = MBB.end();
766*9880d681SAndroid Build Coastguard Worker }
767*9880d681SAndroid Build Coastguard Worker } else
768*9880d681SAndroid Build Coastguard Worker HaveKill = true;
769*9880d681SAndroid Build Coastguard Worker Kill(MI);
770*9880d681SAndroid Build Coastguard Worker break;
771*9880d681SAndroid Build Coastguard Worker
772*9880d681SAndroid Build Coastguard Worker case AMDGPU::S_BRANCH:
773*9880d681SAndroid Build Coastguard Worker Branch(MI);
774*9880d681SAndroid Build Coastguard Worker break;
775*9880d681SAndroid Build Coastguard Worker
776*9880d681SAndroid Build Coastguard Worker case AMDGPU::SI_INDIRECT_SRC_V1:
777*9880d681SAndroid Build Coastguard Worker case AMDGPU::SI_INDIRECT_SRC_V2:
778*9880d681SAndroid Build Coastguard Worker case AMDGPU::SI_INDIRECT_SRC_V4:
779*9880d681SAndroid Build Coastguard Worker case AMDGPU::SI_INDIRECT_SRC_V8:
780*9880d681SAndroid Build Coastguard Worker case AMDGPU::SI_INDIRECT_SRC_V16:
781*9880d681SAndroid Build Coastguard Worker if (indirectSrc(MI)) {
782*9880d681SAndroid Build Coastguard Worker // The block was split at this point. We can safely skip the middle
783*9880d681SAndroid Build Coastguard Worker // inserted block to the following which contains the rest of this
784*9880d681SAndroid Build Coastguard Worker // block's instructions.
785*9880d681SAndroid Build Coastguard Worker NextBB = std::next(BI);
786*9880d681SAndroid Build Coastguard Worker BE = MF.end();
787*9880d681SAndroid Build Coastguard Worker Next = MBB.end();
788*9880d681SAndroid Build Coastguard Worker }
789*9880d681SAndroid Build Coastguard Worker
790*9880d681SAndroid Build Coastguard Worker break;
791*9880d681SAndroid Build Coastguard Worker
792*9880d681SAndroid Build Coastguard Worker case AMDGPU::SI_INDIRECT_DST_V1:
793*9880d681SAndroid Build Coastguard Worker case AMDGPU::SI_INDIRECT_DST_V2:
794*9880d681SAndroid Build Coastguard Worker case AMDGPU::SI_INDIRECT_DST_V4:
795*9880d681SAndroid Build Coastguard Worker case AMDGPU::SI_INDIRECT_DST_V8:
796*9880d681SAndroid Build Coastguard Worker case AMDGPU::SI_INDIRECT_DST_V16:
797*9880d681SAndroid Build Coastguard Worker if (indirectDst(MI)) {
798*9880d681SAndroid Build Coastguard Worker // The block was split at this point. We can safely skip the middle
799*9880d681SAndroid Build Coastguard Worker // inserted block to the following which contains the rest of this
800*9880d681SAndroid Build Coastguard Worker // block's instructions.
801*9880d681SAndroid Build Coastguard Worker NextBB = std::next(BI);
802*9880d681SAndroid Build Coastguard Worker BE = MF.end();
803*9880d681SAndroid Build Coastguard Worker Next = MBB.end();
804*9880d681SAndroid Build Coastguard Worker }
805*9880d681SAndroid Build Coastguard Worker
806*9880d681SAndroid Build Coastguard Worker break;
807*9880d681SAndroid Build Coastguard Worker
808*9880d681SAndroid Build Coastguard Worker case AMDGPU::SI_RETURN: {
809*9880d681SAndroid Build Coastguard Worker assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
810*9880d681SAndroid Build Coastguard Worker
811*9880d681SAndroid Build Coastguard Worker // Graphics shaders returning non-void shouldn't contain S_ENDPGM,
812*9880d681SAndroid Build Coastguard Worker // because external bytecode will be appended at the end.
813*9880d681SAndroid Build Coastguard Worker if (BI != --MF.end() || I != MBB.getFirstTerminator()) {
814*9880d681SAndroid Build Coastguard Worker // SI_RETURN is not the last instruction. Add an empty block at
815*9880d681SAndroid Build Coastguard Worker // the end and jump there.
816*9880d681SAndroid Build Coastguard Worker if (!EmptyMBBAtEnd) {
817*9880d681SAndroid Build Coastguard Worker EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
818*9880d681SAndroid Build Coastguard Worker MF.insert(MF.end(), EmptyMBBAtEnd);
819*9880d681SAndroid Build Coastguard Worker }
820*9880d681SAndroid Build Coastguard Worker
821*9880d681SAndroid Build Coastguard Worker MBB.addSuccessor(EmptyMBBAtEnd);
822*9880d681SAndroid Build Coastguard Worker BuildMI(*BI, I, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
823*9880d681SAndroid Build Coastguard Worker .addMBB(EmptyMBBAtEnd);
824*9880d681SAndroid Build Coastguard Worker I->eraseFromParent();
825*9880d681SAndroid Build Coastguard Worker }
826*9880d681SAndroid Build Coastguard Worker break;
827*9880d681SAndroid Build Coastguard Worker }
828*9880d681SAndroid Build Coastguard Worker }
829*9880d681SAndroid Build Coastguard Worker }
830*9880d681SAndroid Build Coastguard Worker }
831*9880d681SAndroid Build Coastguard Worker
832*9880d681SAndroid Build Coastguard Worker if (NeedFlat && MFI->IsKernel) {
833*9880d681SAndroid Build Coastguard Worker // TODO: What to use with function calls?
834*9880d681SAndroid Build Coastguard Worker // We will need to Initialize the flat scratch register pair.
835*9880d681SAndroid Build Coastguard Worker if (NeedFlat)
836*9880d681SAndroid Build Coastguard Worker MFI->setHasFlatInstructions(true);
837*9880d681SAndroid Build Coastguard Worker }
838*9880d681SAndroid Build Coastguard Worker
839*9880d681SAndroid Build Coastguard Worker return true;
840*9880d681SAndroid Build Coastguard Worker }
841