xref: /aosp_15_r20/external/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker //===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===//
2*9880d681SAndroid Build Coastguard Worker //
3*9880d681SAndroid Build Coastguard Worker //                     The LLVM Compiler Infrastructure
4*9880d681SAndroid Build Coastguard Worker //
5*9880d681SAndroid Build Coastguard Worker // This file is distributed under the University of Illinois Open Source
6*9880d681SAndroid Build Coastguard Worker // License. See LICENSE.TXT for details.
7*9880d681SAndroid Build Coastguard Worker //
8*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
9*9880d681SAndroid Build Coastguard Worker //
10*9880d681SAndroid Build Coastguard Worker /// \file
11*9880d681SAndroid Build Coastguard Worker /// \brief This pass lowers the pseudo control flow instructions to real
12*9880d681SAndroid Build Coastguard Worker /// machine instructions.
13*9880d681SAndroid Build Coastguard Worker ///
14*9880d681SAndroid Build Coastguard Worker /// All control flow is handled using predicated instructions and
15*9880d681SAndroid Build Coastguard Worker /// a predicate stack.  Each Scalar ALU controls the operations of 64 Vector
16*9880d681SAndroid Build Coastguard Worker /// ALUs.  The Scalar ALU can update the predicate for any of the Vector ALUs
17*9880d681SAndroid Build Coastguard Worker /// by writing to the 64-bit EXEC register (each bit corresponds to a
18*9880d681SAndroid Build Coastguard Worker /// single vector ALU).  Typically, for predicates, a vector ALU will write
19*9880d681SAndroid Build Coastguard Worker /// to its bit of the VCC register (like EXEC VCC is 64-bits, one for each
20*9880d681SAndroid Build Coastguard Worker /// Vector ALU) and then the ScalarALU will AND the VCC register with the
21*9880d681SAndroid Build Coastguard Worker /// EXEC to update the predicates.
22*9880d681SAndroid Build Coastguard Worker ///
23*9880d681SAndroid Build Coastguard Worker /// For example:
24*9880d681SAndroid Build Coastguard Worker /// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2
25*9880d681SAndroid Build Coastguard Worker /// %SGPR0 = SI_IF %VCC
26*9880d681SAndroid Build Coastguard Worker ///   %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0
27*9880d681SAndroid Build Coastguard Worker /// %SGPR0 = SI_ELSE %SGPR0
28*9880d681SAndroid Build Coastguard Worker ///   %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0
29*9880d681SAndroid Build Coastguard Worker /// SI_END_CF %SGPR0
30*9880d681SAndroid Build Coastguard Worker ///
31*9880d681SAndroid Build Coastguard Worker /// becomes:
32*9880d681SAndroid Build Coastguard Worker ///
33*9880d681SAndroid Build Coastguard Worker /// %SGPR0 = S_AND_SAVEEXEC_B64 %VCC  // Save and update the exec mask
34*9880d681SAndroid Build Coastguard Worker /// %SGPR0 = S_XOR_B64 %SGPR0, %EXEC  // Clear live bits from saved exec mask
35*9880d681SAndroid Build Coastguard Worker /// S_CBRANCH_EXECZ label0            // This instruction is an optional
36*9880d681SAndroid Build Coastguard Worker ///                                   // optimization which allows us to
37*9880d681SAndroid Build Coastguard Worker ///                                   // branch if all the bits of
38*9880d681SAndroid Build Coastguard Worker ///                                   // EXEC are zero.
39*9880d681SAndroid Build Coastguard Worker /// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch
40*9880d681SAndroid Build Coastguard Worker ///
41*9880d681SAndroid Build Coastguard Worker /// label0:
42*9880d681SAndroid Build Coastguard Worker /// %SGPR0 = S_OR_SAVEEXEC_B64 %SGPR0  // Restore the exec mask for the Else block
43*9880d681SAndroid Build Coastguard Worker /// %EXEC = S_XOR_B64 %SGPR0, %EXEC    // Clear live bits from saved exec mask
44*9880d681SAndroid Build Coastguard Worker /// S_CBRANCH_EXECZ label1             // Use our branch optimization
45*9880d681SAndroid Build Coastguard Worker ///                                    // instruction again.
46*9880d681SAndroid Build Coastguard Worker /// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0  // Do the ELSE block
47*9880d681SAndroid Build Coastguard Worker /// label1:
48*9880d681SAndroid Build Coastguard Worker /// %EXEC = S_OR_B64 %EXEC, %SGPR0     // Re-enable saved exec mask bits
49*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
50*9880d681SAndroid Build Coastguard Worker 
51*9880d681SAndroid Build Coastguard Worker #include "AMDGPU.h"
52*9880d681SAndroid Build Coastguard Worker #include "AMDGPUSubtarget.h"
53*9880d681SAndroid Build Coastguard Worker #include "SIInstrInfo.h"
54*9880d681SAndroid Build Coastguard Worker #include "SIMachineFunctionInfo.h"
55*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/LivePhysRegs.h"
56*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFrameInfo.h"
57*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFunction.h"
58*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFunctionPass.h"
59*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineInstrBuilder.h"
60*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineRegisterInfo.h"
61*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/Constants.h"
62*9880d681SAndroid Build Coastguard Worker 
63*9880d681SAndroid Build Coastguard Worker using namespace llvm;
64*9880d681SAndroid Build Coastguard Worker 
65*9880d681SAndroid Build Coastguard Worker #define DEBUG_TYPE "si-lower-control-flow"
66*9880d681SAndroid Build Coastguard Worker 
67*9880d681SAndroid Build Coastguard Worker namespace {
68*9880d681SAndroid Build Coastguard Worker 
69*9880d681SAndroid Build Coastguard Worker class SILowerControlFlow : public MachineFunctionPass {
70*9880d681SAndroid Build Coastguard Worker private:
71*9880d681SAndroid Build Coastguard Worker   static const unsigned SkipThreshold = 12;
72*9880d681SAndroid Build Coastguard Worker 
73*9880d681SAndroid Build Coastguard Worker   const SIRegisterInfo *TRI;
74*9880d681SAndroid Build Coastguard Worker   const SIInstrInfo *TII;
75*9880d681SAndroid Build Coastguard Worker 
76*9880d681SAndroid Build Coastguard Worker   bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
77*9880d681SAndroid Build Coastguard Worker 
78*9880d681SAndroid Build Coastguard Worker   void Skip(MachineInstr &From, MachineOperand &To);
79*9880d681SAndroid Build Coastguard Worker   bool skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB);
80*9880d681SAndroid Build Coastguard Worker 
81*9880d681SAndroid Build Coastguard Worker   void If(MachineInstr &MI);
82*9880d681SAndroid Build Coastguard Worker   void Else(MachineInstr &MI, bool ExecModified);
83*9880d681SAndroid Build Coastguard Worker   void Break(MachineInstr &MI);
84*9880d681SAndroid Build Coastguard Worker   void IfBreak(MachineInstr &MI);
85*9880d681SAndroid Build Coastguard Worker   void ElseBreak(MachineInstr &MI);
86*9880d681SAndroid Build Coastguard Worker   void Loop(MachineInstr &MI);
87*9880d681SAndroid Build Coastguard Worker   void EndCf(MachineInstr &MI);
88*9880d681SAndroid Build Coastguard Worker 
89*9880d681SAndroid Build Coastguard Worker   void Kill(MachineInstr &MI);
90*9880d681SAndroid Build Coastguard Worker   void Branch(MachineInstr &MI);
91*9880d681SAndroid Build Coastguard Worker 
92*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *insertSkipBlock(MachineBasicBlock &MBB,
93*9880d681SAndroid Build Coastguard Worker                                      MachineBasicBlock::iterator I) const;
94*9880d681SAndroid Build Coastguard Worker 
95*9880d681SAndroid Build Coastguard Worker   std::pair<MachineBasicBlock *, MachineBasicBlock *>
96*9880d681SAndroid Build Coastguard Worker   splitBlock(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
97*9880d681SAndroid Build Coastguard Worker 
98*9880d681SAndroid Build Coastguard Worker   void splitLoadM0BlockLiveIns(LivePhysRegs &RemainderLiveRegs,
99*9880d681SAndroid Build Coastguard Worker                                const MachineRegisterInfo &MRI,
100*9880d681SAndroid Build Coastguard Worker                                const MachineInstr &MI,
101*9880d681SAndroid Build Coastguard Worker                                MachineBasicBlock &LoopBB,
102*9880d681SAndroid Build Coastguard Worker                                MachineBasicBlock &RemainderBB,
103*9880d681SAndroid Build Coastguard Worker                                unsigned SaveReg,
104*9880d681SAndroid Build Coastguard Worker                                const MachineOperand &IdxReg);
105*9880d681SAndroid Build Coastguard Worker 
106*9880d681SAndroid Build Coastguard Worker   void emitLoadM0FromVGPRLoop(MachineBasicBlock &LoopBB, DebugLoc DL,
107*9880d681SAndroid Build Coastguard Worker                               MachineInstr *MovRel,
108*9880d681SAndroid Build Coastguard Worker                               const MachineOperand &IdxReg,
109*9880d681SAndroid Build Coastguard Worker                               int Offset);
110*9880d681SAndroid Build Coastguard Worker 
111*9880d681SAndroid Build Coastguard Worker   bool loadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset = 0);
112*9880d681SAndroid Build Coastguard Worker   std::pair<unsigned, int> computeIndirectRegAndOffset(unsigned VecReg,
113*9880d681SAndroid Build Coastguard Worker                                                        int Offset) const;
114*9880d681SAndroid Build Coastguard Worker   bool indirectSrc(MachineInstr &MI);
115*9880d681SAndroid Build Coastguard Worker   bool indirectDst(MachineInstr &MI);
116*9880d681SAndroid Build Coastguard Worker 
117*9880d681SAndroid Build Coastguard Worker public:
118*9880d681SAndroid Build Coastguard Worker   static char ID;
119*9880d681SAndroid Build Coastguard Worker 
SILowerControlFlow()120*9880d681SAndroid Build Coastguard Worker   SILowerControlFlow() :
121*9880d681SAndroid Build Coastguard Worker     MachineFunctionPass(ID), TRI(nullptr), TII(nullptr) { }
122*9880d681SAndroid Build Coastguard Worker 
123*9880d681SAndroid Build Coastguard Worker   bool runOnMachineFunction(MachineFunction &MF) override;
124*9880d681SAndroid Build Coastguard Worker 
getPassName() const125*9880d681SAndroid Build Coastguard Worker   const char *getPassName() const override {
126*9880d681SAndroid Build Coastguard Worker     return "SI Lower control flow pseudo instructions";
127*9880d681SAndroid Build Coastguard Worker   }
128*9880d681SAndroid Build Coastguard Worker };
129*9880d681SAndroid Build Coastguard Worker 
130*9880d681SAndroid Build Coastguard Worker } // End anonymous namespace
131*9880d681SAndroid Build Coastguard Worker 
// Definition of the static pass identifier declared in SILowerControlFlow;
// the constructor passes it to MachineFunctionPass.
132*9880d681SAndroid Build Coastguard Worker char SILowerControlFlow::ID = 0;
133*9880d681SAndroid Build Coastguard Worker 
// Pass registration: DEBUG_TYPE ("si-lower-control-flow") is the pass
// argument and "SI lower control flow" the descriptive name.
134*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS(SILowerControlFlow, DEBUG_TYPE,
135*9880d681SAndroid Build Coastguard Worker                 "SI lower control flow", false, false)
136*9880d681SAndroid Build Coastguard Worker 
// Exposes the pass identifier by reference under the llvm namespace.
137*9880d681SAndroid Build Coastguard Worker char &llvm::SILowerControlFlowPassID = SILowerControlFlow::ID;
138*9880d681SAndroid Build Coastguard Worker 
139*9880d681SAndroid Build Coastguard Worker 
createSILowerControlFlowPass()140*9880d681SAndroid Build Coastguard Worker FunctionPass *llvm::createSILowerControlFlowPass() {
141*9880d681SAndroid Build Coastguard Worker   return new SILowerControlFlow();
142*9880d681SAndroid Build Coastguard Worker }
143*9880d681SAndroid Build Coastguard Worker 
opcodeEmitsNoInsts(unsigned Opc)144*9880d681SAndroid Build Coastguard Worker static bool opcodeEmitsNoInsts(unsigned Opc) {
145*9880d681SAndroid Build Coastguard Worker   switch (Opc) {
146*9880d681SAndroid Build Coastguard Worker   case TargetOpcode::IMPLICIT_DEF:
147*9880d681SAndroid Build Coastguard Worker   case TargetOpcode::KILL:
148*9880d681SAndroid Build Coastguard Worker   case TargetOpcode::BUNDLE:
149*9880d681SAndroid Build Coastguard Worker   case TargetOpcode::CFI_INSTRUCTION:
150*9880d681SAndroid Build Coastguard Worker   case TargetOpcode::EH_LABEL:
151*9880d681SAndroid Build Coastguard Worker   case TargetOpcode::GC_LABEL:
152*9880d681SAndroid Build Coastguard Worker   case TargetOpcode::DBG_VALUE:
153*9880d681SAndroid Build Coastguard Worker     return true;
154*9880d681SAndroid Build Coastguard Worker   default:
155*9880d681SAndroid Build Coastguard Worker     return false;
156*9880d681SAndroid Build Coastguard Worker   }
157*9880d681SAndroid Build Coastguard Worker }
158*9880d681SAndroid Build Coastguard Worker 
shouldSkip(MachineBasicBlock * From,MachineBasicBlock * To)159*9880d681SAndroid Build Coastguard Worker bool SILowerControlFlow::shouldSkip(MachineBasicBlock *From,
160*9880d681SAndroid Build Coastguard Worker                                     MachineBasicBlock *To) {
161*9880d681SAndroid Build Coastguard Worker 
162*9880d681SAndroid Build Coastguard Worker   unsigned NumInstr = 0;
163*9880d681SAndroid Build Coastguard Worker   MachineFunction *MF = From->getParent();
164*9880d681SAndroid Build Coastguard Worker 
165*9880d681SAndroid Build Coastguard Worker   for (MachineFunction::iterator MBBI(From), ToI(To), End = MF->end();
166*9880d681SAndroid Build Coastguard Worker        MBBI != End && MBBI != ToI; ++MBBI) {
167*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock &MBB = *MBBI;
168*9880d681SAndroid Build Coastguard Worker 
169*9880d681SAndroid Build Coastguard Worker     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
170*9880d681SAndroid Build Coastguard Worker          NumInstr < SkipThreshold && I != E; ++I) {
171*9880d681SAndroid Build Coastguard Worker       if (opcodeEmitsNoInsts(I->getOpcode()))
172*9880d681SAndroid Build Coastguard Worker         continue;
173*9880d681SAndroid Build Coastguard Worker 
174*9880d681SAndroid Build Coastguard Worker       // When a uniform loop is inside non-uniform control flow, the branch
175*9880d681SAndroid Build Coastguard Worker       // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
176*9880d681SAndroid Build Coastguard Worker       // when EXEC = 0. We should skip the loop lest it becomes infinite.
177*9880d681SAndroid Build Coastguard Worker       if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
178*9880d681SAndroid Build Coastguard Worker           I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
179*9880d681SAndroid Build Coastguard Worker         return true;
180*9880d681SAndroid Build Coastguard Worker 
181*9880d681SAndroid Build Coastguard Worker       if (I->isInlineAsm()) {
182*9880d681SAndroid Build Coastguard Worker         const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
183*9880d681SAndroid Build Coastguard Worker         const char *AsmStr = I->getOperand(0).getSymbolName();
184*9880d681SAndroid Build Coastguard Worker 
185*9880d681SAndroid Build Coastguard Worker         // inlineasm length estimate is number of bytes assuming the longest
186*9880d681SAndroid Build Coastguard Worker         // instruction.
187*9880d681SAndroid Build Coastguard Worker         uint64_t MaxAsmSize = TII->getInlineAsmLength(AsmStr, *MAI);
188*9880d681SAndroid Build Coastguard Worker         NumInstr += MaxAsmSize / MAI->getMaxInstLength();
189*9880d681SAndroid Build Coastguard Worker       } else {
190*9880d681SAndroid Build Coastguard Worker         ++NumInstr;
191*9880d681SAndroid Build Coastguard Worker       }
192*9880d681SAndroid Build Coastguard Worker 
193*9880d681SAndroid Build Coastguard Worker       if (NumInstr >= SkipThreshold)
194*9880d681SAndroid Build Coastguard Worker         return true;
195*9880d681SAndroid Build Coastguard Worker     }
196*9880d681SAndroid Build Coastguard Worker   }
197*9880d681SAndroid Build Coastguard Worker 
198*9880d681SAndroid Build Coastguard Worker   return false;
199*9880d681SAndroid Build Coastguard Worker }
200*9880d681SAndroid Build Coastguard Worker 
Skip(MachineInstr & From,MachineOperand & To)201*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::Skip(MachineInstr &From, MachineOperand &To) {
202*9880d681SAndroid Build Coastguard Worker 
203*9880d681SAndroid Build Coastguard Worker   if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB()))
204*9880d681SAndroid Build Coastguard Worker     return;
205*9880d681SAndroid Build Coastguard Worker 
206*9880d681SAndroid Build Coastguard Worker   DebugLoc DL = From.getDebugLoc();
207*9880d681SAndroid Build Coastguard Worker   BuildMI(*From.getParent(), &From, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
208*9880d681SAndroid Build Coastguard Worker     .addOperand(To);
209*9880d681SAndroid Build Coastguard Worker }
210*9880d681SAndroid Build Coastguard Worker 
skipIfDead(MachineInstr & MI,MachineBasicBlock & NextBB)211*9880d681SAndroid Build Coastguard Worker bool SILowerControlFlow::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) {
212*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock &MBB = *MI.getParent();
213*9880d681SAndroid Build Coastguard Worker   MachineFunction *MF = MBB.getParent();
214*9880d681SAndroid Build Coastguard Worker 
215*9880d681SAndroid Build Coastguard Worker   if (MF->getFunction()->getCallingConv() != CallingConv::AMDGPU_PS ||
216*9880d681SAndroid Build Coastguard Worker       !shouldSkip(&MBB, &MBB.getParent()->back()))
217*9880d681SAndroid Build Coastguard Worker     return false;
218*9880d681SAndroid Build Coastguard Worker 
219*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *SkipBB = insertSkipBlock(MBB, MI.getIterator());
220*9880d681SAndroid Build Coastguard Worker   SkipBB->addSuccessor(&NextBB);
221*9880d681SAndroid Build Coastguard Worker 
222*9880d681SAndroid Build Coastguard Worker   const DebugLoc &DL = MI.getDebugLoc();
223*9880d681SAndroid Build Coastguard Worker 
224*9880d681SAndroid Build Coastguard Worker   // If the exec mask is non-zero, skip the next two instructions
225*9880d681SAndroid Build Coastguard Worker   BuildMI(&MBB, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
226*9880d681SAndroid Build Coastguard Worker     .addMBB(&NextBB);
227*9880d681SAndroid Build Coastguard Worker 
228*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock::iterator Insert = SkipBB->begin();
229*9880d681SAndroid Build Coastguard Worker 
230*9880d681SAndroid Build Coastguard Worker   // Exec mask is zero: Export to NULL target...
231*9880d681SAndroid Build Coastguard Worker   BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP))
232*9880d681SAndroid Build Coastguard Worker     .addImm(0)
233*9880d681SAndroid Build Coastguard Worker     .addImm(0x09) // V_008DFC_SQ_EXP_NULL
234*9880d681SAndroid Build Coastguard Worker     .addImm(0)
235*9880d681SAndroid Build Coastguard Worker     .addImm(1)
236*9880d681SAndroid Build Coastguard Worker     .addImm(1)
237*9880d681SAndroid Build Coastguard Worker     .addReg(AMDGPU::VGPR0, RegState::Undef)
238*9880d681SAndroid Build Coastguard Worker     .addReg(AMDGPU::VGPR0, RegState::Undef)
239*9880d681SAndroid Build Coastguard Worker     .addReg(AMDGPU::VGPR0, RegState::Undef)
240*9880d681SAndroid Build Coastguard Worker     .addReg(AMDGPU::VGPR0, RegState::Undef);
241*9880d681SAndroid Build Coastguard Worker 
242*9880d681SAndroid Build Coastguard Worker   // ... and terminate wavefront.
243*9880d681SAndroid Build Coastguard Worker   BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
244*9880d681SAndroid Build Coastguard Worker 
245*9880d681SAndroid Build Coastguard Worker   return true;
246*9880d681SAndroid Build Coastguard Worker }
247*9880d681SAndroid Build Coastguard Worker 
If(MachineInstr & MI)248*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::If(MachineInstr &MI) {
249*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock &MBB = *MI.getParent();
250*9880d681SAndroid Build Coastguard Worker   DebugLoc DL = MI.getDebugLoc();
251*9880d681SAndroid Build Coastguard Worker   unsigned Reg = MI.getOperand(0).getReg();
252*9880d681SAndroid Build Coastguard Worker   unsigned Vcc = MI.getOperand(1).getReg();
253*9880d681SAndroid Build Coastguard Worker 
254*9880d681SAndroid Build Coastguard Worker   BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), Reg)
255*9880d681SAndroid Build Coastguard Worker           .addReg(Vcc);
256*9880d681SAndroid Build Coastguard Worker 
257*9880d681SAndroid Build Coastguard Worker   BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), Reg)
258*9880d681SAndroid Build Coastguard Worker           .addReg(AMDGPU::EXEC)
259*9880d681SAndroid Build Coastguard Worker           .addReg(Reg);
260*9880d681SAndroid Build Coastguard Worker 
261*9880d681SAndroid Build Coastguard Worker   Skip(MI, MI.getOperand(2));
262*9880d681SAndroid Build Coastguard Worker 
263*9880d681SAndroid Build Coastguard Worker   // Insert a pseudo terminator to help keep the verifier happy.
264*9880d681SAndroid Build Coastguard Worker   BuildMI(MBB, &MI, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
265*9880d681SAndroid Build Coastguard Worker     .addOperand(MI.getOperand(2))
266*9880d681SAndroid Build Coastguard Worker     .addReg(Reg);
267*9880d681SAndroid Build Coastguard Worker 
268*9880d681SAndroid Build Coastguard Worker   MI.eraseFromParent();
269*9880d681SAndroid Build Coastguard Worker }
270*9880d681SAndroid Build Coastguard Worker 
Else(MachineInstr & MI,bool ExecModified)271*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::Else(MachineInstr &MI, bool ExecModified) {
272*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock &MBB = *MI.getParent();
273*9880d681SAndroid Build Coastguard Worker   DebugLoc DL = MI.getDebugLoc();
274*9880d681SAndroid Build Coastguard Worker   unsigned Dst = MI.getOperand(0).getReg();
275*9880d681SAndroid Build Coastguard Worker   unsigned Src = MI.getOperand(1).getReg();
276*9880d681SAndroid Build Coastguard Worker 
277*9880d681SAndroid Build Coastguard Worker   BuildMI(MBB, MBB.getFirstNonPHI(), DL,
278*9880d681SAndroid Build Coastguard Worker           TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst)
279*9880d681SAndroid Build Coastguard Worker           .addReg(Src); // Saved EXEC
280*9880d681SAndroid Build Coastguard Worker 
281*9880d681SAndroid Build Coastguard Worker   if (ExecModified) {
282*9880d681SAndroid Build Coastguard Worker     // Adjust the saved exec to account for the modifications during the flow
283*9880d681SAndroid Build Coastguard Worker     // block that contains the ELSE. This can happen when WQM mode is switched
284*9880d681SAndroid Build Coastguard Worker     // off.
285*9880d681SAndroid Build Coastguard Worker     BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_B64), Dst)
286*9880d681SAndroid Build Coastguard Worker             .addReg(AMDGPU::EXEC)
287*9880d681SAndroid Build Coastguard Worker             .addReg(Dst);
288*9880d681SAndroid Build Coastguard Worker   }
289*9880d681SAndroid Build Coastguard Worker 
290*9880d681SAndroid Build Coastguard Worker   BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
291*9880d681SAndroid Build Coastguard Worker           .addReg(AMDGPU::EXEC)
292*9880d681SAndroid Build Coastguard Worker           .addReg(Dst);
293*9880d681SAndroid Build Coastguard Worker 
294*9880d681SAndroid Build Coastguard Worker   Skip(MI, MI.getOperand(2));
295*9880d681SAndroid Build Coastguard Worker 
296*9880d681SAndroid Build Coastguard Worker   // Insert a pseudo terminator to help keep the verifier happy.
297*9880d681SAndroid Build Coastguard Worker   BuildMI(MBB, &MI, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
298*9880d681SAndroid Build Coastguard Worker     .addOperand(MI.getOperand(2))
299*9880d681SAndroid Build Coastguard Worker     .addReg(Dst);
300*9880d681SAndroid Build Coastguard Worker 
301*9880d681SAndroid Build Coastguard Worker   MI.eraseFromParent();
302*9880d681SAndroid Build Coastguard Worker }
303*9880d681SAndroid Build Coastguard Worker 
Break(MachineInstr & MI)304*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::Break(MachineInstr &MI) {
305*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock &MBB = *MI.getParent();
306*9880d681SAndroid Build Coastguard Worker   DebugLoc DL = MI.getDebugLoc();
307*9880d681SAndroid Build Coastguard Worker 
308*9880d681SAndroid Build Coastguard Worker   unsigned Dst = MI.getOperand(0).getReg();
309*9880d681SAndroid Build Coastguard Worker   unsigned Src = MI.getOperand(1).getReg();
310*9880d681SAndroid Build Coastguard Worker 
311*9880d681SAndroid Build Coastguard Worker   BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
312*9880d681SAndroid Build Coastguard Worker           .addReg(AMDGPU::EXEC)
313*9880d681SAndroid Build Coastguard Worker           .addReg(Src);
314*9880d681SAndroid Build Coastguard Worker 
315*9880d681SAndroid Build Coastguard Worker   MI.eraseFromParent();
316*9880d681SAndroid Build Coastguard Worker }
317*9880d681SAndroid Build Coastguard Worker 
IfBreak(MachineInstr & MI)318*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::IfBreak(MachineInstr &MI) {
319*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock &MBB = *MI.getParent();
320*9880d681SAndroid Build Coastguard Worker   DebugLoc DL = MI.getDebugLoc();
321*9880d681SAndroid Build Coastguard Worker 
322*9880d681SAndroid Build Coastguard Worker   unsigned Dst = MI.getOperand(0).getReg();
323*9880d681SAndroid Build Coastguard Worker   unsigned Vcc = MI.getOperand(1).getReg();
324*9880d681SAndroid Build Coastguard Worker   unsigned Src = MI.getOperand(2).getReg();
325*9880d681SAndroid Build Coastguard Worker 
326*9880d681SAndroid Build Coastguard Worker   BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
327*9880d681SAndroid Build Coastguard Worker           .addReg(Vcc)
328*9880d681SAndroid Build Coastguard Worker           .addReg(Src);
329*9880d681SAndroid Build Coastguard Worker 
330*9880d681SAndroid Build Coastguard Worker   MI.eraseFromParent();
331*9880d681SAndroid Build Coastguard Worker }
332*9880d681SAndroid Build Coastguard Worker 
ElseBreak(MachineInstr & MI)333*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::ElseBreak(MachineInstr &MI) {
334*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock &MBB = *MI.getParent();
335*9880d681SAndroid Build Coastguard Worker   DebugLoc DL = MI.getDebugLoc();
336*9880d681SAndroid Build Coastguard Worker 
337*9880d681SAndroid Build Coastguard Worker   unsigned Dst = MI.getOperand(0).getReg();
338*9880d681SAndroid Build Coastguard Worker   unsigned Saved = MI.getOperand(1).getReg();
339*9880d681SAndroid Build Coastguard Worker   unsigned Src = MI.getOperand(2).getReg();
340*9880d681SAndroid Build Coastguard Worker 
341*9880d681SAndroid Build Coastguard Worker   BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
342*9880d681SAndroid Build Coastguard Worker           .addReg(Saved)
343*9880d681SAndroid Build Coastguard Worker           .addReg(Src);
344*9880d681SAndroid Build Coastguard Worker 
345*9880d681SAndroid Build Coastguard Worker   MI.eraseFromParent();
346*9880d681SAndroid Build Coastguard Worker }
347*9880d681SAndroid Build Coastguard Worker 
Loop(MachineInstr & MI)348*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::Loop(MachineInstr &MI) {
349*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock &MBB = *MI.getParent();
350*9880d681SAndroid Build Coastguard Worker   DebugLoc DL = MI.getDebugLoc();
351*9880d681SAndroid Build Coastguard Worker   unsigned Src = MI.getOperand(0).getReg();
352*9880d681SAndroid Build Coastguard Worker 
353*9880d681SAndroid Build Coastguard Worker   BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64), AMDGPU::EXEC)
354*9880d681SAndroid Build Coastguard Worker           .addReg(AMDGPU::EXEC)
355*9880d681SAndroid Build Coastguard Worker           .addReg(Src);
356*9880d681SAndroid Build Coastguard Worker 
357*9880d681SAndroid Build Coastguard Worker   BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
358*9880d681SAndroid Build Coastguard Worker     .addOperand(MI.getOperand(1));
359*9880d681SAndroid Build Coastguard Worker 
360*9880d681SAndroid Build Coastguard Worker   MI.eraseFromParent();
361*9880d681SAndroid Build Coastguard Worker }
362*9880d681SAndroid Build Coastguard Worker 
EndCf(MachineInstr & MI)363*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::EndCf(MachineInstr &MI) {
364*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock &MBB = *MI.getParent();
365*9880d681SAndroid Build Coastguard Worker   DebugLoc DL = MI.getDebugLoc();
366*9880d681SAndroid Build Coastguard Worker   unsigned Reg = MI.getOperand(0).getReg();
367*9880d681SAndroid Build Coastguard Worker 
368*9880d681SAndroid Build Coastguard Worker   BuildMI(MBB, MBB.getFirstNonPHI(), DL,
369*9880d681SAndroid Build Coastguard Worker           TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
370*9880d681SAndroid Build Coastguard Worker           .addReg(AMDGPU::EXEC)
371*9880d681SAndroid Build Coastguard Worker           .addReg(Reg);
372*9880d681SAndroid Build Coastguard Worker 
373*9880d681SAndroid Build Coastguard Worker   MI.eraseFromParent();
374*9880d681SAndroid Build Coastguard Worker }
375*9880d681SAndroid Build Coastguard Worker 
Branch(MachineInstr & MI)376*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::Branch(MachineInstr &MI) {
377*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
378*9880d681SAndroid Build Coastguard Worker   if (MBB == MI.getParent()->getNextNode())
379*9880d681SAndroid Build Coastguard Worker     MI.eraseFromParent();
380*9880d681SAndroid Build Coastguard Worker 
381*9880d681SAndroid Build Coastguard Worker   // If these aren't equal, this is probably an infinite loop.
382*9880d681SAndroid Build Coastguard Worker }
383*9880d681SAndroid Build Coastguard Worker 
Kill(MachineInstr & MI)384*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::Kill(MachineInstr &MI) {
385*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock &MBB = *MI.getParent();
386*9880d681SAndroid Build Coastguard Worker   DebugLoc DL = MI.getDebugLoc();
387*9880d681SAndroid Build Coastguard Worker   const MachineOperand &Op = MI.getOperand(0);
388*9880d681SAndroid Build Coastguard Worker 
389*9880d681SAndroid Build Coastguard Worker #ifndef NDEBUG
390*9880d681SAndroid Build Coastguard Worker   CallingConv::ID CallConv = MBB.getParent()->getFunction()->getCallingConv();
391*9880d681SAndroid Build Coastguard Worker   // Kill is only allowed in pixel / geometry shaders.
392*9880d681SAndroid Build Coastguard Worker   assert(CallConv == CallingConv::AMDGPU_PS ||
393*9880d681SAndroid Build Coastguard Worker          CallConv == CallingConv::AMDGPU_GS);
394*9880d681SAndroid Build Coastguard Worker #endif
395*9880d681SAndroid Build Coastguard Worker 
396*9880d681SAndroid Build Coastguard Worker   // Clear this thread from the exec mask if the operand is negative
397*9880d681SAndroid Build Coastguard Worker   if ((Op.isImm())) {
398*9880d681SAndroid Build Coastguard Worker     // Constant operand: Set exec mask to 0 or do nothing
399*9880d681SAndroid Build Coastguard Worker     if (Op.getImm() & 0x80000000) {
400*9880d681SAndroid Build Coastguard Worker       BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
401*9880d681SAndroid Build Coastguard Worker               .addImm(0);
402*9880d681SAndroid Build Coastguard Worker     }
403*9880d681SAndroid Build Coastguard Worker   } else {
404*9880d681SAndroid Build Coastguard Worker     BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32))
405*9880d681SAndroid Build Coastguard Worker            .addImm(0)
406*9880d681SAndroid Build Coastguard Worker            .addOperand(Op);
407*9880d681SAndroid Build Coastguard Worker   }
408*9880d681SAndroid Build Coastguard Worker 
409*9880d681SAndroid Build Coastguard Worker   MI.eraseFromParent();
410*9880d681SAndroid Build Coastguard Worker }
411*9880d681SAndroid Build Coastguard Worker 
412*9880d681SAndroid Build Coastguard Worker // All currently live registers must remain so in the remainder block.
splitLoadM0BlockLiveIns(LivePhysRegs & RemainderLiveRegs,const MachineRegisterInfo & MRI,const MachineInstr & MI,MachineBasicBlock & LoopBB,MachineBasicBlock & RemainderBB,unsigned SaveReg,const MachineOperand & IdxReg)413*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::splitLoadM0BlockLiveIns(LivePhysRegs &RemainderLiveRegs,
414*9880d681SAndroid Build Coastguard Worker                                                  const MachineRegisterInfo &MRI,
415*9880d681SAndroid Build Coastguard Worker                                                  const MachineInstr &MI,
416*9880d681SAndroid Build Coastguard Worker                                                  MachineBasicBlock &LoopBB,
417*9880d681SAndroid Build Coastguard Worker                                                  MachineBasicBlock &RemainderBB,
418*9880d681SAndroid Build Coastguard Worker                                                  unsigned SaveReg,
419*9880d681SAndroid Build Coastguard Worker                                                  const MachineOperand &IdxReg) {
420*9880d681SAndroid Build Coastguard Worker   // Add reg defined in loop body.
421*9880d681SAndroid Build Coastguard Worker   RemainderLiveRegs.addReg(SaveReg);
422*9880d681SAndroid Build Coastguard Worker 
423*9880d681SAndroid Build Coastguard Worker   if (const MachineOperand *Val = TII->getNamedOperand(MI, AMDGPU::OpName::val)) {
424*9880d681SAndroid Build Coastguard Worker     if (!Val->isUndef()) {
425*9880d681SAndroid Build Coastguard Worker       RemainderLiveRegs.addReg(Val->getReg());
426*9880d681SAndroid Build Coastguard Worker       LoopBB.addLiveIn(Val->getReg());
427*9880d681SAndroid Build Coastguard Worker     }
428*9880d681SAndroid Build Coastguard Worker   }
429*9880d681SAndroid Build Coastguard Worker 
430*9880d681SAndroid Build Coastguard Worker   for (unsigned Reg : RemainderLiveRegs) {
431*9880d681SAndroid Build Coastguard Worker     if (MRI.isAllocatable(Reg))
432*9880d681SAndroid Build Coastguard Worker       RemainderBB.addLiveIn(Reg);
433*9880d681SAndroid Build Coastguard Worker   }
434*9880d681SAndroid Build Coastguard Worker 
435*9880d681SAndroid Build Coastguard Worker   const MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src);
436*9880d681SAndroid Build Coastguard Worker   if (!Src->isUndef())
437*9880d681SAndroid Build Coastguard Worker     LoopBB.addLiveIn(Src->getReg());
438*9880d681SAndroid Build Coastguard Worker 
439*9880d681SAndroid Build Coastguard Worker   if (!IdxReg.isUndef())
440*9880d681SAndroid Build Coastguard Worker     LoopBB.addLiveIn(IdxReg.getReg());
441*9880d681SAndroid Build Coastguard Worker   LoopBB.sortUniqueLiveIns();
442*9880d681SAndroid Build Coastguard Worker }
443*9880d681SAndroid Build Coastguard Worker 
emitLoadM0FromVGPRLoop(MachineBasicBlock & LoopBB,DebugLoc DL,MachineInstr * MovRel,const MachineOperand & IdxReg,int Offset)444*9880d681SAndroid Build Coastguard Worker void SILowerControlFlow::emitLoadM0FromVGPRLoop(MachineBasicBlock &LoopBB,
445*9880d681SAndroid Build Coastguard Worker                                                 DebugLoc DL,
446*9880d681SAndroid Build Coastguard Worker                                                 MachineInstr *MovRel,
447*9880d681SAndroid Build Coastguard Worker                                                 const MachineOperand &IdxReg,
448*9880d681SAndroid Build Coastguard Worker                                                 int Offset) {
449*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock::iterator I = LoopBB.begin();
450*9880d681SAndroid Build Coastguard Worker 
451*9880d681SAndroid Build Coastguard Worker   // Read the next variant into VCC (lower 32 bits) <- also loop target
452*9880d681SAndroid Build Coastguard Worker   BuildMI(LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), AMDGPU::VCC_LO)
453*9880d681SAndroid Build Coastguard Worker     .addReg(IdxReg.getReg(), getUndefRegState(IdxReg.isUndef()));
454*9880d681SAndroid Build Coastguard Worker 
455*9880d681SAndroid Build Coastguard Worker   // Move index from VCC into M0
456*9880d681SAndroid Build Coastguard Worker   BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
457*9880d681SAndroid Build Coastguard Worker     .addReg(AMDGPU::VCC_LO);
458*9880d681SAndroid Build Coastguard Worker 
459*9880d681SAndroid Build Coastguard Worker   // Compare the just read M0 value to all possible Idx values
460*9880d681SAndroid Build Coastguard Worker   BuildMI(LoopBB, I, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32))
461*9880d681SAndroid Build Coastguard Worker     .addReg(AMDGPU::M0)
462*9880d681SAndroid Build Coastguard Worker     .addReg(IdxReg.getReg(), getUndefRegState(IdxReg.isUndef()));
463*9880d681SAndroid Build Coastguard Worker 
464*9880d681SAndroid Build Coastguard Worker   // Update EXEC, save the original EXEC value to VCC
465*9880d681SAndroid Build Coastguard Worker   BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
466*9880d681SAndroid Build Coastguard Worker     .addReg(AMDGPU::VCC);
467*9880d681SAndroid Build Coastguard Worker 
468*9880d681SAndroid Build Coastguard Worker   if (Offset != 0) {
469*9880d681SAndroid Build Coastguard Worker     BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
470*9880d681SAndroid Build Coastguard Worker       .addReg(AMDGPU::M0)
471*9880d681SAndroid Build Coastguard Worker       .addImm(Offset);
472*9880d681SAndroid Build Coastguard Worker   }
473*9880d681SAndroid Build Coastguard Worker 
474*9880d681SAndroid Build Coastguard Worker   // Do the actual move
475*9880d681SAndroid Build Coastguard Worker   LoopBB.insert(I, MovRel);
476*9880d681SAndroid Build Coastguard Worker 
477*9880d681SAndroid Build Coastguard Worker   // Update EXEC, switch all done bits to 0 and all todo bits to 1
478*9880d681SAndroid Build Coastguard Worker   BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
479*9880d681SAndroid Build Coastguard Worker     .addReg(AMDGPU::EXEC)
480*9880d681SAndroid Build Coastguard Worker     .addReg(AMDGPU::VCC);
481*9880d681SAndroid Build Coastguard Worker 
482*9880d681SAndroid Build Coastguard Worker   // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
483*9880d681SAndroid Build Coastguard Worker   BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
484*9880d681SAndroid Build Coastguard Worker     .addMBB(&LoopBB);
485*9880d681SAndroid Build Coastguard Worker }
486*9880d681SAndroid Build Coastguard Worker 
insertSkipBlock(MachineBasicBlock & MBB,MachineBasicBlock::iterator I) const487*9880d681SAndroid Build Coastguard Worker MachineBasicBlock *SILowerControlFlow::insertSkipBlock(
488*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
489*9880d681SAndroid Build Coastguard Worker   MachineFunction *MF = MBB.getParent();
490*9880d681SAndroid Build Coastguard Worker 
491*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *SkipBB = MF->CreateMachineBasicBlock();
492*9880d681SAndroid Build Coastguard Worker   MachineFunction::iterator MBBI(MBB);
493*9880d681SAndroid Build Coastguard Worker   ++MBBI;
494*9880d681SAndroid Build Coastguard Worker 
495*9880d681SAndroid Build Coastguard Worker   MF->insert(MBBI, SkipBB);
496*9880d681SAndroid Build Coastguard Worker   MBB.addSuccessor(SkipBB);
497*9880d681SAndroid Build Coastguard Worker 
498*9880d681SAndroid Build Coastguard Worker   return SkipBB;
499*9880d681SAndroid Build Coastguard Worker }
500*9880d681SAndroid Build Coastguard Worker 
501*9880d681SAndroid Build Coastguard Worker std::pair<MachineBasicBlock *, MachineBasicBlock *>
splitBlock(MachineBasicBlock & MBB,MachineBasicBlock::iterator I)502*9880d681SAndroid Build Coastguard Worker SILowerControlFlow::splitBlock(MachineBasicBlock &MBB,
503*9880d681SAndroid Build Coastguard Worker                                MachineBasicBlock::iterator I) {
504*9880d681SAndroid Build Coastguard Worker   MachineFunction *MF = MBB.getParent();
505*9880d681SAndroid Build Coastguard Worker 
506*9880d681SAndroid Build Coastguard Worker   // To insert the loop we need to split the block. Move everything after this
507*9880d681SAndroid Build Coastguard Worker   // point to a new block, and insert a new empty block between the two.
508*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
509*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
510*9880d681SAndroid Build Coastguard Worker   MachineFunction::iterator MBBI(MBB);
511*9880d681SAndroid Build Coastguard Worker   ++MBBI;
512*9880d681SAndroid Build Coastguard Worker 
513*9880d681SAndroid Build Coastguard Worker   MF->insert(MBBI, LoopBB);
514*9880d681SAndroid Build Coastguard Worker   MF->insert(MBBI, RemainderBB);
515*9880d681SAndroid Build Coastguard Worker 
516*9880d681SAndroid Build Coastguard Worker   // Move the rest of the block into a new block.
517*9880d681SAndroid Build Coastguard Worker   RemainderBB->transferSuccessors(&MBB);
518*9880d681SAndroid Build Coastguard Worker   RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
519*9880d681SAndroid Build Coastguard Worker 
520*9880d681SAndroid Build Coastguard Worker   MBB.addSuccessor(LoopBB);
521*9880d681SAndroid Build Coastguard Worker 
522*9880d681SAndroid Build Coastguard Worker   return std::make_pair(LoopBB, RemainderBB);
523*9880d681SAndroid Build Coastguard Worker }
524*9880d681SAndroid Build Coastguard Worker 
525*9880d681SAndroid Build Coastguard Worker // Returns true if a new block was inserted.
loadM0(MachineInstr & MI,MachineInstr * MovRel,int Offset)526*9880d681SAndroid Build Coastguard Worker bool SILowerControlFlow::loadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) {
527*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock &MBB = *MI.getParent();
528*9880d681SAndroid Build Coastguard Worker   DebugLoc DL = MI.getDebugLoc();
529*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock::iterator I(&MI);
530*9880d681SAndroid Build Coastguard Worker 
531*9880d681SAndroid Build Coastguard Worker   const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
532*9880d681SAndroid Build Coastguard Worker 
533*9880d681SAndroid Build Coastguard Worker   if (AMDGPU::SReg_32RegClass.contains(Idx->getReg())) {
534*9880d681SAndroid Build Coastguard Worker     if (Offset != 0) {
535*9880d681SAndroid Build Coastguard Worker       BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
536*9880d681SAndroid Build Coastguard Worker         .addReg(Idx->getReg(), getUndefRegState(Idx->isUndef()))
537*9880d681SAndroid Build Coastguard Worker         .addImm(Offset);
538*9880d681SAndroid Build Coastguard Worker     } else {
539*9880d681SAndroid Build Coastguard Worker       BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
540*9880d681SAndroid Build Coastguard Worker         .addReg(Idx->getReg(), getUndefRegState(Idx->isUndef()));
541*9880d681SAndroid Build Coastguard Worker     }
542*9880d681SAndroid Build Coastguard Worker 
543*9880d681SAndroid Build Coastguard Worker     MBB.insert(I, MovRel);
544*9880d681SAndroid Build Coastguard Worker     MI.eraseFromParent();
545*9880d681SAndroid Build Coastguard Worker     return false;
546*9880d681SAndroid Build Coastguard Worker   }
547*9880d681SAndroid Build Coastguard Worker 
548*9880d681SAndroid Build Coastguard Worker   MachineOperand *SaveOp = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
549*9880d681SAndroid Build Coastguard Worker   SaveOp->setIsDead(false);
550*9880d681SAndroid Build Coastguard Worker   unsigned Save = SaveOp->getReg();
551*9880d681SAndroid Build Coastguard Worker 
552*9880d681SAndroid Build Coastguard Worker   // Reading from a VGPR requires looping over all workitems in the wavefront.
553*9880d681SAndroid Build Coastguard Worker   assert(AMDGPU::SReg_64RegClass.contains(Save) &&
554*9880d681SAndroid Build Coastguard Worker          AMDGPU::VGPR_32RegClass.contains(Idx->getReg()));
555*9880d681SAndroid Build Coastguard Worker 
556*9880d681SAndroid Build Coastguard Worker   // Save the EXEC mask
557*9880d681SAndroid Build Coastguard Worker   BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64), Save)
558*9880d681SAndroid Build Coastguard Worker     .addReg(AMDGPU::EXEC);
559*9880d681SAndroid Build Coastguard Worker 
560*9880d681SAndroid Build Coastguard Worker   LivePhysRegs RemainderLiveRegs(TRI);
561*9880d681SAndroid Build Coastguard Worker 
562*9880d681SAndroid Build Coastguard Worker   RemainderLiveRegs.addLiveOuts(MBB);
563*9880d681SAndroid Build Coastguard Worker 
564*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *LoopBB;
565*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *RemainderBB;
566*9880d681SAndroid Build Coastguard Worker 
567*9880d681SAndroid Build Coastguard Worker   std::tie(LoopBB, RemainderBB) = splitBlock(MBB, I);
568*9880d681SAndroid Build Coastguard Worker 
569*9880d681SAndroid Build Coastguard Worker   for (const MachineInstr &Inst : reverse(*RemainderBB))
570*9880d681SAndroid Build Coastguard Worker     RemainderLiveRegs.stepBackward(Inst);
571*9880d681SAndroid Build Coastguard Worker 
572*9880d681SAndroid Build Coastguard Worker   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
573*9880d681SAndroid Build Coastguard Worker   LoopBB->addSuccessor(RemainderBB);
574*9880d681SAndroid Build Coastguard Worker   LoopBB->addSuccessor(LoopBB);
575*9880d681SAndroid Build Coastguard Worker 
576*9880d681SAndroid Build Coastguard Worker   splitLoadM0BlockLiveIns(RemainderLiveRegs, MRI, MI, *LoopBB,
577*9880d681SAndroid Build Coastguard Worker                           *RemainderBB, Save, *Idx);
578*9880d681SAndroid Build Coastguard Worker 
579*9880d681SAndroid Build Coastguard Worker   emitLoadM0FromVGPRLoop(*LoopBB, DL, MovRel, *Idx, Offset);
580*9880d681SAndroid Build Coastguard Worker 
581*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock::iterator First = RemainderBB->begin();
582*9880d681SAndroid Build Coastguard Worker   BuildMI(*RemainderBB, First, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
583*9880d681SAndroid Build Coastguard Worker     .addReg(Save);
584*9880d681SAndroid Build Coastguard Worker 
585*9880d681SAndroid Build Coastguard Worker   MI.eraseFromParent();
586*9880d681SAndroid Build Coastguard Worker   return true;
587*9880d681SAndroid Build Coastguard Worker }
588*9880d681SAndroid Build Coastguard Worker 
589*9880d681SAndroid Build Coastguard Worker /// \param @VecReg The register which holds element zero of the vector being
590*9880d681SAndroid Build Coastguard Worker ///                 addressed into.
591*9880d681SAndroid Build Coastguard Worker //
592*9880d681SAndroid Build Coastguard Worker /// \param[in] @Idx The index operand from the movrel instruction. This must be
593*9880d681SAndroid Build Coastguard Worker // a register, but may be NoRegister.
594*9880d681SAndroid Build Coastguard Worker ///
595*9880d681SAndroid Build Coastguard Worker /// \param[in] @Offset As an input, this is the constant offset part of the
596*9880d681SAndroid Build Coastguard Worker // indirect Index. e.g. v0 = v[VecReg + Offset] As an output, this is a constant
597*9880d681SAndroid Build Coastguard Worker // value that needs to be added to the value stored in M0.
598*9880d681SAndroid Build Coastguard Worker std::pair<unsigned, int>
computeIndirectRegAndOffset(unsigned VecReg,int Offset) const599*9880d681SAndroid Build Coastguard Worker SILowerControlFlow::computeIndirectRegAndOffset(unsigned VecReg, int Offset) const {
600*9880d681SAndroid Build Coastguard Worker   unsigned SubReg = TRI->getSubReg(VecReg, AMDGPU::sub0);
601*9880d681SAndroid Build Coastguard Worker   if (!SubReg)
602*9880d681SAndroid Build Coastguard Worker     SubReg = VecReg;
603*9880d681SAndroid Build Coastguard Worker 
604*9880d681SAndroid Build Coastguard Worker   const TargetRegisterClass *SuperRC = TRI->getPhysRegClass(VecReg);
605*9880d681SAndroid Build Coastguard Worker   const TargetRegisterClass *RC = TRI->getPhysRegClass(SubReg);
606*9880d681SAndroid Build Coastguard Worker   int NumElts = SuperRC->getSize() / RC->getSize();
607*9880d681SAndroid Build Coastguard Worker 
608*9880d681SAndroid Build Coastguard Worker   int BaseRegIdx = TRI->getHWRegIndex(SubReg);
609*9880d681SAndroid Build Coastguard Worker 
610*9880d681SAndroid Build Coastguard Worker   // Skip out of bounds offsets, or else we would end up using an undefined
611*9880d681SAndroid Build Coastguard Worker   // register.
612*9880d681SAndroid Build Coastguard Worker   if (Offset >= NumElts)
613*9880d681SAndroid Build Coastguard Worker     return std::make_pair(RC->getRegister(BaseRegIdx), Offset);
614*9880d681SAndroid Build Coastguard Worker 
615*9880d681SAndroid Build Coastguard Worker   int RegIdx = BaseRegIdx + Offset;
616*9880d681SAndroid Build Coastguard Worker   if (RegIdx < 0) {
617*9880d681SAndroid Build Coastguard Worker     Offset = RegIdx;
618*9880d681SAndroid Build Coastguard Worker     RegIdx = 0;
619*9880d681SAndroid Build Coastguard Worker   } else {
620*9880d681SAndroid Build Coastguard Worker     Offset = 0;
621*9880d681SAndroid Build Coastguard Worker   }
622*9880d681SAndroid Build Coastguard Worker 
623*9880d681SAndroid Build Coastguard Worker   unsigned Reg = RC->getRegister(RegIdx);
624*9880d681SAndroid Build Coastguard Worker   return std::make_pair(Reg, Offset);
625*9880d681SAndroid Build Coastguard Worker }
626*9880d681SAndroid Build Coastguard Worker 
627*9880d681SAndroid Build Coastguard Worker // Return true if a new block was inserted.
indirectSrc(MachineInstr & MI)628*9880d681SAndroid Build Coastguard Worker bool SILowerControlFlow::indirectSrc(MachineInstr &MI) {
629*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock &MBB = *MI.getParent();
630*9880d681SAndroid Build Coastguard Worker   const DebugLoc &DL = MI.getDebugLoc();
631*9880d681SAndroid Build Coastguard Worker 
632*9880d681SAndroid Build Coastguard Worker   unsigned Dst = MI.getOperand(0).getReg();
633*9880d681SAndroid Build Coastguard Worker   const MachineOperand *SrcVec = TII->getNamedOperand(MI, AMDGPU::OpName::src);
634*9880d681SAndroid Build Coastguard Worker   int Offset = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm();
635*9880d681SAndroid Build Coastguard Worker   unsigned Reg;
636*9880d681SAndroid Build Coastguard Worker 
637*9880d681SAndroid Build Coastguard Worker   std::tie(Reg, Offset) = computeIndirectRegAndOffset(SrcVec->getReg(), Offset);
638*9880d681SAndroid Build Coastguard Worker 
639*9880d681SAndroid Build Coastguard Worker   const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
640*9880d681SAndroid Build Coastguard Worker   if (Idx->getReg() == AMDGPU::NoRegister) {
641*9880d681SAndroid Build Coastguard Worker     // Only had a constant offset, copy the register directly.
642*9880d681SAndroid Build Coastguard Worker     BuildMI(MBB, MI.getIterator(), DL, TII->get(AMDGPU::V_MOV_B32_e32), Dst)
643*9880d681SAndroid Build Coastguard Worker       .addReg(Reg, getUndefRegState(SrcVec->isUndef()));
644*9880d681SAndroid Build Coastguard Worker     MI.eraseFromParent();
645*9880d681SAndroid Build Coastguard Worker     return false;
646*9880d681SAndroid Build Coastguard Worker   }
647*9880d681SAndroid Build Coastguard Worker 
648*9880d681SAndroid Build Coastguard Worker   MachineInstr *MovRel =
649*9880d681SAndroid Build Coastguard Worker     BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
650*9880d681SAndroid Build Coastguard Worker     .addReg(Reg, getUndefRegState(SrcVec->isUndef()))
651*9880d681SAndroid Build Coastguard Worker     .addReg(SrcVec->getReg(), RegState::Implicit);
652*9880d681SAndroid Build Coastguard Worker 
653*9880d681SAndroid Build Coastguard Worker   return loadM0(MI, MovRel, Offset);
654*9880d681SAndroid Build Coastguard Worker }
655*9880d681SAndroid Build Coastguard Worker 
656*9880d681SAndroid Build Coastguard Worker // Return true if a new block was inserted.
indirectDst(MachineInstr & MI)657*9880d681SAndroid Build Coastguard Worker bool SILowerControlFlow::indirectDst(MachineInstr &MI) {
658*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock &MBB = *MI.getParent();
659*9880d681SAndroid Build Coastguard Worker   const DebugLoc &DL = MI.getDebugLoc();
660*9880d681SAndroid Build Coastguard Worker 
661*9880d681SAndroid Build Coastguard Worker   unsigned Dst = MI.getOperand(0).getReg();
662*9880d681SAndroid Build Coastguard Worker   int Offset = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm();
663*9880d681SAndroid Build Coastguard Worker   unsigned Reg;
664*9880d681SAndroid Build Coastguard Worker 
665*9880d681SAndroid Build Coastguard Worker   const MachineOperand *Val = TII->getNamedOperand(MI, AMDGPU::OpName::val);
666*9880d681SAndroid Build Coastguard Worker   std::tie(Reg, Offset) = computeIndirectRegAndOffset(Dst, Offset);
667*9880d681SAndroid Build Coastguard Worker 
668*9880d681SAndroid Build Coastguard Worker   MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
669*9880d681SAndroid Build Coastguard Worker   if (Idx->getReg() == AMDGPU::NoRegister) {
670*9880d681SAndroid Build Coastguard Worker     // Only had a constant offset, copy the register directly.
671*9880d681SAndroid Build Coastguard Worker     BuildMI(MBB, MI.getIterator(), DL, TII->get(AMDGPU::V_MOV_B32_e32), Reg)
672*9880d681SAndroid Build Coastguard Worker       .addOperand(*Val);
673*9880d681SAndroid Build Coastguard Worker     MI.eraseFromParent();
674*9880d681SAndroid Build Coastguard Worker     return false;
675*9880d681SAndroid Build Coastguard Worker   }
676*9880d681SAndroid Build Coastguard Worker 
677*9880d681SAndroid Build Coastguard Worker   MachineInstr *MovRel =
678*9880d681SAndroid Build Coastguard Worker     BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32), Reg)
679*9880d681SAndroid Build Coastguard Worker     .addReg(Val->getReg(), getUndefRegState(Val->isUndef()))
680*9880d681SAndroid Build Coastguard Worker     .addReg(Dst, RegState::Implicit);
681*9880d681SAndroid Build Coastguard Worker 
682*9880d681SAndroid Build Coastguard Worker   return loadM0(MI, MovRel, Offset);
683*9880d681SAndroid Build Coastguard Worker }
684*9880d681SAndroid Build Coastguard Worker 
runOnMachineFunction(MachineFunction & MF)685*9880d681SAndroid Build Coastguard Worker bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
686*9880d681SAndroid Build Coastguard Worker   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
687*9880d681SAndroid Build Coastguard Worker   TII = ST.getInstrInfo();
688*9880d681SAndroid Build Coastguard Worker   TRI = &TII->getRegisterInfo();
689*9880d681SAndroid Build Coastguard Worker 
690*9880d681SAndroid Build Coastguard Worker   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
691*9880d681SAndroid Build Coastguard Worker 
692*9880d681SAndroid Build Coastguard Worker   bool HaveKill = false;
693*9880d681SAndroid Build Coastguard Worker   bool NeedFlat = false;
694*9880d681SAndroid Build Coastguard Worker   unsigned Depth = 0;
695*9880d681SAndroid Build Coastguard Worker 
696*9880d681SAndroid Build Coastguard Worker   MachineFunction::iterator NextBB;
697*9880d681SAndroid Build Coastguard Worker 
698*9880d681SAndroid Build Coastguard Worker   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
699*9880d681SAndroid Build Coastguard Worker        BI != BE; BI = NextBB) {
700*9880d681SAndroid Build Coastguard Worker     NextBB = std::next(BI);
701*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock &MBB = *BI;
702*9880d681SAndroid Build Coastguard Worker 
703*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock *EmptyMBBAtEnd = nullptr;
704*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock::iterator I, Next;
705*9880d681SAndroid Build Coastguard Worker     bool ExecModified = false;
706*9880d681SAndroid Build Coastguard Worker 
707*9880d681SAndroid Build Coastguard Worker     for (I = MBB.begin(); I != MBB.end(); I = Next) {
708*9880d681SAndroid Build Coastguard Worker       Next = std::next(I);
709*9880d681SAndroid Build Coastguard Worker 
710*9880d681SAndroid Build Coastguard Worker       MachineInstr &MI = *I;
711*9880d681SAndroid Build Coastguard Worker 
712*9880d681SAndroid Build Coastguard Worker       // Flat uses m0 in case it needs to access LDS.
713*9880d681SAndroid Build Coastguard Worker       if (TII->isFLAT(MI))
714*9880d681SAndroid Build Coastguard Worker         NeedFlat = true;
715*9880d681SAndroid Build Coastguard Worker 
716*9880d681SAndroid Build Coastguard Worker       if (I->modifiesRegister(AMDGPU::EXEC, TRI))
717*9880d681SAndroid Build Coastguard Worker         ExecModified = true;
718*9880d681SAndroid Build Coastguard Worker 
719*9880d681SAndroid Build Coastguard Worker       switch (MI.getOpcode()) {
720*9880d681SAndroid Build Coastguard Worker         default: break;
721*9880d681SAndroid Build Coastguard Worker         case AMDGPU::SI_IF:
722*9880d681SAndroid Build Coastguard Worker           ++Depth;
723*9880d681SAndroid Build Coastguard Worker           If(MI);
724*9880d681SAndroid Build Coastguard Worker           break;
725*9880d681SAndroid Build Coastguard Worker 
726*9880d681SAndroid Build Coastguard Worker         case AMDGPU::SI_ELSE:
727*9880d681SAndroid Build Coastguard Worker           Else(MI, ExecModified);
728*9880d681SAndroid Build Coastguard Worker           break;
729*9880d681SAndroid Build Coastguard Worker 
730*9880d681SAndroid Build Coastguard Worker         case AMDGPU::SI_BREAK:
731*9880d681SAndroid Build Coastguard Worker           Break(MI);
732*9880d681SAndroid Build Coastguard Worker           break;
733*9880d681SAndroid Build Coastguard Worker 
734*9880d681SAndroid Build Coastguard Worker         case AMDGPU::SI_IF_BREAK:
735*9880d681SAndroid Build Coastguard Worker           IfBreak(MI);
736*9880d681SAndroid Build Coastguard Worker           break;
737*9880d681SAndroid Build Coastguard Worker 
738*9880d681SAndroid Build Coastguard Worker         case AMDGPU::SI_ELSE_BREAK:
739*9880d681SAndroid Build Coastguard Worker           ElseBreak(MI);
740*9880d681SAndroid Build Coastguard Worker           break;
741*9880d681SAndroid Build Coastguard Worker 
742*9880d681SAndroid Build Coastguard Worker         case AMDGPU::SI_LOOP:
743*9880d681SAndroid Build Coastguard Worker           ++Depth;
744*9880d681SAndroid Build Coastguard Worker           Loop(MI);
745*9880d681SAndroid Build Coastguard Worker           break;
746*9880d681SAndroid Build Coastguard Worker 
747*9880d681SAndroid Build Coastguard Worker         case AMDGPU::SI_END_CF:
748*9880d681SAndroid Build Coastguard Worker           if (--Depth == 0 && HaveKill) {
749*9880d681SAndroid Build Coastguard Worker             HaveKill = false;
750*9880d681SAndroid Build Coastguard Worker 
751*9880d681SAndroid Build Coastguard Worker             if (skipIfDead(MI, *NextBB)) {
752*9880d681SAndroid Build Coastguard Worker               NextBB = std::next(BI);
753*9880d681SAndroid Build Coastguard Worker               BE = MF.end();
754*9880d681SAndroid Build Coastguard Worker               Next = MBB.end();
755*9880d681SAndroid Build Coastguard Worker             }
756*9880d681SAndroid Build Coastguard Worker           }
757*9880d681SAndroid Build Coastguard Worker           EndCf(MI);
758*9880d681SAndroid Build Coastguard Worker           break;
759*9880d681SAndroid Build Coastguard Worker 
760*9880d681SAndroid Build Coastguard Worker         case AMDGPU::SI_KILL_TERMINATOR:
761*9880d681SAndroid Build Coastguard Worker           if (Depth == 0) {
762*9880d681SAndroid Build Coastguard Worker             if (skipIfDead(MI, *NextBB)) {
763*9880d681SAndroid Build Coastguard Worker               NextBB = std::next(BI);
764*9880d681SAndroid Build Coastguard Worker               BE = MF.end();
765*9880d681SAndroid Build Coastguard Worker               Next = MBB.end();
766*9880d681SAndroid Build Coastguard Worker             }
767*9880d681SAndroid Build Coastguard Worker           } else
768*9880d681SAndroid Build Coastguard Worker             HaveKill = true;
769*9880d681SAndroid Build Coastguard Worker           Kill(MI);
770*9880d681SAndroid Build Coastguard Worker           break;
771*9880d681SAndroid Build Coastguard Worker 
772*9880d681SAndroid Build Coastguard Worker         case AMDGPU::S_BRANCH:
773*9880d681SAndroid Build Coastguard Worker           Branch(MI);
774*9880d681SAndroid Build Coastguard Worker           break;
775*9880d681SAndroid Build Coastguard Worker 
776*9880d681SAndroid Build Coastguard Worker         case AMDGPU::SI_INDIRECT_SRC_V1:
777*9880d681SAndroid Build Coastguard Worker         case AMDGPU::SI_INDIRECT_SRC_V2:
778*9880d681SAndroid Build Coastguard Worker         case AMDGPU::SI_INDIRECT_SRC_V4:
779*9880d681SAndroid Build Coastguard Worker         case AMDGPU::SI_INDIRECT_SRC_V8:
780*9880d681SAndroid Build Coastguard Worker         case AMDGPU::SI_INDIRECT_SRC_V16:
781*9880d681SAndroid Build Coastguard Worker           if (indirectSrc(MI)) {
782*9880d681SAndroid Build Coastguard Worker             // The block was split at this point. We can safely skip the middle
783*9880d681SAndroid Build Coastguard Worker             // inserted block to the following which contains the rest of this
784*9880d681SAndroid Build Coastguard Worker             // block's instructions.
785*9880d681SAndroid Build Coastguard Worker             NextBB = std::next(BI);
786*9880d681SAndroid Build Coastguard Worker             BE = MF.end();
787*9880d681SAndroid Build Coastguard Worker             Next = MBB.end();
788*9880d681SAndroid Build Coastguard Worker           }
789*9880d681SAndroid Build Coastguard Worker 
790*9880d681SAndroid Build Coastguard Worker           break;
791*9880d681SAndroid Build Coastguard Worker 
792*9880d681SAndroid Build Coastguard Worker         case AMDGPU::SI_INDIRECT_DST_V1:
793*9880d681SAndroid Build Coastguard Worker         case AMDGPU::SI_INDIRECT_DST_V2:
794*9880d681SAndroid Build Coastguard Worker         case AMDGPU::SI_INDIRECT_DST_V4:
795*9880d681SAndroid Build Coastguard Worker         case AMDGPU::SI_INDIRECT_DST_V8:
796*9880d681SAndroid Build Coastguard Worker         case AMDGPU::SI_INDIRECT_DST_V16:
797*9880d681SAndroid Build Coastguard Worker           if (indirectDst(MI)) {
798*9880d681SAndroid Build Coastguard Worker             // The block was split at this point. We can safely skip the middle
799*9880d681SAndroid Build Coastguard Worker             // inserted block to the following which contains the rest of this
800*9880d681SAndroid Build Coastguard Worker             // block's instructions.
801*9880d681SAndroid Build Coastguard Worker             NextBB = std::next(BI);
802*9880d681SAndroid Build Coastguard Worker             BE = MF.end();
803*9880d681SAndroid Build Coastguard Worker             Next = MBB.end();
804*9880d681SAndroid Build Coastguard Worker           }
805*9880d681SAndroid Build Coastguard Worker 
806*9880d681SAndroid Build Coastguard Worker           break;
807*9880d681SAndroid Build Coastguard Worker 
808*9880d681SAndroid Build Coastguard Worker         case AMDGPU::SI_RETURN: {
809*9880d681SAndroid Build Coastguard Worker           assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
810*9880d681SAndroid Build Coastguard Worker 
811*9880d681SAndroid Build Coastguard Worker           // Graphics shaders returning non-void shouldn't contain S_ENDPGM,
812*9880d681SAndroid Build Coastguard Worker           // because external bytecode will be appended at the end.
813*9880d681SAndroid Build Coastguard Worker           if (BI != --MF.end() || I != MBB.getFirstTerminator()) {
814*9880d681SAndroid Build Coastguard Worker             // SI_RETURN is not the last instruction. Add an empty block at
815*9880d681SAndroid Build Coastguard Worker             // the end and jump there.
816*9880d681SAndroid Build Coastguard Worker             if (!EmptyMBBAtEnd) {
817*9880d681SAndroid Build Coastguard Worker               EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
818*9880d681SAndroid Build Coastguard Worker               MF.insert(MF.end(), EmptyMBBAtEnd);
819*9880d681SAndroid Build Coastguard Worker             }
820*9880d681SAndroid Build Coastguard Worker 
821*9880d681SAndroid Build Coastguard Worker             MBB.addSuccessor(EmptyMBBAtEnd);
822*9880d681SAndroid Build Coastguard Worker             BuildMI(*BI, I, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
823*9880d681SAndroid Build Coastguard Worker                     .addMBB(EmptyMBBAtEnd);
824*9880d681SAndroid Build Coastguard Worker             I->eraseFromParent();
825*9880d681SAndroid Build Coastguard Worker           }
826*9880d681SAndroid Build Coastguard Worker           break;
827*9880d681SAndroid Build Coastguard Worker         }
828*9880d681SAndroid Build Coastguard Worker       }
829*9880d681SAndroid Build Coastguard Worker     }
830*9880d681SAndroid Build Coastguard Worker   }
831*9880d681SAndroid Build Coastguard Worker 
832*9880d681SAndroid Build Coastguard Worker   if (NeedFlat && MFI->IsKernel) {
833*9880d681SAndroid Build Coastguard Worker     // TODO: What to use with function calls?
834*9880d681SAndroid Build Coastguard Worker     // We will need to Initialize the flat scratch register pair.
835*9880d681SAndroid Build Coastguard Worker     if (NeedFlat)
836*9880d681SAndroid Build Coastguard Worker       MFI->setHasFlatInstructions(true);
837*9880d681SAndroid Build Coastguard Worker   }
838*9880d681SAndroid Build Coastguard Worker 
839*9880d681SAndroid Build Coastguard Worker   return true;
840*9880d681SAndroid Build Coastguard Worker }
841