xref: /aosp_15_r20/external/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker //===-- SIInsertWaits.cpp - Insert wait instructions ----------------------===//
2*9880d681SAndroid Build Coastguard Worker //
3*9880d681SAndroid Build Coastguard Worker //                     The LLVM Compiler Infrastructure
4*9880d681SAndroid Build Coastguard Worker //
5*9880d681SAndroid Build Coastguard Worker // This file is distributed under the University of Illinois Open Source
6*9880d681SAndroid Build Coastguard Worker // License. See LICENSE.TXT for details.
7*9880d681SAndroid Build Coastguard Worker //
8*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
9*9880d681SAndroid Build Coastguard Worker //
10*9880d681SAndroid Build Coastguard Worker /// \file
11*9880d681SAndroid Build Coastguard Worker /// \brief Insert wait instructions for memory reads and writes.
12*9880d681SAndroid Build Coastguard Worker ///
13*9880d681SAndroid Build Coastguard Worker /// Memory reads and writes are issued asynchronously, so we need to insert
14*9880d681SAndroid Build Coastguard Worker /// S_WAITCNT instructions when we want to access any of their results or
15*9880d681SAndroid Build Coastguard Worker /// overwrite any register that's used asynchronously.
16*9880d681SAndroid Build Coastguard Worker //
17*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
18*9880d681SAndroid Build Coastguard Worker 
19*9880d681SAndroid Build Coastguard Worker #include "AMDGPU.h"
20*9880d681SAndroid Build Coastguard Worker #include "AMDGPUSubtarget.h"
21*9880d681SAndroid Build Coastguard Worker #include "SIDefines.h"
22*9880d681SAndroid Build Coastguard Worker #include "SIInstrInfo.h"
23*9880d681SAndroid Build Coastguard Worker #include "SIMachineFunctionInfo.h"
24*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFunction.h"
25*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFunctionPass.h"
26*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineInstrBuilder.h"
27*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineRegisterInfo.h"
28*9880d681SAndroid Build Coastguard Worker 
29*9880d681SAndroid Build Coastguard Worker #define DEBUG_TYPE "si-insert-waits"
30*9880d681SAndroid Build Coastguard Worker 
31*9880d681SAndroid Build Coastguard Worker using namespace llvm;
32*9880d681SAndroid Build Coastguard Worker 
33*9880d681SAndroid Build Coastguard Worker namespace {
34*9880d681SAndroid Build Coastguard Worker 
/// \brief Holds one value per hardware wait counter.
///
/// The same three values can be reached by name through \c Named (VM, EXP,
/// LGKM, in that order) or by position through \c Array, which lets callers
/// loop over all counters uniformly.
union Counters {
  struct {
    unsigned VM;   ///< Position 0.
    unsigned EXP;  ///< Position 1.
    unsigned LGKM; ///< Position 2.
  } Named;

  unsigned Array[3];
};
45*9880d681SAndroid Build Coastguard Worker 
/// \brief Coarse classification of the most recently processed instruction.
enum InstType {
  OTHER = 0, ///< Anything that is neither SMEM nor VMEM.
  SMEM = 1,
  VMEM = 2
};
51*9880d681SAndroid Build Coastguard Worker 
52*9880d681SAndroid Build Coastguard Worker typedef Counters RegCounters[512];
53*9880d681SAndroid Build Coastguard Worker typedef std::pair<unsigned, unsigned> RegInterval;
54*9880d681SAndroid Build Coastguard Worker 
55*9880d681SAndroid Build Coastguard Worker class SIInsertWaits : public MachineFunctionPass {
56*9880d681SAndroid Build Coastguard Worker 
57*9880d681SAndroid Build Coastguard Worker private:
58*9880d681SAndroid Build Coastguard Worker   const SISubtarget *ST;
59*9880d681SAndroid Build Coastguard Worker   const SIInstrInfo *TII;
60*9880d681SAndroid Build Coastguard Worker   const SIRegisterInfo *TRI;
61*9880d681SAndroid Build Coastguard Worker   const MachineRegisterInfo *MRI;
62*9880d681SAndroid Build Coastguard Worker 
63*9880d681SAndroid Build Coastguard Worker   /// \brief Constant hardware limits
64*9880d681SAndroid Build Coastguard Worker   static const Counters WaitCounts;
65*9880d681SAndroid Build Coastguard Worker 
66*9880d681SAndroid Build Coastguard Worker   /// \brief Constant zero value
67*9880d681SAndroid Build Coastguard Worker   static const Counters ZeroCounts;
68*9880d681SAndroid Build Coastguard Worker 
69*9880d681SAndroid Build Coastguard Worker   /// \brief Counter values we have already waited on.
70*9880d681SAndroid Build Coastguard Worker   Counters WaitedOn;
71*9880d681SAndroid Build Coastguard Worker 
72*9880d681SAndroid Build Coastguard Worker   /// \brief Counter values that we must wait on before the next counter
73*9880d681SAndroid Build Coastguard Worker   /// increase.
74*9880d681SAndroid Build Coastguard Worker   Counters DelayedWaitOn;
75*9880d681SAndroid Build Coastguard Worker 
76*9880d681SAndroid Build Coastguard Worker   /// \brief Counter values for last instruction issued.
77*9880d681SAndroid Build Coastguard Worker   Counters LastIssued;
78*9880d681SAndroid Build Coastguard Worker 
79*9880d681SAndroid Build Coastguard Worker   /// \brief Registers used by async instructions.
80*9880d681SAndroid Build Coastguard Worker   RegCounters UsedRegs;
81*9880d681SAndroid Build Coastguard Worker 
82*9880d681SAndroid Build Coastguard Worker   /// \brief Registers defined by async instructions.
83*9880d681SAndroid Build Coastguard Worker   RegCounters DefinedRegs;
84*9880d681SAndroid Build Coastguard Worker 
85*9880d681SAndroid Build Coastguard Worker   /// \brief Different export instruction types seen since last wait.
86*9880d681SAndroid Build Coastguard Worker   unsigned ExpInstrTypesSeen;
87*9880d681SAndroid Build Coastguard Worker 
88*9880d681SAndroid Build Coastguard Worker   /// \brief Type of the last opcode.
89*9880d681SAndroid Build Coastguard Worker   InstType LastOpcodeType;
90*9880d681SAndroid Build Coastguard Worker 
91*9880d681SAndroid Build Coastguard Worker   bool LastInstWritesM0;
92*9880d681SAndroid Build Coastguard Worker 
93*9880d681SAndroid Build Coastguard Worker   /// \brief Whether the machine function returns void
94*9880d681SAndroid Build Coastguard Worker   bool ReturnsVoid;
95*9880d681SAndroid Build Coastguard Worker 
96*9880d681SAndroid Build Coastguard Worker   /// Whether the VCCZ bit is possibly corrupt
97*9880d681SAndroid Build Coastguard Worker   bool VCCZCorrupt;
98*9880d681SAndroid Build Coastguard Worker 
99*9880d681SAndroid Build Coastguard Worker   /// \brief Get increment/decrement amount for this instruction.
100*9880d681SAndroid Build Coastguard Worker   Counters getHwCounts(MachineInstr &MI);
101*9880d681SAndroid Build Coastguard Worker 
102*9880d681SAndroid Build Coastguard Worker   /// \brief Is operand relevant for async execution?
103*9880d681SAndroid Build Coastguard Worker   bool isOpRelevant(MachineOperand &Op);
104*9880d681SAndroid Build Coastguard Worker 
105*9880d681SAndroid Build Coastguard Worker   /// \brief Get register interval an operand affects.
106*9880d681SAndroid Build Coastguard Worker   RegInterval getRegInterval(const TargetRegisterClass *RC,
107*9880d681SAndroid Build Coastguard Worker                              const MachineOperand &Reg) const;
108*9880d681SAndroid Build Coastguard Worker 
109*9880d681SAndroid Build Coastguard Worker   /// \brief Handle instructions async components
110*9880d681SAndroid Build Coastguard Worker   void pushInstruction(MachineBasicBlock &MBB,
111*9880d681SAndroid Build Coastguard Worker                        MachineBasicBlock::iterator I,
112*9880d681SAndroid Build Coastguard Worker                        const Counters& Increment);
113*9880d681SAndroid Build Coastguard Worker 
114*9880d681SAndroid Build Coastguard Worker   /// \brief Insert the actual wait instruction
115*9880d681SAndroid Build Coastguard Worker   bool insertWait(MachineBasicBlock &MBB,
116*9880d681SAndroid Build Coastguard Worker                   MachineBasicBlock::iterator I,
117*9880d681SAndroid Build Coastguard Worker                   const Counters &Counts);
118*9880d681SAndroid Build Coastguard Worker 
119*9880d681SAndroid Build Coastguard Worker   /// \brief Handle existing wait instructions (from intrinsics)
120*9880d681SAndroid Build Coastguard Worker   void handleExistingWait(MachineBasicBlock::iterator I);
121*9880d681SAndroid Build Coastguard Worker 
122*9880d681SAndroid Build Coastguard Worker   /// \brief Do we need def2def checks?
123*9880d681SAndroid Build Coastguard Worker   bool unorderedDefines(MachineInstr &MI);
124*9880d681SAndroid Build Coastguard Worker 
125*9880d681SAndroid Build Coastguard Worker   /// \brief Resolve all operand dependencies to counter requirements
126*9880d681SAndroid Build Coastguard Worker   Counters handleOperands(MachineInstr &MI);
127*9880d681SAndroid Build Coastguard Worker 
128*9880d681SAndroid Build Coastguard Worker   /// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
129*9880d681SAndroid Build Coastguard Worker   void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
130*9880d681SAndroid Build Coastguard Worker 
131*9880d681SAndroid Build Coastguard Worker   /// Return true if there are LGKM instrucitons that haven't been waited on
132*9880d681SAndroid Build Coastguard Worker   /// yet.
133*9880d681SAndroid Build Coastguard Worker   bool hasOutstandingLGKM() const;
134*9880d681SAndroid Build Coastguard Worker 
135*9880d681SAndroid Build Coastguard Worker public:
136*9880d681SAndroid Build Coastguard Worker   static char ID;
137*9880d681SAndroid Build Coastguard Worker 
SIInsertWaits()138*9880d681SAndroid Build Coastguard Worker   SIInsertWaits() :
139*9880d681SAndroid Build Coastguard Worker     MachineFunctionPass(ID),
140*9880d681SAndroid Build Coastguard Worker     ST(nullptr),
141*9880d681SAndroid Build Coastguard Worker     TII(nullptr),
142*9880d681SAndroid Build Coastguard Worker     TRI(nullptr),
143*9880d681SAndroid Build Coastguard Worker     ExpInstrTypesSeen(0),
144*9880d681SAndroid Build Coastguard Worker     VCCZCorrupt(false) { }
145*9880d681SAndroid Build Coastguard Worker 
146*9880d681SAndroid Build Coastguard Worker   bool runOnMachineFunction(MachineFunction &MF) override;
147*9880d681SAndroid Build Coastguard Worker 
getPassName() const148*9880d681SAndroid Build Coastguard Worker   const char *getPassName() const override {
149*9880d681SAndroid Build Coastguard Worker     return "SI insert wait instructions";
150*9880d681SAndroid Build Coastguard Worker   }
151*9880d681SAndroid Build Coastguard Worker 
getAnalysisUsage(AnalysisUsage & AU) const152*9880d681SAndroid Build Coastguard Worker   void getAnalysisUsage(AnalysisUsage &AU) const override {
153*9880d681SAndroid Build Coastguard Worker     AU.setPreservesCFG();
154*9880d681SAndroid Build Coastguard Worker     MachineFunctionPass::getAnalysisUsage(AU);
155*9880d681SAndroid Build Coastguard Worker   }
156*9880d681SAndroid Build Coastguard Worker };
157*9880d681SAndroid Build Coastguard Worker 
158*9880d681SAndroid Build Coastguard Worker } // End anonymous namespace
159*9880d681SAndroid Build Coastguard Worker 
// Register the pass under the DEBUG_TYPE string ("si-insert-waits") with the
// display name "SI Insert Waits". Nothing is listed between BEGIN and END.
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only / is-analysis flags of INITIALIZE_PASS -- confirm against
// PassSupport.h.
160*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_BEGIN(SIInsertWaits, DEBUG_TYPE,
161*9880d681SAndroid Build Coastguard Worker                       "SI Insert Waits", false, false)
162*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_END(SIInsertWaits, DEBUG_TYPE,
163*9880d681SAndroid Build Coastguard Worker                     "SI Insert Waits", false, false)
164*9880d681SAndroid Build Coastguard Worker 
// Storage for the pass ID declared in the class; the constructor passes it to
// MachineFunctionPass.
165*9880d681SAndroid Build Coastguard Worker char SIInsertWaits::ID = 0;
166*9880d681SAndroid Build Coastguard Worker 
// Reference to the pass ID that is visible in the llvm namespace.
167*9880d681SAndroid Build Coastguard Worker char &llvm::SIInsertWaitsID = SIInsertWaits::ID;
168*9880d681SAndroid Build Coastguard Worker 
createSIInsertWaitsPass()169*9880d681SAndroid Build Coastguard Worker FunctionPass *llvm::createSIInsertWaitsPass() {
170*9880d681SAndroid Build Coastguard Worker   return new SIInsertWaits();
171*9880d681SAndroid Build Coastguard Worker }
172*9880d681SAndroid Build Coastguard Worker 
// Hardware limits per counter, in the order VM, EXP, LGKM. These match the
// field masks (0xF, 0x7, 0xF) used when the S_WAITCNT immediate is encoded.
173*9880d681SAndroid Build Coastguard Worker const Counters SIInsertWaits::WaitCounts = { { 15, 7, 15 } };
// All-zero counter set, used as the starting value when building limits.
174*9880d681SAndroid Build Coastguard Worker const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
175*9880d681SAndroid Build Coastguard Worker 
readsVCCZ(unsigned Opcode)176*9880d681SAndroid Build Coastguard Worker static bool readsVCCZ(unsigned Opcode) {
177*9880d681SAndroid Build Coastguard Worker   return Opcode == AMDGPU::S_CBRANCH_VCCNZ || Opcode == AMDGPU::S_CBRANCH_VCCZ;
178*9880d681SAndroid Build Coastguard Worker }
179*9880d681SAndroid Build Coastguard Worker 
hasOutstandingLGKM() const180*9880d681SAndroid Build Coastguard Worker bool SIInsertWaits::hasOutstandingLGKM() const {
181*9880d681SAndroid Build Coastguard Worker   return WaitedOn.Named.LGKM != LastIssued.Named.LGKM;
182*9880d681SAndroid Build Coastguard Worker }
183*9880d681SAndroid Build Coastguard Worker 
getHwCounts(MachineInstr & MI)184*9880d681SAndroid Build Coastguard Worker Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
185*9880d681SAndroid Build Coastguard Worker   uint64_t TSFlags = MI.getDesc().TSFlags;
186*9880d681SAndroid Build Coastguard Worker   Counters Result = { { 0, 0, 0 } };
187*9880d681SAndroid Build Coastguard Worker 
188*9880d681SAndroid Build Coastguard Worker   Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
189*9880d681SAndroid Build Coastguard Worker 
190*9880d681SAndroid Build Coastguard Worker   // Only consider stores or EXP for EXP_CNT
191*9880d681SAndroid Build Coastguard Worker   Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
192*9880d681SAndroid Build Coastguard Worker       (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));
193*9880d681SAndroid Build Coastguard Worker 
194*9880d681SAndroid Build Coastguard Worker   // LGKM may uses larger values
195*9880d681SAndroid Build Coastguard Worker   if (TSFlags & SIInstrFlags::LGKM_CNT) {
196*9880d681SAndroid Build Coastguard Worker 
197*9880d681SAndroid Build Coastguard Worker     if (TII->isSMRD(MI)) {
198*9880d681SAndroid Build Coastguard Worker 
199*9880d681SAndroid Build Coastguard Worker       if (MI.getNumOperands() != 0) {
200*9880d681SAndroid Build Coastguard Worker         assert(MI.getOperand(0).isReg() &&
201*9880d681SAndroid Build Coastguard Worker                "First LGKM operand must be a register!");
202*9880d681SAndroid Build Coastguard Worker 
203*9880d681SAndroid Build Coastguard Worker         // XXX - What if this is a write into a super register?
204*9880d681SAndroid Build Coastguard Worker         const TargetRegisterClass *RC = TII->getOpRegClass(MI, 0);
205*9880d681SAndroid Build Coastguard Worker         unsigned Size = RC->getSize();
206*9880d681SAndroid Build Coastguard Worker         Result.Named.LGKM = Size > 4 ? 2 : 1;
207*9880d681SAndroid Build Coastguard Worker       } else {
208*9880d681SAndroid Build Coastguard Worker         // s_dcache_inv etc. do not have a a destination register. Assume we
209*9880d681SAndroid Build Coastguard Worker         // want a wait on these.
210*9880d681SAndroid Build Coastguard Worker         // XXX - What is the right value?
211*9880d681SAndroid Build Coastguard Worker         Result.Named.LGKM = 1;
212*9880d681SAndroid Build Coastguard Worker       }
213*9880d681SAndroid Build Coastguard Worker     } else {
214*9880d681SAndroid Build Coastguard Worker       // DS
215*9880d681SAndroid Build Coastguard Worker       Result.Named.LGKM = 1;
216*9880d681SAndroid Build Coastguard Worker     }
217*9880d681SAndroid Build Coastguard Worker 
218*9880d681SAndroid Build Coastguard Worker   } else {
219*9880d681SAndroid Build Coastguard Worker     Result.Named.LGKM = 0;
220*9880d681SAndroid Build Coastguard Worker   }
221*9880d681SAndroid Build Coastguard Worker 
222*9880d681SAndroid Build Coastguard Worker   return Result;
223*9880d681SAndroid Build Coastguard Worker }
224*9880d681SAndroid Build Coastguard Worker 
isOpRelevant(MachineOperand & Op)225*9880d681SAndroid Build Coastguard Worker bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
226*9880d681SAndroid Build Coastguard Worker   // Constants are always irrelevant
227*9880d681SAndroid Build Coastguard Worker   if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
228*9880d681SAndroid Build Coastguard Worker     return false;
229*9880d681SAndroid Build Coastguard Worker 
230*9880d681SAndroid Build Coastguard Worker   // Defines are always relevant
231*9880d681SAndroid Build Coastguard Worker   if (Op.isDef())
232*9880d681SAndroid Build Coastguard Worker     return true;
233*9880d681SAndroid Build Coastguard Worker 
234*9880d681SAndroid Build Coastguard Worker   // For exports all registers are relevant
235*9880d681SAndroid Build Coastguard Worker   MachineInstr &MI = *Op.getParent();
236*9880d681SAndroid Build Coastguard Worker   if (MI.getOpcode() == AMDGPU::EXP)
237*9880d681SAndroid Build Coastguard Worker     return true;
238*9880d681SAndroid Build Coastguard Worker 
239*9880d681SAndroid Build Coastguard Worker   // For stores the stored value is also relevant
240*9880d681SAndroid Build Coastguard Worker   if (!MI.getDesc().mayStore())
241*9880d681SAndroid Build Coastguard Worker     return false;
242*9880d681SAndroid Build Coastguard Worker 
243*9880d681SAndroid Build Coastguard Worker   // Check if this operand is the value being stored.
244*9880d681SAndroid Build Coastguard Worker   // Special case for DS/FLAT instructions, since the address
245*9880d681SAndroid Build Coastguard Worker   // operand comes before the value operand and it may have
246*9880d681SAndroid Build Coastguard Worker   // multiple data operands.
247*9880d681SAndroid Build Coastguard Worker 
248*9880d681SAndroid Build Coastguard Worker   if (TII->isDS(MI) || TII->isFLAT(MI)) {
249*9880d681SAndroid Build Coastguard Worker     MachineOperand *Data = TII->getNamedOperand(MI, AMDGPU::OpName::data);
250*9880d681SAndroid Build Coastguard Worker     if (Data && Op.isIdenticalTo(*Data))
251*9880d681SAndroid Build Coastguard Worker       return true;
252*9880d681SAndroid Build Coastguard Worker   }
253*9880d681SAndroid Build Coastguard Worker 
254*9880d681SAndroid Build Coastguard Worker   if (TII->isDS(MI)) {
255*9880d681SAndroid Build Coastguard Worker     MachineOperand *Data0 = TII->getNamedOperand(MI, AMDGPU::OpName::data0);
256*9880d681SAndroid Build Coastguard Worker     if (Data0 && Op.isIdenticalTo(*Data0))
257*9880d681SAndroid Build Coastguard Worker       return true;
258*9880d681SAndroid Build Coastguard Worker 
259*9880d681SAndroid Build Coastguard Worker     MachineOperand *Data1 = TII->getNamedOperand(MI, AMDGPU::OpName::data1);
260*9880d681SAndroid Build Coastguard Worker     return Data1 && Op.isIdenticalTo(*Data1);
261*9880d681SAndroid Build Coastguard Worker   }
262*9880d681SAndroid Build Coastguard Worker 
263*9880d681SAndroid Build Coastguard Worker   // NOTE: This assumes that the value operand is before the
264*9880d681SAndroid Build Coastguard Worker   // address operand, and that there is only one value operand.
265*9880d681SAndroid Build Coastguard Worker   for (MachineInstr::mop_iterator I = MI.operands_begin(),
266*9880d681SAndroid Build Coastguard Worker        E = MI.operands_end(); I != E; ++I) {
267*9880d681SAndroid Build Coastguard Worker 
268*9880d681SAndroid Build Coastguard Worker     if (I->isReg() && I->isUse())
269*9880d681SAndroid Build Coastguard Worker       return Op.isIdenticalTo(*I);
270*9880d681SAndroid Build Coastguard Worker   }
271*9880d681SAndroid Build Coastguard Worker 
272*9880d681SAndroid Build Coastguard Worker   return false;
273*9880d681SAndroid Build Coastguard Worker }
274*9880d681SAndroid Build Coastguard Worker 
getRegInterval(const TargetRegisterClass * RC,const MachineOperand & Reg) const275*9880d681SAndroid Build Coastguard Worker RegInterval SIInsertWaits::getRegInterval(const TargetRegisterClass *RC,
276*9880d681SAndroid Build Coastguard Worker                                           const MachineOperand &Reg) const {
277*9880d681SAndroid Build Coastguard Worker   unsigned Size = RC->getSize();
278*9880d681SAndroid Build Coastguard Worker   assert(Size >= 4);
279*9880d681SAndroid Build Coastguard Worker 
280*9880d681SAndroid Build Coastguard Worker   RegInterval Result;
281*9880d681SAndroid Build Coastguard Worker   Result.first = TRI->getEncodingValue(Reg.getReg());
282*9880d681SAndroid Build Coastguard Worker   Result.second = Result.first + Size / 4;
283*9880d681SAndroid Build Coastguard Worker 
284*9880d681SAndroid Build Coastguard Worker   return Result;
285*9880d681SAndroid Build Coastguard Worker }
286*9880d681SAndroid Build Coastguard Worker 
pushInstruction(MachineBasicBlock & MBB,MachineBasicBlock::iterator I,const Counters & Increment)287*9880d681SAndroid Build Coastguard Worker void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
288*9880d681SAndroid Build Coastguard Worker                                     MachineBasicBlock::iterator I,
289*9880d681SAndroid Build Coastguard Worker                                     const Counters &Increment) {
290*9880d681SAndroid Build Coastguard Worker 
291*9880d681SAndroid Build Coastguard Worker   // Get the hardware counter increments and sum them up
292*9880d681SAndroid Build Coastguard Worker   Counters Limit = ZeroCounts;
293*9880d681SAndroid Build Coastguard Worker   unsigned Sum = 0;
294*9880d681SAndroid Build Coastguard Worker 
295*9880d681SAndroid Build Coastguard Worker   for (unsigned i = 0; i < 3; ++i) {
296*9880d681SAndroid Build Coastguard Worker     LastIssued.Array[i] += Increment.Array[i];
297*9880d681SAndroid Build Coastguard Worker     if (Increment.Array[i])
298*9880d681SAndroid Build Coastguard Worker       Limit.Array[i] = LastIssued.Array[i];
299*9880d681SAndroid Build Coastguard Worker     Sum += Increment.Array[i];
300*9880d681SAndroid Build Coastguard Worker   }
301*9880d681SAndroid Build Coastguard Worker 
302*9880d681SAndroid Build Coastguard Worker   // If we don't increase anything then that's it
303*9880d681SAndroid Build Coastguard Worker   if (Sum == 0) {
304*9880d681SAndroid Build Coastguard Worker     LastOpcodeType = OTHER;
305*9880d681SAndroid Build Coastguard Worker     return;
306*9880d681SAndroid Build Coastguard Worker   }
307*9880d681SAndroid Build Coastguard Worker 
308*9880d681SAndroid Build Coastguard Worker   if (ST->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
309*9880d681SAndroid Build Coastguard Worker     // Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
310*9880d681SAndroid Build Coastguard Worker     // or SMEM clause, respectively.
311*9880d681SAndroid Build Coastguard Worker     //
312*9880d681SAndroid Build Coastguard Worker     // The temporary workaround is to break the clauses with S_NOP.
313*9880d681SAndroid Build Coastguard Worker     //
314*9880d681SAndroid Build Coastguard Worker     // The proper solution would be to allocate registers such that all source
315*9880d681SAndroid Build Coastguard Worker     // and destination registers don't overlap, e.g. this is illegal:
316*9880d681SAndroid Build Coastguard Worker     //   r0 = load r2
317*9880d681SAndroid Build Coastguard Worker     //   r2 = load r0
318*9880d681SAndroid Build Coastguard Worker     if (LastOpcodeType == VMEM && Increment.Named.VM) {
319*9880d681SAndroid Build Coastguard Worker       // Insert a NOP to break the clause.
320*9880d681SAndroid Build Coastguard Worker       BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
321*9880d681SAndroid Build Coastguard Worker           .addImm(0);
322*9880d681SAndroid Build Coastguard Worker       LastInstWritesM0 = false;
323*9880d681SAndroid Build Coastguard Worker     }
324*9880d681SAndroid Build Coastguard Worker 
325*9880d681SAndroid Build Coastguard Worker     if (TII->isSMRD(*I))
326*9880d681SAndroid Build Coastguard Worker       LastOpcodeType = SMEM;
327*9880d681SAndroid Build Coastguard Worker     else if (Increment.Named.VM)
328*9880d681SAndroid Build Coastguard Worker       LastOpcodeType = VMEM;
329*9880d681SAndroid Build Coastguard Worker   }
330*9880d681SAndroid Build Coastguard Worker 
331*9880d681SAndroid Build Coastguard Worker   // Remember which export instructions we have seen
332*9880d681SAndroid Build Coastguard Worker   if (Increment.Named.EXP) {
333*9880d681SAndroid Build Coastguard Worker     ExpInstrTypesSeen |= I->getOpcode() == AMDGPU::EXP ? 1 : 2;
334*9880d681SAndroid Build Coastguard Worker   }
335*9880d681SAndroid Build Coastguard Worker 
336*9880d681SAndroid Build Coastguard Worker   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
337*9880d681SAndroid Build Coastguard Worker     MachineOperand &Op = I->getOperand(i);
338*9880d681SAndroid Build Coastguard Worker     if (!isOpRelevant(Op))
339*9880d681SAndroid Build Coastguard Worker       continue;
340*9880d681SAndroid Build Coastguard Worker 
341*9880d681SAndroid Build Coastguard Worker     const TargetRegisterClass *RC = TII->getOpRegClass(*I, i);
342*9880d681SAndroid Build Coastguard Worker     RegInterval Interval = getRegInterval(RC, Op);
343*9880d681SAndroid Build Coastguard Worker     for (unsigned j = Interval.first; j < Interval.second; ++j) {
344*9880d681SAndroid Build Coastguard Worker 
345*9880d681SAndroid Build Coastguard Worker       // Remember which registers we define
346*9880d681SAndroid Build Coastguard Worker       if (Op.isDef())
347*9880d681SAndroid Build Coastguard Worker         DefinedRegs[j] = Limit;
348*9880d681SAndroid Build Coastguard Worker 
349*9880d681SAndroid Build Coastguard Worker       // and which one we are using
350*9880d681SAndroid Build Coastguard Worker       if (Op.isUse())
351*9880d681SAndroid Build Coastguard Worker         UsedRegs[j] = Limit;
352*9880d681SAndroid Build Coastguard Worker     }
353*9880d681SAndroid Build Coastguard Worker   }
354*9880d681SAndroid Build Coastguard Worker }
355*9880d681SAndroid Build Coastguard Worker 
insertWait(MachineBasicBlock & MBB,MachineBasicBlock::iterator I,const Counters & Required)356*9880d681SAndroid Build Coastguard Worker bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
357*9880d681SAndroid Build Coastguard Worker                                MachineBasicBlock::iterator I,
358*9880d681SAndroid Build Coastguard Worker                                const Counters &Required) {
359*9880d681SAndroid Build Coastguard Worker 
360*9880d681SAndroid Build Coastguard Worker   // End of program? No need to wait on anything
361*9880d681SAndroid Build Coastguard Worker   // A function not returning void needs to wait, because other bytecode will
362*9880d681SAndroid Build Coastguard Worker   // be appended after it and we don't know what it will be.
363*9880d681SAndroid Build Coastguard Worker   if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM && ReturnsVoid)
364*9880d681SAndroid Build Coastguard Worker     return false;
365*9880d681SAndroid Build Coastguard Worker 
366*9880d681SAndroid Build Coastguard Worker   // Figure out if the async instructions execute in order
367*9880d681SAndroid Build Coastguard Worker   bool Ordered[3];
368*9880d681SAndroid Build Coastguard Worker 
369*9880d681SAndroid Build Coastguard Worker   // VM_CNT is always ordered
370*9880d681SAndroid Build Coastguard Worker   Ordered[0] = true;
371*9880d681SAndroid Build Coastguard Worker 
372*9880d681SAndroid Build Coastguard Worker   // EXP_CNT is unordered if we have both EXP & VM-writes
373*9880d681SAndroid Build Coastguard Worker   Ordered[1] = ExpInstrTypesSeen == 3;
374*9880d681SAndroid Build Coastguard Worker 
375*9880d681SAndroid Build Coastguard Worker   // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
376*9880d681SAndroid Build Coastguard Worker   Ordered[2] = false;
377*9880d681SAndroid Build Coastguard Worker 
378*9880d681SAndroid Build Coastguard Worker   // The values we are going to put into the S_WAITCNT instruction
379*9880d681SAndroid Build Coastguard Worker   Counters Counts = WaitCounts;
380*9880d681SAndroid Build Coastguard Worker 
381*9880d681SAndroid Build Coastguard Worker   // Do we really need to wait?
382*9880d681SAndroid Build Coastguard Worker   bool NeedWait = false;
383*9880d681SAndroid Build Coastguard Worker 
384*9880d681SAndroid Build Coastguard Worker   for (unsigned i = 0; i < 3; ++i) {
385*9880d681SAndroid Build Coastguard Worker 
386*9880d681SAndroid Build Coastguard Worker     if (Required.Array[i] <= WaitedOn.Array[i])
387*9880d681SAndroid Build Coastguard Worker       continue;
388*9880d681SAndroid Build Coastguard Worker 
389*9880d681SAndroid Build Coastguard Worker     NeedWait = true;
390*9880d681SAndroid Build Coastguard Worker 
391*9880d681SAndroid Build Coastguard Worker     if (Ordered[i]) {
392*9880d681SAndroid Build Coastguard Worker       unsigned Value = LastIssued.Array[i] - Required.Array[i];
393*9880d681SAndroid Build Coastguard Worker 
394*9880d681SAndroid Build Coastguard Worker       // Adjust the value to the real hardware possibilities.
395*9880d681SAndroid Build Coastguard Worker       Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
396*9880d681SAndroid Build Coastguard Worker 
397*9880d681SAndroid Build Coastguard Worker     } else
398*9880d681SAndroid Build Coastguard Worker       Counts.Array[i] = 0;
399*9880d681SAndroid Build Coastguard Worker 
400*9880d681SAndroid Build Coastguard Worker     // Remember on what we have waited on.
401*9880d681SAndroid Build Coastguard Worker     WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
402*9880d681SAndroid Build Coastguard Worker   }
403*9880d681SAndroid Build Coastguard Worker 
404*9880d681SAndroid Build Coastguard Worker   if (!NeedWait)
405*9880d681SAndroid Build Coastguard Worker     return false;
406*9880d681SAndroid Build Coastguard Worker 
407*9880d681SAndroid Build Coastguard Worker   // Reset EXP_CNT instruction types
408*9880d681SAndroid Build Coastguard Worker   if (Counts.Named.EXP == 0)
409*9880d681SAndroid Build Coastguard Worker     ExpInstrTypesSeen = 0;
410*9880d681SAndroid Build Coastguard Worker 
411*9880d681SAndroid Build Coastguard Worker   // Build the wait instruction
412*9880d681SAndroid Build Coastguard Worker   BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
413*9880d681SAndroid Build Coastguard Worker           .addImm((Counts.Named.VM & 0xF) |
414*9880d681SAndroid Build Coastguard Worker                   ((Counts.Named.EXP & 0x7) << 4) |
415*9880d681SAndroid Build Coastguard Worker                   ((Counts.Named.LGKM & 0xF) << 8));
416*9880d681SAndroid Build Coastguard Worker 
417*9880d681SAndroid Build Coastguard Worker   LastOpcodeType = OTHER;
418*9880d681SAndroid Build Coastguard Worker   LastInstWritesM0 = false;
419*9880d681SAndroid Build Coastguard Worker   return true;
420*9880d681SAndroid Build Coastguard Worker }
421*9880d681SAndroid Build Coastguard Worker 
422*9880d681SAndroid Build Coastguard Worker /// \brief helper function for handleOperands
increaseCounters(Counters & Dst,const Counters & Src)423*9880d681SAndroid Build Coastguard Worker static void increaseCounters(Counters &Dst, const Counters &Src) {
424*9880d681SAndroid Build Coastguard Worker 
425*9880d681SAndroid Build Coastguard Worker   for (unsigned i = 0; i < 3; ++i)
426*9880d681SAndroid Build Coastguard Worker     Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
427*9880d681SAndroid Build Coastguard Worker }
428*9880d681SAndroid Build Coastguard Worker 
429*9880d681SAndroid Build Coastguard Worker /// \brief check whether any of the counters is non-zero
countersNonZero(const Counters & Counter)430*9880d681SAndroid Build Coastguard Worker static bool countersNonZero(const Counters &Counter) {
431*9880d681SAndroid Build Coastguard Worker   for (unsigned i = 0; i < 3; ++i)
432*9880d681SAndroid Build Coastguard Worker     if (Counter.Array[i])
433*9880d681SAndroid Build Coastguard Worker       return true;
434*9880d681SAndroid Build Coastguard Worker   return false;
435*9880d681SAndroid Build Coastguard Worker }
436*9880d681SAndroid Build Coastguard Worker 
handleExistingWait(MachineBasicBlock::iterator I)437*9880d681SAndroid Build Coastguard Worker void SIInsertWaits::handleExistingWait(MachineBasicBlock::iterator I) {
438*9880d681SAndroid Build Coastguard Worker   assert(I->getOpcode() == AMDGPU::S_WAITCNT);
439*9880d681SAndroid Build Coastguard Worker 
440*9880d681SAndroid Build Coastguard Worker   unsigned Imm = I->getOperand(0).getImm();
441*9880d681SAndroid Build Coastguard Worker   Counters Counts, WaitOn;
442*9880d681SAndroid Build Coastguard Worker 
443*9880d681SAndroid Build Coastguard Worker   Counts.Named.VM = Imm & 0xF;
444*9880d681SAndroid Build Coastguard Worker   Counts.Named.EXP = (Imm >> 4) & 0x7;
445*9880d681SAndroid Build Coastguard Worker   Counts.Named.LGKM = (Imm >> 8) & 0xF;
446*9880d681SAndroid Build Coastguard Worker 
447*9880d681SAndroid Build Coastguard Worker   for (unsigned i = 0; i < 3; ++i) {
448*9880d681SAndroid Build Coastguard Worker     if (Counts.Array[i] <= LastIssued.Array[i])
449*9880d681SAndroid Build Coastguard Worker       WaitOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
450*9880d681SAndroid Build Coastguard Worker     else
451*9880d681SAndroid Build Coastguard Worker       WaitOn.Array[i] = 0;
452*9880d681SAndroid Build Coastguard Worker   }
453*9880d681SAndroid Build Coastguard Worker 
454*9880d681SAndroid Build Coastguard Worker   increaseCounters(DelayedWaitOn, WaitOn);
455*9880d681SAndroid Build Coastguard Worker }
456*9880d681SAndroid Build Coastguard Worker 
handleOperands(MachineInstr & MI)457*9880d681SAndroid Build Coastguard Worker Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
458*9880d681SAndroid Build Coastguard Worker 
459*9880d681SAndroid Build Coastguard Worker   Counters Result = ZeroCounts;
460*9880d681SAndroid Build Coastguard Worker 
461*9880d681SAndroid Build Coastguard Worker   // For each register affected by this instruction increase the result
462*9880d681SAndroid Build Coastguard Worker   // sequence.
463*9880d681SAndroid Build Coastguard Worker   //
464*9880d681SAndroid Build Coastguard Worker   // TODO: We could probably just look at explicit operands if we removed VCC /
465*9880d681SAndroid Build Coastguard Worker   // EXEC from SMRD dest reg classes.
466*9880d681SAndroid Build Coastguard Worker   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
467*9880d681SAndroid Build Coastguard Worker     MachineOperand &Op = MI.getOperand(i);
468*9880d681SAndroid Build Coastguard Worker     if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
469*9880d681SAndroid Build Coastguard Worker       continue;
470*9880d681SAndroid Build Coastguard Worker 
471*9880d681SAndroid Build Coastguard Worker     const TargetRegisterClass *RC = TII->getOpRegClass(MI, i);
472*9880d681SAndroid Build Coastguard Worker     RegInterval Interval = getRegInterval(RC, Op);
473*9880d681SAndroid Build Coastguard Worker     for (unsigned j = Interval.first; j < Interval.second; ++j) {
474*9880d681SAndroid Build Coastguard Worker 
475*9880d681SAndroid Build Coastguard Worker       if (Op.isDef()) {
476*9880d681SAndroid Build Coastguard Worker         increaseCounters(Result, UsedRegs[j]);
477*9880d681SAndroid Build Coastguard Worker         increaseCounters(Result, DefinedRegs[j]);
478*9880d681SAndroid Build Coastguard Worker       }
479*9880d681SAndroid Build Coastguard Worker 
480*9880d681SAndroid Build Coastguard Worker       if (Op.isUse())
481*9880d681SAndroid Build Coastguard Worker         increaseCounters(Result, DefinedRegs[j]);
482*9880d681SAndroid Build Coastguard Worker     }
483*9880d681SAndroid Build Coastguard Worker   }
484*9880d681SAndroid Build Coastguard Worker 
485*9880d681SAndroid Build Coastguard Worker   return Result;
486*9880d681SAndroid Build Coastguard Worker }
487*9880d681SAndroid Build Coastguard Worker 
handleSendMsg(MachineBasicBlock & MBB,MachineBasicBlock::iterator I)488*9880d681SAndroid Build Coastguard Worker void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
489*9880d681SAndroid Build Coastguard Worker                                   MachineBasicBlock::iterator I) {
490*9880d681SAndroid Build Coastguard Worker   if (ST->getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
491*9880d681SAndroid Build Coastguard Worker     return;
492*9880d681SAndroid Build Coastguard Worker 
493*9880d681SAndroid Build Coastguard Worker   // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
494*9880d681SAndroid Build Coastguard Worker   if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) {
495*9880d681SAndroid Build Coastguard Worker     BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
496*9880d681SAndroid Build Coastguard Worker     LastInstWritesM0 = false;
497*9880d681SAndroid Build Coastguard Worker     return;
498*9880d681SAndroid Build Coastguard Worker   }
499*9880d681SAndroid Build Coastguard Worker 
500*9880d681SAndroid Build Coastguard Worker   // Set whether this instruction sets M0
501*9880d681SAndroid Build Coastguard Worker   LastInstWritesM0 = false;
502*9880d681SAndroid Build Coastguard Worker 
503*9880d681SAndroid Build Coastguard Worker   unsigned NumOperands = I->getNumOperands();
504*9880d681SAndroid Build Coastguard Worker   for (unsigned i = 0; i < NumOperands; i++) {
505*9880d681SAndroid Build Coastguard Worker     const MachineOperand &Op = I->getOperand(i);
506*9880d681SAndroid Build Coastguard Worker 
507*9880d681SAndroid Build Coastguard Worker     if (Op.isReg() && Op.isDef() && Op.getReg() == AMDGPU::M0)
508*9880d681SAndroid Build Coastguard Worker       LastInstWritesM0 = true;
509*9880d681SAndroid Build Coastguard Worker   }
510*9880d681SAndroid Build Coastguard Worker }
511*9880d681SAndroid Build Coastguard Worker 
512*9880d681SAndroid Build Coastguard Worker // FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
513*9880d681SAndroid Build Coastguard Worker // around other non-memory instructions.
runOnMachineFunction(MachineFunction & MF)514*9880d681SAndroid Build Coastguard Worker bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
515*9880d681SAndroid Build Coastguard Worker   bool Changes = false;
516*9880d681SAndroid Build Coastguard Worker 
517*9880d681SAndroid Build Coastguard Worker   ST = &MF.getSubtarget<SISubtarget>();
518*9880d681SAndroid Build Coastguard Worker   TII = ST->getInstrInfo();
519*9880d681SAndroid Build Coastguard Worker   TRI = &TII->getRegisterInfo();
520*9880d681SAndroid Build Coastguard Worker   MRI = &MF.getRegInfo();
521*9880d681SAndroid Build Coastguard Worker 
522*9880d681SAndroid Build Coastguard Worker   WaitedOn = ZeroCounts;
523*9880d681SAndroid Build Coastguard Worker   DelayedWaitOn = ZeroCounts;
524*9880d681SAndroid Build Coastguard Worker   LastIssued = ZeroCounts;
525*9880d681SAndroid Build Coastguard Worker   LastOpcodeType = OTHER;
526*9880d681SAndroid Build Coastguard Worker   LastInstWritesM0 = false;
527*9880d681SAndroid Build Coastguard Worker   ReturnsVoid = MF.getInfo<SIMachineFunctionInfo>()->returnsVoid();
528*9880d681SAndroid Build Coastguard Worker 
529*9880d681SAndroid Build Coastguard Worker   memset(&UsedRegs, 0, sizeof(UsedRegs));
530*9880d681SAndroid Build Coastguard Worker   memset(&DefinedRegs, 0, sizeof(DefinedRegs));
531*9880d681SAndroid Build Coastguard Worker 
532*9880d681SAndroid Build Coastguard Worker   SmallVector<MachineInstr *, 4> RemoveMI;
533*9880d681SAndroid Build Coastguard Worker 
534*9880d681SAndroid Build Coastguard Worker   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
535*9880d681SAndroid Build Coastguard Worker        BI != BE; ++BI) {
536*9880d681SAndroid Build Coastguard Worker 
537*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock &MBB = *BI;
538*9880d681SAndroid Build Coastguard Worker     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
539*9880d681SAndroid Build Coastguard Worker          I != E; ++I) {
540*9880d681SAndroid Build Coastguard Worker 
541*9880d681SAndroid Build Coastguard Worker       if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS) {
542*9880d681SAndroid Build Coastguard Worker         // There is a hardware bug on CI/SI where SMRD instruction may corrupt
543*9880d681SAndroid Build Coastguard Worker         // vccz bit, so when we detect that an instruction may read from a
544*9880d681SAndroid Build Coastguard Worker         // corrupt vccz bit, we need to:
545*9880d681SAndroid Build Coastguard Worker         // 1. Insert s_waitcnt lgkm(0) to wait for all outstanding SMRD operations to
546*9880d681SAndroid Build Coastguard Worker         //    complete.
547*9880d681SAndroid Build Coastguard Worker         // 2. Restore the correct value of vccz by writing the current value
548*9880d681SAndroid Build Coastguard Worker         //    of vcc back to vcc.
549*9880d681SAndroid Build Coastguard Worker 
550*9880d681SAndroid Build Coastguard Worker         if (TII->isSMRD(I->getOpcode())) {
551*9880d681SAndroid Build Coastguard Worker           VCCZCorrupt = true;
552*9880d681SAndroid Build Coastguard Worker         } else if (!hasOutstandingLGKM() && I->modifiesRegister(AMDGPU::VCC, TRI)) {
553*9880d681SAndroid Build Coastguard Worker           // FIXME: We only care about SMRD instructions here, not LDS or GDS.
554*9880d681SAndroid Build Coastguard Worker           // Whenever we store a value in vcc, the correct value of vccz is
555*9880d681SAndroid Build Coastguard Worker           // restored.
556*9880d681SAndroid Build Coastguard Worker           VCCZCorrupt = false;
557*9880d681SAndroid Build Coastguard Worker         }
558*9880d681SAndroid Build Coastguard Worker 
559*9880d681SAndroid Build Coastguard Worker         // Check if we need to apply the bug work-around
560*9880d681SAndroid Build Coastguard Worker         if (readsVCCZ(I->getOpcode()) && VCCZCorrupt) {
561*9880d681SAndroid Build Coastguard Worker           DEBUG(dbgs() << "Inserting vccz bug work-around before: " << *I << '\n');
562*9880d681SAndroid Build Coastguard Worker 
563*9880d681SAndroid Build Coastguard Worker           // Wait on everything, not just LGKM.  vccz reads usually come from
564*9880d681SAndroid Build Coastguard Worker           // terminators, and we always wait on everything at the end of the
565*9880d681SAndroid Build Coastguard Worker           // block, so if we only wait on LGKM here, we might end up with
566*9880d681SAndroid Build Coastguard Worker           // another s_waitcnt inserted right after this if there are non-LGKM
567*9880d681SAndroid Build Coastguard Worker           // instructions still outstanding.
568*9880d681SAndroid Build Coastguard Worker           insertWait(MBB, I, LastIssued);
569*9880d681SAndroid Build Coastguard Worker 
570*9880d681SAndroid Build Coastguard Worker           // Restore the vccz bit.  Any time a value is written to vcc, the vcc
571*9880d681SAndroid Build Coastguard Worker           // bit is updated, so we can restore the bit by reading the value of
572*9880d681SAndroid Build Coastguard Worker           // vcc and then writing it back to the register.
573*9880d681SAndroid Build Coastguard Worker           BuildMI(MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64),
574*9880d681SAndroid Build Coastguard Worker                   AMDGPU::VCC)
575*9880d681SAndroid Build Coastguard Worker                   .addReg(AMDGPU::VCC);
576*9880d681SAndroid Build Coastguard Worker         }
577*9880d681SAndroid Build Coastguard Worker       }
578*9880d681SAndroid Build Coastguard Worker 
579*9880d681SAndroid Build Coastguard Worker       // Record pre-existing, explicitly requested waits
580*9880d681SAndroid Build Coastguard Worker       if (I->getOpcode() == AMDGPU::S_WAITCNT) {
581*9880d681SAndroid Build Coastguard Worker         handleExistingWait(*I);
582*9880d681SAndroid Build Coastguard Worker         RemoveMI.push_back(&*I);
583*9880d681SAndroid Build Coastguard Worker         continue;
584*9880d681SAndroid Build Coastguard Worker       }
585*9880d681SAndroid Build Coastguard Worker 
586*9880d681SAndroid Build Coastguard Worker       Counters Required;
587*9880d681SAndroid Build Coastguard Worker 
588*9880d681SAndroid Build Coastguard Worker       // Wait for everything before a barrier.
589*9880d681SAndroid Build Coastguard Worker       //
590*9880d681SAndroid Build Coastguard Worker       // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
591*9880d681SAndroid Build Coastguard Worker       // but we also want to wait for any other outstanding transfers before
592*9880d681SAndroid Build Coastguard Worker       // signalling other hardware blocks
593*9880d681SAndroid Build Coastguard Worker       if (I->getOpcode() == AMDGPU::S_BARRIER ||
594*9880d681SAndroid Build Coastguard Worker           I->getOpcode() == AMDGPU::S_SENDMSG)
595*9880d681SAndroid Build Coastguard Worker         Required = LastIssued;
596*9880d681SAndroid Build Coastguard Worker       else
597*9880d681SAndroid Build Coastguard Worker         Required = handleOperands(*I);
598*9880d681SAndroid Build Coastguard Worker 
599*9880d681SAndroid Build Coastguard Worker       Counters Increment = getHwCounts(*I);
600*9880d681SAndroid Build Coastguard Worker 
601*9880d681SAndroid Build Coastguard Worker       if (countersNonZero(Required) || countersNonZero(Increment))
602*9880d681SAndroid Build Coastguard Worker         increaseCounters(Required, DelayedWaitOn);
603*9880d681SAndroid Build Coastguard Worker 
604*9880d681SAndroid Build Coastguard Worker       Changes |= insertWait(MBB, I, Required);
605*9880d681SAndroid Build Coastguard Worker 
606*9880d681SAndroid Build Coastguard Worker       pushInstruction(MBB, I, Increment);
607*9880d681SAndroid Build Coastguard Worker       handleSendMsg(MBB, I);
608*9880d681SAndroid Build Coastguard Worker     }
609*9880d681SAndroid Build Coastguard Worker 
610*9880d681SAndroid Build Coastguard Worker     // Wait for everything at the end of the MBB
611*9880d681SAndroid Build Coastguard Worker     Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
612*9880d681SAndroid Build Coastguard Worker   }
613*9880d681SAndroid Build Coastguard Worker 
614*9880d681SAndroid Build Coastguard Worker   for (MachineInstr *I : RemoveMI)
615*9880d681SAndroid Build Coastguard Worker     I->eraseFromParent();
616*9880d681SAndroid Build Coastguard Worker 
617*9880d681SAndroid Build Coastguard Worker   return Changes;
618*9880d681SAndroid Build Coastguard Worker }
619