xref: /aosp_15_r20/external/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker //===-- SIWholeQuadMode.cpp - enter and suspend whole quad mode -----------===//
2*9880d681SAndroid Build Coastguard Worker //
3*9880d681SAndroid Build Coastguard Worker //                     The LLVM Compiler Infrastructure
4*9880d681SAndroid Build Coastguard Worker //
5*9880d681SAndroid Build Coastguard Worker // This file is distributed under the University of Illinois Open Source
6*9880d681SAndroid Build Coastguard Worker // License. See LICENSE.TXT for details.
7*9880d681SAndroid Build Coastguard Worker //
8*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
9*9880d681SAndroid Build Coastguard Worker //
10*9880d681SAndroid Build Coastguard Worker /// \file
11*9880d681SAndroid Build Coastguard Worker /// \brief This pass adds instructions to enable whole quad mode for pixel
12*9880d681SAndroid Build Coastguard Worker /// shaders.
13*9880d681SAndroid Build Coastguard Worker ///
14*9880d681SAndroid Build Coastguard Worker /// Whole quad mode is required for derivative computations, but it interferes
15*9880d681SAndroid Build Coastguard Worker /// with shader side effects (stores and atomics). This pass is run on the
16*9880d681SAndroid Build Coastguard Worker /// scheduled machine IR but before register coalescing, so that machine SSA is
17*9880d681SAndroid Build Coastguard Worker /// available for analysis. It ensures that WQM is enabled when necessary, but
18*9880d681SAndroid Build Coastguard Worker /// disabled around stores and atomics.
19*9880d681SAndroid Build Coastguard Worker ///
20*9880d681SAndroid Build Coastguard Worker /// When necessary, this pass creates a function prolog
21*9880d681SAndroid Build Coastguard Worker ///
22*9880d681SAndroid Build Coastguard Worker ///   S_MOV_B64 LiveMask, EXEC
23*9880d681SAndroid Build Coastguard Worker ///   S_WQM_B64 EXEC, EXEC
24*9880d681SAndroid Build Coastguard Worker ///
25*9880d681SAndroid Build Coastguard Worker /// to enter WQM at the top of the function and surrounds blocks of Exact
26*9880d681SAndroid Build Coastguard Worker /// instructions by
27*9880d681SAndroid Build Coastguard Worker ///
28*9880d681SAndroid Build Coastguard Worker ///   S_AND_SAVEEXEC_B64 Tmp, LiveMask
29*9880d681SAndroid Build Coastguard Worker ///   ...
30*9880d681SAndroid Build Coastguard Worker ///   S_MOV_B64 EXEC, Tmp
31*9880d681SAndroid Build Coastguard Worker ///
32*9880d681SAndroid Build Coastguard Worker /// In order to avoid excessive switching during sequences of Exact
33*9880d681SAndroid Build Coastguard Worker /// instructions, the pass first analyzes which instructions must be run in WQM
34*9880d681SAndroid Build Coastguard Worker /// (aka which instructions produce values that lead to derivative
35*9880d681SAndroid Build Coastguard Worker /// computations).
36*9880d681SAndroid Build Coastguard Worker ///
37*9880d681SAndroid Build Coastguard Worker /// Basic blocks are always exited in WQM as long as some successor needs WQM.
38*9880d681SAndroid Build Coastguard Worker ///
39*9880d681SAndroid Build Coastguard Worker /// There is room for improvement given better control flow analysis:
40*9880d681SAndroid Build Coastguard Worker ///
41*9880d681SAndroid Build Coastguard Worker ///  (1) at the top level (outside of control flow statements, and as long as
42*9880d681SAndroid Build Coastguard Worker ///      kill hasn't been used), one SGPR can be saved by recovering WQM from
43*9880d681SAndroid Build Coastguard Worker ///      the LiveMask (this is implemented for the entry block).
44*9880d681SAndroid Build Coastguard Worker ///
45*9880d681SAndroid Build Coastguard Worker ///  (2) when entire regions (e.g. if-else blocks or entire loops) only
46*9880d681SAndroid Build Coastguard Worker ///      consist of exact and don't-care instructions, the switch only has to
47*9880d681SAndroid Build Coastguard Worker ///      be done at the entry and exit points rather than potentially in each
48*9880d681SAndroid Build Coastguard Worker ///      block of the region.
49*9880d681SAndroid Build Coastguard Worker ///
50*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
51*9880d681SAndroid Build Coastguard Worker 
52*9880d681SAndroid Build Coastguard Worker #include "AMDGPU.h"
53*9880d681SAndroid Build Coastguard Worker #include "AMDGPUSubtarget.h"
54*9880d681SAndroid Build Coastguard Worker #include "SIInstrInfo.h"
55*9880d681SAndroid Build Coastguard Worker #include "SIMachineFunctionInfo.h"
56*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFunction.h"
57*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFunctionPass.h"
58*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineInstrBuilder.h"
59*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineRegisterInfo.h"
60*9880d681SAndroid Build Coastguard Worker 
61*9880d681SAndroid Build Coastguard Worker using namespace llvm;
62*9880d681SAndroid Build Coastguard Worker 
63*9880d681SAndroid Build Coastguard Worker #define DEBUG_TYPE "si-wqm"
64*9880d681SAndroid Build Coastguard Worker 
65*9880d681SAndroid Build Coastguard Worker namespace {
66*9880d681SAndroid Build Coastguard Worker 
// Execution-mask states tracked by the analysis. Each state occupies its own
// bit so that several requirements can be OR-ed together into one mask.
enum {
  StateWQM = 1 << 0,   // Whole quad mode.
  StateExact = 1 << 1, // Exact execution mask.
};
71*9880d681SAndroid Build Coastguard Worker 
// Per-instruction analysis record; both fields are masks of State* bits.
struct InstrInfo {
  // State the instruction itself has to be executed in (0 = don't care).
  char Needs{0};
  // States requested by whatever follows this instruction.
  char OutNeeds{0};
};
76*9880d681SAndroid Build Coastguard Worker 
// Per-basic-block analysis record; all fields are masks of State* bits.
struct BlockInfo {
  // Union of the states needed by instructions inside the block.
  char Needs{0};
  // States that have to be provided on entry to the block.
  char InNeeds{0};
  // States that have to be provided on exit from the block.
  char OutNeeds{0};
};
82*9880d681SAndroid Build Coastguard Worker 
83*9880d681SAndroid Build Coastguard Worker struct WorkItem {
84*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *MBB = nullptr;
85*9880d681SAndroid Build Coastguard Worker   MachineInstr *MI = nullptr;
86*9880d681SAndroid Build Coastguard Worker 
WorkItem__anon781a08160111::WorkItem87*9880d681SAndroid Build Coastguard Worker   WorkItem() {}
WorkItem__anon781a08160111::WorkItem88*9880d681SAndroid Build Coastguard Worker   WorkItem(MachineBasicBlock *MBB) : MBB(MBB) {}
WorkItem__anon781a08160111::WorkItem89*9880d681SAndroid Build Coastguard Worker   WorkItem(MachineInstr *MI) : MI(MI) {}
90*9880d681SAndroid Build Coastguard Worker };
91*9880d681SAndroid Build Coastguard Worker 
92*9880d681SAndroid Build Coastguard Worker class SIWholeQuadMode : public MachineFunctionPass {
93*9880d681SAndroid Build Coastguard Worker private:
94*9880d681SAndroid Build Coastguard Worker   const SIInstrInfo *TII;
95*9880d681SAndroid Build Coastguard Worker   const SIRegisterInfo *TRI;
96*9880d681SAndroid Build Coastguard Worker   MachineRegisterInfo *MRI;
97*9880d681SAndroid Build Coastguard Worker 
98*9880d681SAndroid Build Coastguard Worker   DenseMap<const MachineInstr *, InstrInfo> Instructions;
99*9880d681SAndroid Build Coastguard Worker   DenseMap<MachineBasicBlock *, BlockInfo> Blocks;
100*9880d681SAndroid Build Coastguard Worker   SmallVector<const MachineInstr *, 2> ExecExports;
101*9880d681SAndroid Build Coastguard Worker   SmallVector<MachineInstr *, 1> LiveMaskQueries;
102*9880d681SAndroid Build Coastguard Worker 
103*9880d681SAndroid Build Coastguard Worker   char scanInstructions(MachineFunction &MF, std::vector<WorkItem> &Worklist);
104*9880d681SAndroid Build Coastguard Worker   void propagateInstruction(MachineInstr &MI, std::vector<WorkItem> &Worklist);
105*9880d681SAndroid Build Coastguard Worker   void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist);
106*9880d681SAndroid Build Coastguard Worker   char analyzeFunction(MachineFunction &MF);
107*9880d681SAndroid Build Coastguard Worker 
108*9880d681SAndroid Build Coastguard Worker   void toExact(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
109*9880d681SAndroid Build Coastguard Worker                unsigned SaveWQM, unsigned LiveMaskReg);
110*9880d681SAndroid Build Coastguard Worker   void toWQM(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
111*9880d681SAndroid Build Coastguard Worker              unsigned SavedWQM);
112*9880d681SAndroid Build Coastguard Worker   void processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg, bool isEntry);
113*9880d681SAndroid Build Coastguard Worker 
114*9880d681SAndroid Build Coastguard Worker   void lowerLiveMaskQueries(unsigned LiveMaskReg);
115*9880d681SAndroid Build Coastguard Worker 
116*9880d681SAndroid Build Coastguard Worker public:
117*9880d681SAndroid Build Coastguard Worker   static char ID;
118*9880d681SAndroid Build Coastguard Worker 
SIWholeQuadMode()119*9880d681SAndroid Build Coastguard Worker   SIWholeQuadMode() :
120*9880d681SAndroid Build Coastguard Worker     MachineFunctionPass(ID) { }
121*9880d681SAndroid Build Coastguard Worker 
122*9880d681SAndroid Build Coastguard Worker   bool runOnMachineFunction(MachineFunction &MF) override;
123*9880d681SAndroid Build Coastguard Worker 
getPassName() const124*9880d681SAndroid Build Coastguard Worker   const char *getPassName() const override {
125*9880d681SAndroid Build Coastguard Worker     return "SI Whole Quad Mode";
126*9880d681SAndroid Build Coastguard Worker   }
127*9880d681SAndroid Build Coastguard Worker 
getAnalysisUsage(AnalysisUsage & AU) const128*9880d681SAndroid Build Coastguard Worker   void getAnalysisUsage(AnalysisUsage &AU) const override {
129*9880d681SAndroid Build Coastguard Worker     AU.setPreservesCFG();
130*9880d681SAndroid Build Coastguard Worker     MachineFunctionPass::getAnalysisUsage(AU);
131*9880d681SAndroid Build Coastguard Worker   }
132*9880d681SAndroid Build Coastguard Worker };
133*9880d681SAndroid Build Coastguard Worker 
134*9880d681SAndroid Build Coastguard Worker } // End anonymous namespace
135*9880d681SAndroid Build Coastguard Worker 
136*9880d681SAndroid Build Coastguard Worker char SIWholeQuadMode::ID = 0;
137*9880d681SAndroid Build Coastguard Worker 
138*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS(SIWholeQuadMode, DEBUG_TYPE,
139*9880d681SAndroid Build Coastguard Worker                 "SI Whole Quad Mode", false, false)
140*9880d681SAndroid Build Coastguard Worker 
141*9880d681SAndroid Build Coastguard Worker char &llvm::SIWholeQuadModeID = SIWholeQuadMode::ID;
142*9880d681SAndroid Build Coastguard Worker 
createSIWholeQuadModePass()143*9880d681SAndroid Build Coastguard Worker FunctionPass *llvm::createSIWholeQuadModePass() {
144*9880d681SAndroid Build Coastguard Worker   return new SIWholeQuadMode;
145*9880d681SAndroid Build Coastguard Worker }
146*9880d681SAndroid Build Coastguard Worker 
147*9880d681SAndroid Build Coastguard Worker // Scan instructions to determine which ones require an Exact execmask and
148*9880d681SAndroid Build Coastguard Worker // which ones seed WQM requirements.
scanInstructions(MachineFunction & MF,std::vector<WorkItem> & Worklist)149*9880d681SAndroid Build Coastguard Worker char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
150*9880d681SAndroid Build Coastguard Worker                                        std::vector<WorkItem> &Worklist) {
151*9880d681SAndroid Build Coastguard Worker   char GlobalFlags = 0;
152*9880d681SAndroid Build Coastguard Worker   bool WQMOutputs = MF.getFunction()->hasFnAttribute("amdgpu-ps-wqm-outputs");
153*9880d681SAndroid Build Coastguard Worker 
154*9880d681SAndroid Build Coastguard Worker   for (auto BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) {
155*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock &MBB = *BI;
156*9880d681SAndroid Build Coastguard Worker 
157*9880d681SAndroid Build Coastguard Worker     for (auto II = MBB.begin(), IE = MBB.end(); II != IE; ++II) {
158*9880d681SAndroid Build Coastguard Worker       MachineInstr &MI = *II;
159*9880d681SAndroid Build Coastguard Worker       unsigned Opcode = MI.getOpcode();
160*9880d681SAndroid Build Coastguard Worker       char Flags = 0;
161*9880d681SAndroid Build Coastguard Worker 
162*9880d681SAndroid Build Coastguard Worker       if (TII->isWQM(Opcode) || TII->isDS(Opcode)) {
163*9880d681SAndroid Build Coastguard Worker         Flags = StateWQM;
164*9880d681SAndroid Build Coastguard Worker       } else if (MI.mayStore() && TII->usesVM_CNT(MI)) {
165*9880d681SAndroid Build Coastguard Worker         Flags = StateExact;
166*9880d681SAndroid Build Coastguard Worker       } else {
167*9880d681SAndroid Build Coastguard Worker         // Handle export instructions with the exec mask valid flag set
168*9880d681SAndroid Build Coastguard Worker         if (Opcode == AMDGPU::EXP) {
169*9880d681SAndroid Build Coastguard Worker           if (MI.getOperand(4).getImm() != 0)
170*9880d681SAndroid Build Coastguard Worker             ExecExports.push_back(&MI);
171*9880d681SAndroid Build Coastguard Worker         } else if (Opcode == AMDGPU::SI_PS_LIVE) {
172*9880d681SAndroid Build Coastguard Worker           LiveMaskQueries.push_back(&MI);
173*9880d681SAndroid Build Coastguard Worker         } else if (WQMOutputs) {
174*9880d681SAndroid Build Coastguard Worker           // The function is in machine SSA form, which means that physical
175*9880d681SAndroid Build Coastguard Worker           // VGPRs correspond to shader inputs and outputs. Inputs are
176*9880d681SAndroid Build Coastguard Worker           // only used, outputs are only defined.
177*9880d681SAndroid Build Coastguard Worker           for (const MachineOperand &MO : MI.defs()) {
178*9880d681SAndroid Build Coastguard Worker             if (!MO.isReg())
179*9880d681SAndroid Build Coastguard Worker               continue;
180*9880d681SAndroid Build Coastguard Worker 
181*9880d681SAndroid Build Coastguard Worker             unsigned Reg = MO.getReg();
182*9880d681SAndroid Build Coastguard Worker 
183*9880d681SAndroid Build Coastguard Worker             if (!TRI->isVirtualRegister(Reg) &&
184*9880d681SAndroid Build Coastguard Worker                 TRI->hasVGPRs(TRI->getPhysRegClass(Reg))) {
185*9880d681SAndroid Build Coastguard Worker               Flags = StateWQM;
186*9880d681SAndroid Build Coastguard Worker               break;
187*9880d681SAndroid Build Coastguard Worker             }
188*9880d681SAndroid Build Coastguard Worker           }
189*9880d681SAndroid Build Coastguard Worker         }
190*9880d681SAndroid Build Coastguard Worker 
191*9880d681SAndroid Build Coastguard Worker         if (!Flags)
192*9880d681SAndroid Build Coastguard Worker           continue;
193*9880d681SAndroid Build Coastguard Worker       }
194*9880d681SAndroid Build Coastguard Worker 
195*9880d681SAndroid Build Coastguard Worker       Instructions[&MI].Needs = Flags;
196*9880d681SAndroid Build Coastguard Worker       Worklist.push_back(&MI);
197*9880d681SAndroid Build Coastguard Worker       GlobalFlags |= Flags;
198*9880d681SAndroid Build Coastguard Worker     }
199*9880d681SAndroid Build Coastguard Worker 
200*9880d681SAndroid Build Coastguard Worker     if (WQMOutputs && MBB.succ_empty()) {
201*9880d681SAndroid Build Coastguard Worker       // This is a prolog shader. Make sure we go back to exact mode at the end.
202*9880d681SAndroid Build Coastguard Worker       Blocks[&MBB].OutNeeds = StateExact;
203*9880d681SAndroid Build Coastguard Worker       Worklist.push_back(&MBB);
204*9880d681SAndroid Build Coastguard Worker       GlobalFlags |= StateExact;
205*9880d681SAndroid Build Coastguard Worker     }
206*9880d681SAndroid Build Coastguard Worker   }
207*9880d681SAndroid Build Coastguard Worker 
208*9880d681SAndroid Build Coastguard Worker   return GlobalFlags;
209*9880d681SAndroid Build Coastguard Worker }
210*9880d681SAndroid Build Coastguard Worker 
propagateInstruction(MachineInstr & MI,std::vector<WorkItem> & Worklist)211*9880d681SAndroid Build Coastguard Worker void SIWholeQuadMode::propagateInstruction(MachineInstr &MI,
212*9880d681SAndroid Build Coastguard Worker                                            std::vector<WorkItem>& Worklist) {
213*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *MBB = MI.getParent();
214*9880d681SAndroid Build Coastguard Worker   InstrInfo II = Instructions[&MI]; // take a copy to prevent dangling references
215*9880d681SAndroid Build Coastguard Worker   BlockInfo &BI = Blocks[MBB];
216*9880d681SAndroid Build Coastguard Worker 
217*9880d681SAndroid Build Coastguard Worker   // Control flow-type instructions that are followed by WQM computations
218*9880d681SAndroid Build Coastguard Worker   // must themselves be in WQM.
219*9880d681SAndroid Build Coastguard Worker   if ((II.OutNeeds & StateWQM) && !(II.Needs & StateWQM) && MI.isTerminator()) {
220*9880d681SAndroid Build Coastguard Worker     Instructions[&MI].Needs = StateWQM;
221*9880d681SAndroid Build Coastguard Worker     II.Needs = StateWQM;
222*9880d681SAndroid Build Coastguard Worker   }
223*9880d681SAndroid Build Coastguard Worker 
224*9880d681SAndroid Build Coastguard Worker   // Propagate to block level
225*9880d681SAndroid Build Coastguard Worker   BI.Needs |= II.Needs;
226*9880d681SAndroid Build Coastguard Worker   if ((BI.InNeeds | II.Needs) != BI.InNeeds) {
227*9880d681SAndroid Build Coastguard Worker     BI.InNeeds |= II.Needs;
228*9880d681SAndroid Build Coastguard Worker     Worklist.push_back(MBB);
229*9880d681SAndroid Build Coastguard Worker   }
230*9880d681SAndroid Build Coastguard Worker 
231*9880d681SAndroid Build Coastguard Worker   // Propagate backwards within block
232*9880d681SAndroid Build Coastguard Worker   if (MachineInstr *PrevMI = MI.getPrevNode()) {
233*9880d681SAndroid Build Coastguard Worker     char InNeeds = II.Needs | II.OutNeeds;
234*9880d681SAndroid Build Coastguard Worker     if (!PrevMI->isPHI()) {
235*9880d681SAndroid Build Coastguard Worker       InstrInfo &PrevII = Instructions[PrevMI];
236*9880d681SAndroid Build Coastguard Worker       if ((PrevII.OutNeeds | InNeeds) != PrevII.OutNeeds) {
237*9880d681SAndroid Build Coastguard Worker         PrevII.OutNeeds |= InNeeds;
238*9880d681SAndroid Build Coastguard Worker         Worklist.push_back(PrevMI);
239*9880d681SAndroid Build Coastguard Worker       }
240*9880d681SAndroid Build Coastguard Worker     }
241*9880d681SAndroid Build Coastguard Worker   }
242*9880d681SAndroid Build Coastguard Worker 
243*9880d681SAndroid Build Coastguard Worker   // Propagate WQM flag to instruction inputs
244*9880d681SAndroid Build Coastguard Worker   assert(II.Needs != (StateWQM | StateExact));
245*9880d681SAndroid Build Coastguard Worker   if (II.Needs != StateWQM)
246*9880d681SAndroid Build Coastguard Worker     return;
247*9880d681SAndroid Build Coastguard Worker 
248*9880d681SAndroid Build Coastguard Worker   for (const MachineOperand &Use : MI.uses()) {
249*9880d681SAndroid Build Coastguard Worker     if (!Use.isReg() || !Use.isUse())
250*9880d681SAndroid Build Coastguard Worker       continue;
251*9880d681SAndroid Build Coastguard Worker 
252*9880d681SAndroid Build Coastguard Worker     // At this point, physical registers appear as inputs or outputs
253*9880d681SAndroid Build Coastguard Worker     // and following them makes no sense (and would in fact be incorrect
254*9880d681SAndroid Build Coastguard Worker     // when the same VGPR is used as both an output and an input that leads
255*9880d681SAndroid Build Coastguard Worker     // to a NeedsWQM instruction).
256*9880d681SAndroid Build Coastguard Worker     //
257*9880d681SAndroid Build Coastguard Worker     // Note: VCC appears e.g. in 64-bit addition with carry - theoretically we
258*9880d681SAndroid Build Coastguard Worker     // have to trace this, in practice it happens for 64-bit computations like
259*9880d681SAndroid Build Coastguard Worker     // pointers where both dwords are followed already anyway.
260*9880d681SAndroid Build Coastguard Worker     if (!TargetRegisterInfo::isVirtualRegister(Use.getReg()))
261*9880d681SAndroid Build Coastguard Worker       continue;
262*9880d681SAndroid Build Coastguard Worker 
263*9880d681SAndroid Build Coastguard Worker     for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg())) {
264*9880d681SAndroid Build Coastguard Worker       InstrInfo &DefII = Instructions[&DefMI];
265*9880d681SAndroid Build Coastguard Worker 
266*9880d681SAndroid Build Coastguard Worker       // Obviously skip if DefMI is already flagged as NeedWQM.
267*9880d681SAndroid Build Coastguard Worker       //
268*9880d681SAndroid Build Coastguard Worker       // The instruction might also be flagged as NeedExact. This happens when
269*9880d681SAndroid Build Coastguard Worker       // the result of an atomic is used in a WQM computation. In this case,
270*9880d681SAndroid Build Coastguard Worker       // the atomic must not run for helper pixels and the WQM result is
271*9880d681SAndroid Build Coastguard Worker       // undefined.
272*9880d681SAndroid Build Coastguard Worker       if (DefII.Needs != 0)
273*9880d681SAndroid Build Coastguard Worker         continue;
274*9880d681SAndroid Build Coastguard Worker 
275*9880d681SAndroid Build Coastguard Worker       DefII.Needs = StateWQM;
276*9880d681SAndroid Build Coastguard Worker       Worklist.push_back(&DefMI);
277*9880d681SAndroid Build Coastguard Worker     }
278*9880d681SAndroid Build Coastguard Worker   }
279*9880d681SAndroid Build Coastguard Worker }
280*9880d681SAndroid Build Coastguard Worker 
propagateBlock(MachineBasicBlock & MBB,std::vector<WorkItem> & Worklist)281*9880d681SAndroid Build Coastguard Worker void SIWholeQuadMode::propagateBlock(MachineBasicBlock &MBB,
282*9880d681SAndroid Build Coastguard Worker                                      std::vector<WorkItem>& Worklist) {
283*9880d681SAndroid Build Coastguard Worker   BlockInfo BI = Blocks[&MBB]; // Make a copy to prevent dangling references.
284*9880d681SAndroid Build Coastguard Worker 
285*9880d681SAndroid Build Coastguard Worker   // Propagate through instructions
286*9880d681SAndroid Build Coastguard Worker   if (!MBB.empty()) {
287*9880d681SAndroid Build Coastguard Worker     MachineInstr *LastMI = &*MBB.rbegin();
288*9880d681SAndroid Build Coastguard Worker     InstrInfo &LastII = Instructions[LastMI];
289*9880d681SAndroid Build Coastguard Worker     if ((LastII.OutNeeds | BI.OutNeeds) != LastII.OutNeeds) {
290*9880d681SAndroid Build Coastguard Worker       LastII.OutNeeds |= BI.OutNeeds;
291*9880d681SAndroid Build Coastguard Worker       Worklist.push_back(LastMI);
292*9880d681SAndroid Build Coastguard Worker     }
293*9880d681SAndroid Build Coastguard Worker   }
294*9880d681SAndroid Build Coastguard Worker 
295*9880d681SAndroid Build Coastguard Worker   // Predecessor blocks must provide for our WQM/Exact needs.
296*9880d681SAndroid Build Coastguard Worker   for (MachineBasicBlock *Pred : MBB.predecessors()) {
297*9880d681SAndroid Build Coastguard Worker     BlockInfo &PredBI = Blocks[Pred];
298*9880d681SAndroid Build Coastguard Worker     if ((PredBI.OutNeeds | BI.InNeeds) == PredBI.OutNeeds)
299*9880d681SAndroid Build Coastguard Worker       continue;
300*9880d681SAndroid Build Coastguard Worker 
301*9880d681SAndroid Build Coastguard Worker     PredBI.OutNeeds |= BI.InNeeds;
302*9880d681SAndroid Build Coastguard Worker     PredBI.InNeeds |= BI.InNeeds;
303*9880d681SAndroid Build Coastguard Worker     Worklist.push_back(Pred);
304*9880d681SAndroid Build Coastguard Worker   }
305*9880d681SAndroid Build Coastguard Worker 
306*9880d681SAndroid Build Coastguard Worker   // All successors must be prepared to accept the same set of WQM/Exact data.
307*9880d681SAndroid Build Coastguard Worker   for (MachineBasicBlock *Succ : MBB.successors()) {
308*9880d681SAndroid Build Coastguard Worker     BlockInfo &SuccBI = Blocks[Succ];
309*9880d681SAndroid Build Coastguard Worker     if ((SuccBI.InNeeds | BI.OutNeeds) == SuccBI.InNeeds)
310*9880d681SAndroid Build Coastguard Worker       continue;
311*9880d681SAndroid Build Coastguard Worker 
312*9880d681SAndroid Build Coastguard Worker     SuccBI.InNeeds |= BI.OutNeeds;
313*9880d681SAndroid Build Coastguard Worker     Worklist.push_back(Succ);
314*9880d681SAndroid Build Coastguard Worker   }
315*9880d681SAndroid Build Coastguard Worker }
316*9880d681SAndroid Build Coastguard Worker 
analyzeFunction(MachineFunction & MF)317*9880d681SAndroid Build Coastguard Worker char SIWholeQuadMode::analyzeFunction(MachineFunction &MF) {
318*9880d681SAndroid Build Coastguard Worker   std::vector<WorkItem> Worklist;
319*9880d681SAndroid Build Coastguard Worker   char GlobalFlags = scanInstructions(MF, Worklist);
320*9880d681SAndroid Build Coastguard Worker 
321*9880d681SAndroid Build Coastguard Worker   while (!Worklist.empty()) {
322*9880d681SAndroid Build Coastguard Worker     WorkItem WI = Worklist.back();
323*9880d681SAndroid Build Coastguard Worker     Worklist.pop_back();
324*9880d681SAndroid Build Coastguard Worker 
325*9880d681SAndroid Build Coastguard Worker     if (WI.MI)
326*9880d681SAndroid Build Coastguard Worker       propagateInstruction(*WI.MI, Worklist);
327*9880d681SAndroid Build Coastguard Worker     else
328*9880d681SAndroid Build Coastguard Worker       propagateBlock(*WI.MBB, Worklist);
329*9880d681SAndroid Build Coastguard Worker   }
330*9880d681SAndroid Build Coastguard Worker 
331*9880d681SAndroid Build Coastguard Worker   return GlobalFlags;
332*9880d681SAndroid Build Coastguard Worker }
333*9880d681SAndroid Build Coastguard Worker 
toExact(MachineBasicBlock & MBB,MachineBasicBlock::iterator Before,unsigned SaveWQM,unsigned LiveMaskReg)334*9880d681SAndroid Build Coastguard Worker void SIWholeQuadMode::toExact(MachineBasicBlock &MBB,
335*9880d681SAndroid Build Coastguard Worker                               MachineBasicBlock::iterator Before,
336*9880d681SAndroid Build Coastguard Worker                               unsigned SaveWQM, unsigned LiveMaskReg) {
337*9880d681SAndroid Build Coastguard Worker   if (SaveWQM) {
338*9880d681SAndroid Build Coastguard Worker     BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_SAVEEXEC_B64),
339*9880d681SAndroid Build Coastguard Worker             SaveWQM)
340*9880d681SAndroid Build Coastguard Worker         .addReg(LiveMaskReg);
341*9880d681SAndroid Build Coastguard Worker   } else {
342*9880d681SAndroid Build Coastguard Worker     BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_B64),
343*9880d681SAndroid Build Coastguard Worker             AMDGPU::EXEC)
344*9880d681SAndroid Build Coastguard Worker         .addReg(AMDGPU::EXEC)
345*9880d681SAndroid Build Coastguard Worker         .addReg(LiveMaskReg);
346*9880d681SAndroid Build Coastguard Worker   }
347*9880d681SAndroid Build Coastguard Worker }
348*9880d681SAndroid Build Coastguard Worker 
toWQM(MachineBasicBlock & MBB,MachineBasicBlock::iterator Before,unsigned SavedWQM)349*9880d681SAndroid Build Coastguard Worker void SIWholeQuadMode::toWQM(MachineBasicBlock &MBB,
350*9880d681SAndroid Build Coastguard Worker                             MachineBasicBlock::iterator Before,
351*9880d681SAndroid Build Coastguard Worker                             unsigned SavedWQM) {
352*9880d681SAndroid Build Coastguard Worker   if (SavedWQM) {
353*9880d681SAndroid Build Coastguard Worker     BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), AMDGPU::EXEC)
354*9880d681SAndroid Build Coastguard Worker         .addReg(SavedWQM);
355*9880d681SAndroid Build Coastguard Worker   } else {
356*9880d681SAndroid Build Coastguard Worker     BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
357*9880d681SAndroid Build Coastguard Worker             AMDGPU::EXEC)
358*9880d681SAndroid Build Coastguard Worker         .addReg(AMDGPU::EXEC);
359*9880d681SAndroid Build Coastguard Worker   }
360*9880d681SAndroid Build Coastguard Worker }
361*9880d681SAndroid Build Coastguard Worker 
processBlock(MachineBasicBlock & MBB,unsigned LiveMaskReg,bool isEntry)362*9880d681SAndroid Build Coastguard Worker void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
363*9880d681SAndroid Build Coastguard Worker                                    bool isEntry) {
364*9880d681SAndroid Build Coastguard Worker   auto BII = Blocks.find(&MBB);
365*9880d681SAndroid Build Coastguard Worker   if (BII == Blocks.end())
366*9880d681SAndroid Build Coastguard Worker     return;
367*9880d681SAndroid Build Coastguard Worker 
368*9880d681SAndroid Build Coastguard Worker   const BlockInfo &BI = BII->second;
369*9880d681SAndroid Build Coastguard Worker 
370*9880d681SAndroid Build Coastguard Worker   if (!(BI.InNeeds & StateWQM))
371*9880d681SAndroid Build Coastguard Worker     return;
372*9880d681SAndroid Build Coastguard Worker 
373*9880d681SAndroid Build Coastguard Worker   // This is a non-entry block that is WQM throughout, so no need to do
374*9880d681SAndroid Build Coastguard Worker   // anything.
375*9880d681SAndroid Build Coastguard Worker   if (!isEntry && !(BI.Needs & StateExact) && BI.OutNeeds != StateExact)
376*9880d681SAndroid Build Coastguard Worker     return;
377*9880d681SAndroid Build Coastguard Worker 
378*9880d681SAndroid Build Coastguard Worker   unsigned SavedWQMReg = 0;
379*9880d681SAndroid Build Coastguard Worker   bool WQMFromExec = isEntry;
380*9880d681SAndroid Build Coastguard Worker   char State = isEntry ? StateExact : StateWQM;
381*9880d681SAndroid Build Coastguard Worker 
382*9880d681SAndroid Build Coastguard Worker   auto II = MBB.getFirstNonPHI(), IE = MBB.end();
383*9880d681SAndroid Build Coastguard Worker   while (II != IE) {
384*9880d681SAndroid Build Coastguard Worker     MachineInstr &MI = *II;
385*9880d681SAndroid Build Coastguard Worker     ++II;
386*9880d681SAndroid Build Coastguard Worker 
387*9880d681SAndroid Build Coastguard Worker     // Skip instructions that are not affected by EXEC
388*9880d681SAndroid Build Coastguard Worker     if (TII->isScalarUnit(MI) && !MI.isTerminator())
389*9880d681SAndroid Build Coastguard Worker       continue;
390*9880d681SAndroid Build Coastguard Worker 
391*9880d681SAndroid Build Coastguard Worker     // Generic instructions such as COPY will either disappear by register
392*9880d681SAndroid Build Coastguard Worker     // coalescing or be lowered to SALU or VALU instructions.
393*9880d681SAndroid Build Coastguard Worker     if (TargetInstrInfo::isGenericOpcode(MI.getOpcode())) {
394*9880d681SAndroid Build Coastguard Worker       if (MI.getNumExplicitOperands() >= 1) {
395*9880d681SAndroid Build Coastguard Worker         const MachineOperand &Op = MI.getOperand(0);
396*9880d681SAndroid Build Coastguard Worker         if (Op.isReg()) {
397*9880d681SAndroid Build Coastguard Worker           if (TRI->isSGPRReg(*MRI, Op.getReg())) {
398*9880d681SAndroid Build Coastguard Worker             // SGPR instructions are not affected by EXEC
399*9880d681SAndroid Build Coastguard Worker             continue;
400*9880d681SAndroid Build Coastguard Worker           }
401*9880d681SAndroid Build Coastguard Worker         }
402*9880d681SAndroid Build Coastguard Worker       }
403*9880d681SAndroid Build Coastguard Worker     }
404*9880d681SAndroid Build Coastguard Worker 
405*9880d681SAndroid Build Coastguard Worker     char Needs = 0;
406*9880d681SAndroid Build Coastguard Worker     char OutNeeds = 0;
407*9880d681SAndroid Build Coastguard Worker     auto InstrInfoIt = Instructions.find(&MI);
408*9880d681SAndroid Build Coastguard Worker     if (InstrInfoIt != Instructions.end()) {
409*9880d681SAndroid Build Coastguard Worker       Needs = InstrInfoIt->second.Needs;
410*9880d681SAndroid Build Coastguard Worker       OutNeeds = InstrInfoIt->second.OutNeeds;
411*9880d681SAndroid Build Coastguard Worker 
412*9880d681SAndroid Build Coastguard Worker       // Make sure to switch to Exact mode before the end of the block when
413*9880d681SAndroid Build Coastguard Worker       // Exact and only Exact is needed further downstream.
414*9880d681SAndroid Build Coastguard Worker       if (OutNeeds == StateExact && MI.isTerminator()) {
415*9880d681SAndroid Build Coastguard Worker         assert(Needs == 0);
416*9880d681SAndroid Build Coastguard Worker         Needs = StateExact;
417*9880d681SAndroid Build Coastguard Worker       }
418*9880d681SAndroid Build Coastguard Worker     }
419*9880d681SAndroid Build Coastguard Worker 
420*9880d681SAndroid Build Coastguard Worker     // State switching
421*9880d681SAndroid Build Coastguard Worker     if (Needs && State != Needs) {
422*9880d681SAndroid Build Coastguard Worker       if (Needs == StateExact) {
423*9880d681SAndroid Build Coastguard Worker         assert(!SavedWQMReg);
424*9880d681SAndroid Build Coastguard Worker 
425*9880d681SAndroid Build Coastguard Worker         if (!WQMFromExec && (OutNeeds & StateWQM))
426*9880d681SAndroid Build Coastguard Worker           SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
427*9880d681SAndroid Build Coastguard Worker 
428*9880d681SAndroid Build Coastguard Worker         toExact(MBB, &MI, SavedWQMReg, LiveMaskReg);
429*9880d681SAndroid Build Coastguard Worker       } else {
430*9880d681SAndroid Build Coastguard Worker         assert(WQMFromExec == (SavedWQMReg == 0));
431*9880d681SAndroid Build Coastguard Worker         toWQM(MBB, &MI, SavedWQMReg);
432*9880d681SAndroid Build Coastguard Worker         SavedWQMReg = 0;
433*9880d681SAndroid Build Coastguard Worker       }
434*9880d681SAndroid Build Coastguard Worker 
435*9880d681SAndroid Build Coastguard Worker       State = Needs;
436*9880d681SAndroid Build Coastguard Worker     }
437*9880d681SAndroid Build Coastguard Worker   }
438*9880d681SAndroid Build Coastguard Worker 
439*9880d681SAndroid Build Coastguard Worker   if ((BI.OutNeeds & StateWQM) && State != StateWQM) {
440*9880d681SAndroid Build Coastguard Worker     assert(WQMFromExec == (SavedWQMReg == 0));
441*9880d681SAndroid Build Coastguard Worker     toWQM(MBB, MBB.end(), SavedWQMReg);
442*9880d681SAndroid Build Coastguard Worker   } else if (BI.OutNeeds == StateExact && State != StateExact) {
443*9880d681SAndroid Build Coastguard Worker     toExact(MBB, MBB.end(), 0, LiveMaskReg);
444*9880d681SAndroid Build Coastguard Worker   }
445*9880d681SAndroid Build Coastguard Worker }
446*9880d681SAndroid Build Coastguard Worker 
lowerLiveMaskQueries(unsigned LiveMaskReg)447*9880d681SAndroid Build Coastguard Worker void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {
448*9880d681SAndroid Build Coastguard Worker   for (MachineInstr *MI : LiveMaskQueries) {
449*9880d681SAndroid Build Coastguard Worker     const DebugLoc &DL = MI->getDebugLoc();
450*9880d681SAndroid Build Coastguard Worker     unsigned Dest = MI->getOperand(0).getReg();
451*9880d681SAndroid Build Coastguard Worker     BuildMI(*MI->getParent(), MI, DL, TII->get(AMDGPU::COPY), Dest)
452*9880d681SAndroid Build Coastguard Worker         .addReg(LiveMaskReg);
453*9880d681SAndroid Build Coastguard Worker     MI->eraseFromParent();
454*9880d681SAndroid Build Coastguard Worker   }
455*9880d681SAndroid Build Coastguard Worker }
456*9880d681SAndroid Build Coastguard Worker 
runOnMachineFunction(MachineFunction & MF)457*9880d681SAndroid Build Coastguard Worker bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
458*9880d681SAndroid Build Coastguard Worker   if (MF.getFunction()->getCallingConv() != CallingConv::AMDGPU_PS)
459*9880d681SAndroid Build Coastguard Worker     return false;
460*9880d681SAndroid Build Coastguard Worker 
461*9880d681SAndroid Build Coastguard Worker   Instructions.clear();
462*9880d681SAndroid Build Coastguard Worker   Blocks.clear();
463*9880d681SAndroid Build Coastguard Worker   ExecExports.clear();
464*9880d681SAndroid Build Coastguard Worker   LiveMaskQueries.clear();
465*9880d681SAndroid Build Coastguard Worker 
466*9880d681SAndroid Build Coastguard Worker   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
467*9880d681SAndroid Build Coastguard Worker 
468*9880d681SAndroid Build Coastguard Worker   TII = ST.getInstrInfo();
469*9880d681SAndroid Build Coastguard Worker   TRI = &TII->getRegisterInfo();
470*9880d681SAndroid Build Coastguard Worker   MRI = &MF.getRegInfo();
471*9880d681SAndroid Build Coastguard Worker 
472*9880d681SAndroid Build Coastguard Worker   char GlobalFlags = analyzeFunction(MF);
473*9880d681SAndroid Build Coastguard Worker   if (!(GlobalFlags & StateWQM)) {
474*9880d681SAndroid Build Coastguard Worker     lowerLiveMaskQueries(AMDGPU::EXEC);
475*9880d681SAndroid Build Coastguard Worker     return !LiveMaskQueries.empty();
476*9880d681SAndroid Build Coastguard Worker   }
477*9880d681SAndroid Build Coastguard Worker 
478*9880d681SAndroid Build Coastguard Worker   // Store a copy of the original live mask when required
479*9880d681SAndroid Build Coastguard Worker   unsigned LiveMaskReg = 0;
480*9880d681SAndroid Build Coastguard Worker   {
481*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock &Entry = MF.front();
482*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock::iterator EntryMI = Entry.getFirstNonPHI();
483*9880d681SAndroid Build Coastguard Worker 
484*9880d681SAndroid Build Coastguard Worker     if (GlobalFlags & StateExact || !LiveMaskQueries.empty()) {
485*9880d681SAndroid Build Coastguard Worker       LiveMaskReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
486*9880d681SAndroid Build Coastguard Worker       BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::COPY), LiveMaskReg)
487*9880d681SAndroid Build Coastguard Worker           .addReg(AMDGPU::EXEC);
488*9880d681SAndroid Build Coastguard Worker     }
489*9880d681SAndroid Build Coastguard Worker 
490*9880d681SAndroid Build Coastguard Worker     if (GlobalFlags == StateWQM) {
491*9880d681SAndroid Build Coastguard Worker       // For a shader that needs only WQM, we can just set it once.
492*9880d681SAndroid Build Coastguard Worker       BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
493*9880d681SAndroid Build Coastguard Worker               AMDGPU::EXEC)
494*9880d681SAndroid Build Coastguard Worker           .addReg(AMDGPU::EXEC);
495*9880d681SAndroid Build Coastguard Worker 
496*9880d681SAndroid Build Coastguard Worker       lowerLiveMaskQueries(LiveMaskReg);
497*9880d681SAndroid Build Coastguard Worker       // EntryMI may become invalid here
498*9880d681SAndroid Build Coastguard Worker       return true;
499*9880d681SAndroid Build Coastguard Worker     }
500*9880d681SAndroid Build Coastguard Worker   }
501*9880d681SAndroid Build Coastguard Worker 
502*9880d681SAndroid Build Coastguard Worker   lowerLiveMaskQueries(LiveMaskReg);
503*9880d681SAndroid Build Coastguard Worker 
504*9880d681SAndroid Build Coastguard Worker   // Handle the general case
505*9880d681SAndroid Build Coastguard Worker   for (auto BII : Blocks)
506*9880d681SAndroid Build Coastguard Worker     processBlock(*BII.first, LiveMaskReg, BII.first == &*MF.begin());
507*9880d681SAndroid Build Coastguard Worker 
508*9880d681SAndroid Build Coastguard Worker   return true;
509*9880d681SAndroid Build Coastguard Worker }
510