xref: /aosp_15_r20/external/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
2*9880d681SAndroid Build Coastguard Worker //
3*9880d681SAndroid Build Coastguard Worker //                     The LLVM Compiler Infrastructure
4*9880d681SAndroid Build Coastguard Worker //
5*9880d681SAndroid Build Coastguard Worker // This file is distributed under the University of Illinois Open Source
6*9880d681SAndroid Build Coastguard Worker // License. See LICENSE.TXT for details.
7*9880d681SAndroid Build Coastguard Worker //
8*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
9*9880d681SAndroid Build Coastguard Worker //
10*9880d681SAndroid Build Coastguard Worker /// \file
11*9880d681SAndroid Build Coastguard Worker /// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
12*9880d681SAndroid Build Coastguard Worker //
13*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
14*9880d681SAndroid Build Coastguard Worker 
15*9880d681SAndroid Build Coastguard Worker #include "AMDGPUSubtarget.h"
16*9880d681SAndroid Build Coastguard Worker #include "R600ISelLowering.h"
17*9880d681SAndroid Build Coastguard Worker #include "R600InstrInfo.h"
18*9880d681SAndroid Build Coastguard Worker #include "SIFrameLowering.h"
19*9880d681SAndroid Build Coastguard Worker #include "SIISelLowering.h"
20*9880d681SAndroid Build Coastguard Worker #include "SIInstrInfo.h"
21*9880d681SAndroid Build Coastguard Worker #include "SIMachineFunctionInfo.h"
22*9880d681SAndroid Build Coastguard Worker #include "llvm/ADT/SmallString.h"
23*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineScheduler.h"
24*9880d681SAndroid Build Coastguard Worker 
25*9880d681SAndroid Build Coastguard Worker using namespace llvm;
26*9880d681SAndroid Build Coastguard Worker 
27*9880d681SAndroid Build Coastguard Worker #define DEBUG_TYPE "amdgpu-subtarget"
28*9880d681SAndroid Build Coastguard Worker 
29*9880d681SAndroid Build Coastguard Worker #define GET_SUBTARGETINFO_ENUM
30*9880d681SAndroid Build Coastguard Worker #define GET_SUBTARGETINFO_TARGET_DESC
31*9880d681SAndroid Build Coastguard Worker #define GET_SUBTARGETINFO_CTOR
32*9880d681SAndroid Build Coastguard Worker #include "AMDGPUGenSubtargetInfo.inc"
33*9880d681SAndroid Build Coastguard Worker 
~AMDGPUSubtarget()34*9880d681SAndroid Build Coastguard Worker AMDGPUSubtarget::~AMDGPUSubtarget() {}
35*9880d681SAndroid Build Coastguard Worker 
36*9880d681SAndroid Build Coastguard Worker AMDGPUSubtarget &
initializeSubtargetDependencies(const Triple & TT,StringRef GPU,StringRef FS)37*9880d681SAndroid Build Coastguard Worker AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
38*9880d681SAndroid Build Coastguard Worker                                                  StringRef GPU, StringRef FS) {
39*9880d681SAndroid Build Coastguard Worker   // Determine default and user-specified characteristics
40*9880d681SAndroid Build Coastguard Worker   // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
41*9880d681SAndroid Build Coastguard Worker   // enabled, but some instructions do not respect them and they run at the
42*9880d681SAndroid Build Coastguard Worker   // double precision rate, so don't enable by default.
43*9880d681SAndroid Build Coastguard Worker   //
44*9880d681SAndroid Build Coastguard Worker   // We want to be able to turn these off, but making this a subtarget feature
45*9880d681SAndroid Build Coastguard Worker   // for SI has the unhelpful behavior that it unsets everything else if you
46*9880d681SAndroid Build Coastguard Worker   // disable it.
47*9880d681SAndroid Build Coastguard Worker 
48*9880d681SAndroid Build Coastguard Worker   SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,");
49*9880d681SAndroid Build Coastguard Worker   if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
50*9880d681SAndroid Build Coastguard Worker     FullFS += "+flat-for-global,+unaligned-buffer-access,";
51*9880d681SAndroid Build Coastguard Worker   FullFS += FS;
52*9880d681SAndroid Build Coastguard Worker 
53*9880d681SAndroid Build Coastguard Worker   ParseSubtargetFeatures(GPU, FullFS);
54*9880d681SAndroid Build Coastguard Worker 
55*9880d681SAndroid Build Coastguard Worker   // FIXME: I don't think think Evergreen has any useful support for
56*9880d681SAndroid Build Coastguard Worker   // denormals, but should be checked. Should we issue a warning somewhere
57*9880d681SAndroid Build Coastguard Worker   // if someone tries to enable these?
58*9880d681SAndroid Build Coastguard Worker   if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
59*9880d681SAndroid Build Coastguard Worker     FP32Denormals = false;
60*9880d681SAndroid Build Coastguard Worker     FP64Denormals = false;
61*9880d681SAndroid Build Coastguard Worker   }
62*9880d681SAndroid Build Coastguard Worker 
63*9880d681SAndroid Build Coastguard Worker   // Set defaults if needed.
64*9880d681SAndroid Build Coastguard Worker   if (MaxPrivateElementSize == 0)
65*9880d681SAndroid Build Coastguard Worker     MaxPrivateElementSize = 4;
66*9880d681SAndroid Build Coastguard Worker 
67*9880d681SAndroid Build Coastguard Worker   return *this;
68*9880d681SAndroid Build Coastguard Worker }
69*9880d681SAndroid Build Coastguard Worker 
AMDGPUSubtarget(const Triple & TT,StringRef GPU,StringRef FS,const TargetMachine & TM)70*9880d681SAndroid Build Coastguard Worker AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
71*9880d681SAndroid Build Coastguard Worker                                  const TargetMachine &TM)
72*9880d681SAndroid Build Coastguard Worker   : AMDGPUGenSubtargetInfo(TT, GPU, FS),
73*9880d681SAndroid Build Coastguard Worker     TargetTriple(TT),
74*9880d681SAndroid Build Coastguard Worker     Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
75*9880d681SAndroid Build Coastguard Worker     IsaVersion(ISAVersion0_0_0),
76*9880d681SAndroid Build Coastguard Worker     WavefrontSize(64),
77*9880d681SAndroid Build Coastguard Worker     LocalMemorySize(0),
78*9880d681SAndroid Build Coastguard Worker     LDSBankCount(0),
79*9880d681SAndroid Build Coastguard Worker     MaxPrivateElementSize(0),
80*9880d681SAndroid Build Coastguard Worker 
81*9880d681SAndroid Build Coastguard Worker     FastFMAF32(false),
82*9880d681SAndroid Build Coastguard Worker     HalfRate64Ops(false),
83*9880d681SAndroid Build Coastguard Worker 
84*9880d681SAndroid Build Coastguard Worker     FP32Denormals(false),
85*9880d681SAndroid Build Coastguard Worker     FP64Denormals(false),
86*9880d681SAndroid Build Coastguard Worker     FPExceptions(false),
87*9880d681SAndroid Build Coastguard Worker     FlatForGlobal(false),
88*9880d681SAndroid Build Coastguard Worker     UnalignedBufferAccess(false),
89*9880d681SAndroid Build Coastguard Worker 
90*9880d681SAndroid Build Coastguard Worker     EnableXNACK(false),
91*9880d681SAndroid Build Coastguard Worker     DebuggerInsertNops(false),
92*9880d681SAndroid Build Coastguard Worker     DebuggerReserveRegs(false),
93*9880d681SAndroid Build Coastguard Worker     DebuggerEmitPrologue(false),
94*9880d681SAndroid Build Coastguard Worker 
95*9880d681SAndroid Build Coastguard Worker     EnableVGPRSpilling(false),
96*9880d681SAndroid Build Coastguard Worker     EnablePromoteAlloca(false),
97*9880d681SAndroid Build Coastguard Worker     EnableLoadStoreOpt(false),
98*9880d681SAndroid Build Coastguard Worker     EnableUnsafeDSOffsetFolding(false),
99*9880d681SAndroid Build Coastguard Worker     EnableSIScheduler(false),
100*9880d681SAndroid Build Coastguard Worker     DumpCode(false),
101*9880d681SAndroid Build Coastguard Worker 
102*9880d681SAndroid Build Coastguard Worker     FP64(false),
103*9880d681SAndroid Build Coastguard Worker     IsGCN(false),
104*9880d681SAndroid Build Coastguard Worker     GCN1Encoding(false),
105*9880d681SAndroid Build Coastguard Worker     GCN3Encoding(false),
106*9880d681SAndroid Build Coastguard Worker     CIInsts(false),
107*9880d681SAndroid Build Coastguard Worker     SGPRInitBug(false),
108*9880d681SAndroid Build Coastguard Worker     HasSMemRealTime(false),
109*9880d681SAndroid Build Coastguard Worker     Has16BitInsts(false),
110*9880d681SAndroid Build Coastguard Worker     FlatAddressSpace(false),
111*9880d681SAndroid Build Coastguard Worker 
112*9880d681SAndroid Build Coastguard Worker     R600ALUInst(false),
113*9880d681SAndroid Build Coastguard Worker     CaymanISA(false),
114*9880d681SAndroid Build Coastguard Worker     CFALUBug(false),
115*9880d681SAndroid Build Coastguard Worker     HasVertexCache(false),
116*9880d681SAndroid Build Coastguard Worker     TexVTXClauseSize(0),
117*9880d681SAndroid Build Coastguard Worker 
118*9880d681SAndroid Build Coastguard Worker     FeatureDisable(false),
119*9880d681SAndroid Build Coastguard Worker     InstrItins(getInstrItineraryForCPU(GPU)) {
120*9880d681SAndroid Build Coastguard Worker   initializeSubtargetDependencies(TT, GPU, FS);
121*9880d681SAndroid Build Coastguard Worker }
122*9880d681SAndroid Build Coastguard Worker 
123*9880d681SAndroid Build Coastguard Worker // FIXME: These limits are for SI. Did they change with the larger maximum LDS
124*9880d681SAndroid Build Coastguard Worker // size?
getMaxLocalMemSizeWithWaveCount(unsigned NWaves) const125*9880d681SAndroid Build Coastguard Worker unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves) const {
126*9880d681SAndroid Build Coastguard Worker   switch (NWaves) {
127*9880d681SAndroid Build Coastguard Worker   case 10:
128*9880d681SAndroid Build Coastguard Worker     return 1638;
129*9880d681SAndroid Build Coastguard Worker   case 9:
130*9880d681SAndroid Build Coastguard Worker     return 1820;
131*9880d681SAndroid Build Coastguard Worker   case 8:
132*9880d681SAndroid Build Coastguard Worker     return 2048;
133*9880d681SAndroid Build Coastguard Worker   case 7:
134*9880d681SAndroid Build Coastguard Worker     return 2340;
135*9880d681SAndroid Build Coastguard Worker   case 6:
136*9880d681SAndroid Build Coastguard Worker     return 2730;
137*9880d681SAndroid Build Coastguard Worker   case 5:
138*9880d681SAndroid Build Coastguard Worker     return 3276;
139*9880d681SAndroid Build Coastguard Worker   case 4:
140*9880d681SAndroid Build Coastguard Worker     return 4096;
141*9880d681SAndroid Build Coastguard Worker   case 3:
142*9880d681SAndroid Build Coastguard Worker     return 5461;
143*9880d681SAndroid Build Coastguard Worker   case 2:
144*9880d681SAndroid Build Coastguard Worker     return 8192;
145*9880d681SAndroid Build Coastguard Worker   default:
146*9880d681SAndroid Build Coastguard Worker     return getLocalMemorySize();
147*9880d681SAndroid Build Coastguard Worker   }
148*9880d681SAndroid Build Coastguard Worker }
149*9880d681SAndroid Build Coastguard Worker 
getOccupancyWithLocalMemSize(uint32_t Bytes) const150*9880d681SAndroid Build Coastguard Worker unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes) const {
151*9880d681SAndroid Build Coastguard Worker   if (Bytes <= 1638)
152*9880d681SAndroid Build Coastguard Worker     return 10;
153*9880d681SAndroid Build Coastguard Worker 
154*9880d681SAndroid Build Coastguard Worker   if (Bytes <= 1820)
155*9880d681SAndroid Build Coastguard Worker     return 9;
156*9880d681SAndroid Build Coastguard Worker 
157*9880d681SAndroid Build Coastguard Worker   if (Bytes <= 2048)
158*9880d681SAndroid Build Coastguard Worker     return 8;
159*9880d681SAndroid Build Coastguard Worker 
160*9880d681SAndroid Build Coastguard Worker   if (Bytes <= 2340)
161*9880d681SAndroid Build Coastguard Worker     return 7;
162*9880d681SAndroid Build Coastguard Worker 
163*9880d681SAndroid Build Coastguard Worker   if (Bytes <= 2730)
164*9880d681SAndroid Build Coastguard Worker     return 6;
165*9880d681SAndroid Build Coastguard Worker 
166*9880d681SAndroid Build Coastguard Worker   if (Bytes <= 3276)
167*9880d681SAndroid Build Coastguard Worker     return 5;
168*9880d681SAndroid Build Coastguard Worker 
169*9880d681SAndroid Build Coastguard Worker   if (Bytes <= 4096)
170*9880d681SAndroid Build Coastguard Worker     return 4;
171*9880d681SAndroid Build Coastguard Worker 
172*9880d681SAndroid Build Coastguard Worker   if (Bytes <= 5461)
173*9880d681SAndroid Build Coastguard Worker     return 3;
174*9880d681SAndroid Build Coastguard Worker 
175*9880d681SAndroid Build Coastguard Worker   if (Bytes <= 8192)
176*9880d681SAndroid Build Coastguard Worker     return 2;
177*9880d681SAndroid Build Coastguard Worker 
178*9880d681SAndroid Build Coastguard Worker   return 1;
179*9880d681SAndroid Build Coastguard Worker }
180*9880d681SAndroid Build Coastguard Worker 
R600Subtarget(const Triple & TT,StringRef GPU,StringRef FS,const TargetMachine & TM)181*9880d681SAndroid Build Coastguard Worker R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
182*9880d681SAndroid Build Coastguard Worker                              const TargetMachine &TM) :
183*9880d681SAndroid Build Coastguard Worker   AMDGPUSubtarget(TT, GPU, FS, TM),
184*9880d681SAndroid Build Coastguard Worker   InstrInfo(*this),
185*9880d681SAndroid Build Coastguard Worker   FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
186*9880d681SAndroid Build Coastguard Worker   TLInfo(TM, *this) {}
187*9880d681SAndroid Build Coastguard Worker 
SISubtarget(const Triple & TT,StringRef GPU,StringRef FS,const TargetMachine & TM)188*9880d681SAndroid Build Coastguard Worker SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
189*9880d681SAndroid Build Coastguard Worker                          const TargetMachine &TM) :
190*9880d681SAndroid Build Coastguard Worker   AMDGPUSubtarget(TT, GPU, FS, TM),
191*9880d681SAndroid Build Coastguard Worker   InstrInfo(*this),
192*9880d681SAndroid Build Coastguard Worker   FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
193*9880d681SAndroid Build Coastguard Worker   TLInfo(TM, *this),
194*9880d681SAndroid Build Coastguard Worker   GISel() {}
195*9880d681SAndroid Build Coastguard Worker 
getStackEntrySize() const196*9880d681SAndroid Build Coastguard Worker unsigned R600Subtarget::getStackEntrySize() const {
197*9880d681SAndroid Build Coastguard Worker   switch (getWavefrontSize()) {
198*9880d681SAndroid Build Coastguard Worker   case 16:
199*9880d681SAndroid Build Coastguard Worker     return 8;
200*9880d681SAndroid Build Coastguard Worker   case 32:
201*9880d681SAndroid Build Coastguard Worker     return hasCaymanISA() ? 4 : 8;
202*9880d681SAndroid Build Coastguard Worker   case 64:
203*9880d681SAndroid Build Coastguard Worker     return 4;
204*9880d681SAndroid Build Coastguard Worker   default:
205*9880d681SAndroid Build Coastguard Worker     llvm_unreachable("Illegal wavefront size.");
206*9880d681SAndroid Build Coastguard Worker   }
207*9880d681SAndroid Build Coastguard Worker }
208*9880d681SAndroid Build Coastguard Worker 
overrideSchedPolicy(MachineSchedPolicy & Policy,unsigned NumRegionInstrs) const209*9880d681SAndroid Build Coastguard Worker void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
210*9880d681SAndroid Build Coastguard Worker                                       unsigned NumRegionInstrs) const {
211*9880d681SAndroid Build Coastguard Worker   // Track register pressure so the scheduler can try to decrease
212*9880d681SAndroid Build Coastguard Worker   // pressure once register usage is above the threshold defined by
213*9880d681SAndroid Build Coastguard Worker   // SIRegisterInfo::getRegPressureSetLimit()
214*9880d681SAndroid Build Coastguard Worker   Policy.ShouldTrackPressure = true;
215*9880d681SAndroid Build Coastguard Worker 
216*9880d681SAndroid Build Coastguard Worker   // Enabling both top down and bottom up scheduling seems to give us less
217*9880d681SAndroid Build Coastguard Worker   // register spills than just using one of these approaches on its own.
218*9880d681SAndroid Build Coastguard Worker   Policy.OnlyTopDown = false;
219*9880d681SAndroid Build Coastguard Worker   Policy.OnlyBottomUp = false;
220*9880d681SAndroid Build Coastguard Worker 
221*9880d681SAndroid Build Coastguard Worker   // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
222*9880d681SAndroid Build Coastguard Worker   if (!enableSIScheduler())
223*9880d681SAndroid Build Coastguard Worker     Policy.ShouldTrackLaneMasks = true;
224*9880d681SAndroid Build Coastguard Worker }
225*9880d681SAndroid Build Coastguard Worker 
isVGPRSpillingEnabled(const Function & F) const226*9880d681SAndroid Build Coastguard Worker bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
227*9880d681SAndroid Build Coastguard Worker   return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
228*9880d681SAndroid Build Coastguard Worker }
229*9880d681SAndroid Build Coastguard Worker 
getAmdKernelCodeChipID() const230*9880d681SAndroid Build Coastguard Worker unsigned SISubtarget::getAmdKernelCodeChipID() const {
231*9880d681SAndroid Build Coastguard Worker   switch (getGeneration()) {
232*9880d681SAndroid Build Coastguard Worker   case SEA_ISLANDS:
233*9880d681SAndroid Build Coastguard Worker     return 12;
234*9880d681SAndroid Build Coastguard Worker   default:
235*9880d681SAndroid Build Coastguard Worker     llvm_unreachable("ChipID unknown");
236*9880d681SAndroid Build Coastguard Worker   }
237*9880d681SAndroid Build Coastguard Worker }
238*9880d681SAndroid Build Coastguard Worker 
getIsaVersion() const239*9880d681SAndroid Build Coastguard Worker AMDGPU::IsaVersion SISubtarget::getIsaVersion() const {
240*9880d681SAndroid Build Coastguard Worker   return AMDGPU::getIsaVersion(getFeatureBits());
241*9880d681SAndroid Build Coastguard Worker }
242