xref: /aosp_15_r20/external/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker //===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer  -------------------===//
2*9880d681SAndroid Build Coastguard Worker //
3*9880d681SAndroid Build Coastguard Worker //                     The LLVM Compiler Infrastructure
4*9880d681SAndroid Build Coastguard Worker //
5*9880d681SAndroid Build Coastguard Worker // This file is distributed under the University of Illinois Open Source
6*9880d681SAndroid Build Coastguard Worker // License. See LICENSE.TXT for details.
7*9880d681SAndroid Build Coastguard Worker //
8*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
9*9880d681SAndroid Build Coastguard Worker //
10*9880d681SAndroid Build Coastguard Worker /// \file
11*9880d681SAndroid Build Coastguard Worker ///
12*9880d681SAndroid Build Coastguard Worker /// The AMDGPUAsmPrinter is used to print both assembly strings and binary
13*9880d681SAndroid Build Coastguard Worker /// code.  When passed an MCAsmStreamer it prints assembly and when passed
14*9880d681SAndroid Build Coastguard Worker /// an MCObjectStreamer it outputs binary code.
15*9880d681SAndroid Build Coastguard Worker //
16*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
17*9880d681SAndroid Build Coastguard Worker //
18*9880d681SAndroid Build Coastguard Worker 
19*9880d681SAndroid Build Coastguard Worker #include "AMDGPUAsmPrinter.h"
20*9880d681SAndroid Build Coastguard Worker #include "MCTargetDesc/AMDGPUTargetStreamer.h"
21*9880d681SAndroid Build Coastguard Worker #include "InstPrinter/AMDGPUInstPrinter.h"
22*9880d681SAndroid Build Coastguard Worker #include "Utils/AMDGPUBaseInfo.h"
23*9880d681SAndroid Build Coastguard Worker #include "AMDGPU.h"
24*9880d681SAndroid Build Coastguard Worker #include "AMDKernelCodeT.h"
25*9880d681SAndroid Build Coastguard Worker #include "AMDGPUSubtarget.h"
26*9880d681SAndroid Build Coastguard Worker #include "R600Defines.h"
27*9880d681SAndroid Build Coastguard Worker #include "R600MachineFunctionInfo.h"
28*9880d681SAndroid Build Coastguard Worker #include "R600RegisterInfo.h"
29*9880d681SAndroid Build Coastguard Worker #include "SIDefines.h"
30*9880d681SAndroid Build Coastguard Worker #include "SIMachineFunctionInfo.h"
31*9880d681SAndroid Build Coastguard Worker #include "SIInstrInfo.h"
32*9880d681SAndroid Build Coastguard Worker #include "SIRegisterInfo.h"
33*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFrameInfo.h"
34*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/DiagnosticInfo.h"
35*9880d681SAndroid Build Coastguard Worker #include "llvm/MC/MCContext.h"
36*9880d681SAndroid Build Coastguard Worker #include "llvm/MC/MCSectionELF.h"
37*9880d681SAndroid Build Coastguard Worker #include "llvm/MC/MCStreamer.h"
38*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/ELF.h"
39*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/MathExtras.h"
40*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/TargetRegistry.h"
41*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/TargetLoweringObjectFile.h"
42*9880d681SAndroid Build Coastguard Worker 
43*9880d681SAndroid Build Coastguard Worker using namespace llvm;
44*9880d681SAndroid Build Coastguard Worker 
45*9880d681SAndroid Build Coastguard Worker // TODO: This should get the default rounding mode from the kernel. We just set
46*9880d681SAndroid Build Coastguard Worker // the default here, but this could change if the OpenCL rounding mode pragmas
47*9880d681SAndroid Build Coastguard Worker // are used.
48*9880d681SAndroid Build Coastguard Worker //
49*9880d681SAndroid Build Coastguard Worker // The denormal mode here should match what is reported by the OpenCL runtime
50*9880d681SAndroid Build Coastguard Worker // for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
51*9880d681SAndroid Build Coastguard Worker // can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
52*9880d681SAndroid Build Coastguard Worker //
53*9880d681SAndroid Build Coastguard Worker // AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
54*9880d681SAndroid Build Coastguard Worker // precision, and leaves single precision to flush all and does not report
55*9880d681SAndroid Build Coastguard Worker // CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
56*9880d681SAndroid Build Coastguard Worker // CL_FP_DENORM for both.
57*9880d681SAndroid Build Coastguard Worker //
58*9880d681SAndroid Build Coastguard Worker // FIXME: It seems some instructions do not support single precision denormals
59*9880d681SAndroid Build Coastguard Worker // regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
60*9880d681SAndroid Build Coastguard Worker // and sin_f32, cos_f32 on most parts).
61*9880d681SAndroid Build Coastguard Worker 
62*9880d681SAndroid Build Coastguard Worker // We want to use these instructions, and using fp32 denormals also causes
63*9880d681SAndroid Build Coastguard Worker // instructions to run at the double precision rate for the device so it's
64*9880d681SAndroid Build Coastguard Worker // probably best to just report no single precision denormals.
getFPMode(const MachineFunction & F)65*9880d681SAndroid Build Coastguard Worker static uint32_t getFPMode(const MachineFunction &F) {
66*9880d681SAndroid Build Coastguard Worker   const SISubtarget& ST = F.getSubtarget<SISubtarget>();
67*9880d681SAndroid Build Coastguard Worker   // TODO: Is there any real use for the flush in only / flush out only modes?
68*9880d681SAndroid Build Coastguard Worker 
69*9880d681SAndroid Build Coastguard Worker   uint32_t FP32Denormals =
70*9880d681SAndroid Build Coastguard Worker     ST.hasFP32Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
71*9880d681SAndroid Build Coastguard Worker 
72*9880d681SAndroid Build Coastguard Worker   uint32_t FP64Denormals =
73*9880d681SAndroid Build Coastguard Worker     ST.hasFP64Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
74*9880d681SAndroid Build Coastguard Worker 
75*9880d681SAndroid Build Coastguard Worker   return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
76*9880d681SAndroid Build Coastguard Worker          FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
77*9880d681SAndroid Build Coastguard Worker          FP_DENORM_MODE_SP(FP32Denormals) |
78*9880d681SAndroid Build Coastguard Worker          FP_DENORM_MODE_DP(FP64Denormals);
79*9880d681SAndroid Build Coastguard Worker }
80*9880d681SAndroid Build Coastguard Worker 
81*9880d681SAndroid Build Coastguard Worker static AsmPrinter *
createAMDGPUAsmPrinterPass(TargetMachine & tm,std::unique_ptr<MCStreamer> && Streamer)82*9880d681SAndroid Build Coastguard Worker createAMDGPUAsmPrinterPass(TargetMachine &tm,
83*9880d681SAndroid Build Coastguard Worker                            std::unique_ptr<MCStreamer> &&Streamer) {
84*9880d681SAndroid Build Coastguard Worker   return new AMDGPUAsmPrinter(tm, std::move(Streamer));
85*9880d681SAndroid Build Coastguard Worker }
86*9880d681SAndroid Build Coastguard Worker 
LLVMInitializeAMDGPUAsmPrinter()87*9880d681SAndroid Build Coastguard Worker extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
88*9880d681SAndroid Build Coastguard Worker   TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
89*9880d681SAndroid Build Coastguard Worker   TargetRegistry::RegisterAsmPrinter(TheGCNTarget, createAMDGPUAsmPrinterPass);
90*9880d681SAndroid Build Coastguard Worker }
91*9880d681SAndroid Build Coastguard Worker 
AMDGPUAsmPrinter(TargetMachine & TM,std::unique_ptr<MCStreamer> Streamer)92*9880d681SAndroid Build Coastguard Worker AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
93*9880d681SAndroid Build Coastguard Worker                                    std::unique_ptr<MCStreamer> Streamer)
94*9880d681SAndroid Build Coastguard Worker     : AsmPrinter(TM, std::move(Streamer)) {}
95*9880d681SAndroid Build Coastguard Worker 
EmitStartOfAsmFile(Module & M)96*9880d681SAndroid Build Coastguard Worker void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
97*9880d681SAndroid Build Coastguard Worker   if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
98*9880d681SAndroid Build Coastguard Worker     return;
99*9880d681SAndroid Build Coastguard Worker 
100*9880d681SAndroid Build Coastguard Worker   // Need to construct an MCSubtargetInfo here in case we have no functions
101*9880d681SAndroid Build Coastguard Worker   // in the module.
102*9880d681SAndroid Build Coastguard Worker   std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
103*9880d681SAndroid Build Coastguard Worker         TM.getTargetTriple().str(), TM.getTargetCPU(),
104*9880d681SAndroid Build Coastguard Worker         TM.getTargetFeatureString()));
105*9880d681SAndroid Build Coastguard Worker 
106*9880d681SAndroid Build Coastguard Worker   AMDGPUTargetStreamer *TS =
107*9880d681SAndroid Build Coastguard Worker       static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
108*9880d681SAndroid Build Coastguard Worker 
109*9880d681SAndroid Build Coastguard Worker   TS->EmitDirectiveHSACodeObjectVersion(2, 1);
110*9880d681SAndroid Build Coastguard Worker 
111*9880d681SAndroid Build Coastguard Worker   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
112*9880d681SAndroid Build Coastguard Worker   TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
113*9880d681SAndroid Build Coastguard Worker                                     "AMD", "AMDGPU");
114*9880d681SAndroid Build Coastguard Worker }
115*9880d681SAndroid Build Coastguard Worker 
EmitFunctionBodyStart()116*9880d681SAndroid Build Coastguard Worker void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
117*9880d681SAndroid Build Coastguard Worker   const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
118*9880d681SAndroid Build Coastguard Worker   SIProgramInfo KernelInfo;
119*9880d681SAndroid Build Coastguard Worker   if (STM.isAmdHsaOS()) {
120*9880d681SAndroid Build Coastguard Worker     getSIProgramInfo(KernelInfo, *MF);
121*9880d681SAndroid Build Coastguard Worker     EmitAmdKernelCodeT(*MF, KernelInfo);
122*9880d681SAndroid Build Coastguard Worker   }
123*9880d681SAndroid Build Coastguard Worker }
124*9880d681SAndroid Build Coastguard Worker 
EmitFunctionEntryLabel()125*9880d681SAndroid Build Coastguard Worker void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
126*9880d681SAndroid Build Coastguard Worker   const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
127*9880d681SAndroid Build Coastguard Worker   const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
128*9880d681SAndroid Build Coastguard Worker   if (MFI->isKernel() && STM.isAmdHsaOS()) {
129*9880d681SAndroid Build Coastguard Worker     AMDGPUTargetStreamer *TS =
130*9880d681SAndroid Build Coastguard Worker         static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
131*9880d681SAndroid Build Coastguard Worker     TS->EmitAMDGPUSymbolType(CurrentFnSym->getName(),
132*9880d681SAndroid Build Coastguard Worker                              ELF::STT_AMDGPU_HSA_KERNEL);
133*9880d681SAndroid Build Coastguard Worker   }
134*9880d681SAndroid Build Coastguard Worker 
135*9880d681SAndroid Build Coastguard Worker   AsmPrinter::EmitFunctionEntryLabel();
136*9880d681SAndroid Build Coastguard Worker }
137*9880d681SAndroid Build Coastguard Worker 
EmitGlobalVariable(const GlobalVariable * GV)138*9880d681SAndroid Build Coastguard Worker void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
139*9880d681SAndroid Build Coastguard Worker 
140*9880d681SAndroid Build Coastguard Worker   // Group segment variables aren't emitted in HSA.
141*9880d681SAndroid Build Coastguard Worker   if (AMDGPU::isGroupSegment(GV))
142*9880d681SAndroid Build Coastguard Worker     return;
143*9880d681SAndroid Build Coastguard Worker 
144*9880d681SAndroid Build Coastguard Worker   AsmPrinter::EmitGlobalVariable(GV);
145*9880d681SAndroid Build Coastguard Worker }
146*9880d681SAndroid Build Coastguard Worker 
runOnMachineFunction(MachineFunction & MF)147*9880d681SAndroid Build Coastguard Worker bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
148*9880d681SAndroid Build Coastguard Worker 
149*9880d681SAndroid Build Coastguard Worker   // The starting address of all shader programs must be 256 bytes aligned.
150*9880d681SAndroid Build Coastguard Worker   MF.setAlignment(8);
151*9880d681SAndroid Build Coastguard Worker 
152*9880d681SAndroid Build Coastguard Worker   SetupMachineFunction(MF);
153*9880d681SAndroid Build Coastguard Worker 
154*9880d681SAndroid Build Coastguard Worker   MCContext &Context = getObjFileLowering().getContext();
155*9880d681SAndroid Build Coastguard Worker   MCSectionELF *ConfigSection =
156*9880d681SAndroid Build Coastguard Worker       Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
157*9880d681SAndroid Build Coastguard Worker   OutStreamer->SwitchSection(ConfigSection);
158*9880d681SAndroid Build Coastguard Worker 
159*9880d681SAndroid Build Coastguard Worker   const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
160*9880d681SAndroid Build Coastguard Worker   SIProgramInfo KernelInfo;
161*9880d681SAndroid Build Coastguard Worker   if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
162*9880d681SAndroid Build Coastguard Worker     getSIProgramInfo(KernelInfo, MF);
163*9880d681SAndroid Build Coastguard Worker     if (!STM.isAmdHsaOS()) {
164*9880d681SAndroid Build Coastguard Worker       EmitProgramInfoSI(MF, KernelInfo);
165*9880d681SAndroid Build Coastguard Worker     }
166*9880d681SAndroid Build Coastguard Worker   } else {
167*9880d681SAndroid Build Coastguard Worker     EmitProgramInfoR600(MF);
168*9880d681SAndroid Build Coastguard Worker   }
169*9880d681SAndroid Build Coastguard Worker 
170*9880d681SAndroid Build Coastguard Worker   DisasmLines.clear();
171*9880d681SAndroid Build Coastguard Worker   HexLines.clear();
172*9880d681SAndroid Build Coastguard Worker   DisasmLineMaxLen = 0;
173*9880d681SAndroid Build Coastguard Worker 
174*9880d681SAndroid Build Coastguard Worker   EmitFunctionBody();
175*9880d681SAndroid Build Coastguard Worker 
176*9880d681SAndroid Build Coastguard Worker   if (isVerbose()) {
177*9880d681SAndroid Build Coastguard Worker     MCSectionELF *CommentSection =
178*9880d681SAndroid Build Coastguard Worker         Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
179*9880d681SAndroid Build Coastguard Worker     OutStreamer->SwitchSection(CommentSection);
180*9880d681SAndroid Build Coastguard Worker 
181*9880d681SAndroid Build Coastguard Worker     if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
182*9880d681SAndroid Build Coastguard Worker       OutStreamer->emitRawComment(" Kernel info:", false);
183*9880d681SAndroid Build Coastguard Worker       OutStreamer->emitRawComment(" codeLenInByte = " + Twine(KernelInfo.CodeLen),
184*9880d681SAndroid Build Coastguard Worker                                   false);
185*9880d681SAndroid Build Coastguard Worker       OutStreamer->emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
186*9880d681SAndroid Build Coastguard Worker                                   false);
187*9880d681SAndroid Build Coastguard Worker       OutStreamer->emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
188*9880d681SAndroid Build Coastguard Worker                                   false);
189*9880d681SAndroid Build Coastguard Worker       OutStreamer->emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
190*9880d681SAndroid Build Coastguard Worker                                   false);
191*9880d681SAndroid Build Coastguard Worker       OutStreamer->emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
192*9880d681SAndroid Build Coastguard Worker                                   false);
193*9880d681SAndroid Build Coastguard Worker       OutStreamer->emitRawComment(" ScratchSize: " + Twine(KernelInfo.ScratchSize),
194*9880d681SAndroid Build Coastguard Worker                                   false);
195*9880d681SAndroid Build Coastguard Worker       OutStreamer->emitRawComment(" LDSByteSize: " + Twine(KernelInfo.LDSSize) +
196*9880d681SAndroid Build Coastguard Worker                                   " bytes/workgroup (compile time only)", false);
197*9880d681SAndroid Build Coastguard Worker 
198*9880d681SAndroid Build Coastguard Worker       OutStreamer->emitRawComment(" ReservedVGPRFirst: " + Twine(KernelInfo.ReservedVGPRFirst),
199*9880d681SAndroid Build Coastguard Worker                                   false);
200*9880d681SAndroid Build Coastguard Worker       OutStreamer->emitRawComment(" ReservedVGPRCount: " + Twine(KernelInfo.ReservedVGPRCount),
201*9880d681SAndroid Build Coastguard Worker                                   false);
202*9880d681SAndroid Build Coastguard Worker 
203*9880d681SAndroid Build Coastguard Worker       if (MF.getSubtarget<SISubtarget>().debuggerEmitPrologue()) {
204*9880d681SAndroid Build Coastguard Worker         OutStreamer->emitRawComment(" DebuggerWavefrontPrivateSegmentOffsetSGPR: s" +
205*9880d681SAndroid Build Coastguard Worker                                     Twine(KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false);
206*9880d681SAndroid Build Coastguard Worker         OutStreamer->emitRawComment(" DebuggerPrivateSegmentBufferSGPR: s" +
207*9880d681SAndroid Build Coastguard Worker                                     Twine(KernelInfo.DebuggerPrivateSegmentBufferSGPR), false);
208*9880d681SAndroid Build Coastguard Worker       }
209*9880d681SAndroid Build Coastguard Worker 
210*9880d681SAndroid Build Coastguard Worker       OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
211*9880d681SAndroid Build Coastguard Worker                                   Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)),
212*9880d681SAndroid Build Coastguard Worker                                   false);
213*9880d681SAndroid Build Coastguard Worker       OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " +
214*9880d681SAndroid Build Coastguard Worker                                   Twine(G_00B84C_TGID_X_EN(KernelInfo.ComputePGMRSrc2)),
215*9880d681SAndroid Build Coastguard Worker                                   false);
216*9880d681SAndroid Build Coastguard Worker       OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
217*9880d681SAndroid Build Coastguard Worker                                   Twine(G_00B84C_TGID_Y_EN(KernelInfo.ComputePGMRSrc2)),
218*9880d681SAndroid Build Coastguard Worker                                   false);
219*9880d681SAndroid Build Coastguard Worker       OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
220*9880d681SAndroid Build Coastguard Worker                                   Twine(G_00B84C_TGID_Z_EN(KernelInfo.ComputePGMRSrc2)),
221*9880d681SAndroid Build Coastguard Worker                                   false);
222*9880d681SAndroid Build Coastguard Worker       OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
223*9880d681SAndroid Build Coastguard Worker                                   Twine(G_00B84C_TIDIG_COMP_CNT(KernelInfo.ComputePGMRSrc2)),
224*9880d681SAndroid Build Coastguard Worker                                   false);
225*9880d681SAndroid Build Coastguard Worker 
226*9880d681SAndroid Build Coastguard Worker     } else {
227*9880d681SAndroid Build Coastguard Worker       R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
228*9880d681SAndroid Build Coastguard Worker       OutStreamer->emitRawComment(
229*9880d681SAndroid Build Coastguard Worker         Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->StackSize)));
230*9880d681SAndroid Build Coastguard Worker     }
231*9880d681SAndroid Build Coastguard Worker   }
232*9880d681SAndroid Build Coastguard Worker 
233*9880d681SAndroid Build Coastguard Worker   if (STM.dumpCode()) {
234*9880d681SAndroid Build Coastguard Worker 
235*9880d681SAndroid Build Coastguard Worker     OutStreamer->SwitchSection(
236*9880d681SAndroid Build Coastguard Worker         Context.getELFSection(".AMDGPU.disasm", ELF::SHT_NOTE, 0));
237*9880d681SAndroid Build Coastguard Worker 
238*9880d681SAndroid Build Coastguard Worker     for (size_t i = 0; i < DisasmLines.size(); ++i) {
239*9880d681SAndroid Build Coastguard Worker       std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
240*9880d681SAndroid Build Coastguard Worker       Comment += " ; " + HexLines[i] + "\n";
241*9880d681SAndroid Build Coastguard Worker 
242*9880d681SAndroid Build Coastguard Worker       OutStreamer->EmitBytes(StringRef(DisasmLines[i]));
243*9880d681SAndroid Build Coastguard Worker       OutStreamer->EmitBytes(StringRef(Comment));
244*9880d681SAndroid Build Coastguard Worker     }
245*9880d681SAndroid Build Coastguard Worker   }
246*9880d681SAndroid Build Coastguard Worker 
247*9880d681SAndroid Build Coastguard Worker   return false;
248*9880d681SAndroid Build Coastguard Worker }
249*9880d681SAndroid Build Coastguard Worker 
EmitProgramInfoR600(const MachineFunction & MF)250*9880d681SAndroid Build Coastguard Worker void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
251*9880d681SAndroid Build Coastguard Worker   unsigned MaxGPR = 0;
252*9880d681SAndroid Build Coastguard Worker   bool killPixel = false;
253*9880d681SAndroid Build Coastguard Worker   const R600Subtarget &STM = MF.getSubtarget<R600Subtarget>();
254*9880d681SAndroid Build Coastguard Worker   const R600RegisterInfo *RI = STM.getRegisterInfo();
255*9880d681SAndroid Build Coastguard Worker   const R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
256*9880d681SAndroid Build Coastguard Worker 
257*9880d681SAndroid Build Coastguard Worker   for (const MachineBasicBlock &MBB : MF) {
258*9880d681SAndroid Build Coastguard Worker     for (const MachineInstr &MI : MBB) {
259*9880d681SAndroid Build Coastguard Worker       if (MI.getOpcode() == AMDGPU::KILLGT)
260*9880d681SAndroid Build Coastguard Worker         killPixel = true;
261*9880d681SAndroid Build Coastguard Worker       unsigned numOperands = MI.getNumOperands();
262*9880d681SAndroid Build Coastguard Worker       for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
263*9880d681SAndroid Build Coastguard Worker         const MachineOperand &MO = MI.getOperand(op_idx);
264*9880d681SAndroid Build Coastguard Worker         if (!MO.isReg())
265*9880d681SAndroid Build Coastguard Worker           continue;
266*9880d681SAndroid Build Coastguard Worker         unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff;
267*9880d681SAndroid Build Coastguard Worker 
268*9880d681SAndroid Build Coastguard Worker         // Register with value > 127 aren't GPR
269*9880d681SAndroid Build Coastguard Worker         if (HWReg > 127)
270*9880d681SAndroid Build Coastguard Worker           continue;
271*9880d681SAndroid Build Coastguard Worker         MaxGPR = std::max(MaxGPR, HWReg);
272*9880d681SAndroid Build Coastguard Worker       }
273*9880d681SAndroid Build Coastguard Worker     }
274*9880d681SAndroid Build Coastguard Worker   }
275*9880d681SAndroid Build Coastguard Worker 
276*9880d681SAndroid Build Coastguard Worker   unsigned RsrcReg;
277*9880d681SAndroid Build Coastguard Worker   if (STM.getGeneration() >= R600Subtarget::EVERGREEN) {
278*9880d681SAndroid Build Coastguard Worker     // Evergreen / Northern Islands
279*9880d681SAndroid Build Coastguard Worker     switch (MF.getFunction()->getCallingConv()) {
280*9880d681SAndroid Build Coastguard Worker     default: // Fall through
281*9880d681SAndroid Build Coastguard Worker     case CallingConv::AMDGPU_CS: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
282*9880d681SAndroid Build Coastguard Worker     case CallingConv::AMDGPU_GS: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
283*9880d681SAndroid Build Coastguard Worker     case CallingConv::AMDGPU_PS: RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
284*9880d681SAndroid Build Coastguard Worker     case CallingConv::AMDGPU_VS: RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break;
285*9880d681SAndroid Build Coastguard Worker     }
286*9880d681SAndroid Build Coastguard Worker   } else {
287*9880d681SAndroid Build Coastguard Worker     // R600 / R700
288*9880d681SAndroid Build Coastguard Worker     switch (MF.getFunction()->getCallingConv()) {
289*9880d681SAndroid Build Coastguard Worker     default: // Fall through
290*9880d681SAndroid Build Coastguard Worker     case CallingConv::AMDGPU_GS: // Fall through
291*9880d681SAndroid Build Coastguard Worker     case CallingConv::AMDGPU_CS: // Fall through
292*9880d681SAndroid Build Coastguard Worker     case CallingConv::AMDGPU_VS: RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
293*9880d681SAndroid Build Coastguard Worker     case CallingConv::AMDGPU_PS: RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
294*9880d681SAndroid Build Coastguard Worker     }
295*9880d681SAndroid Build Coastguard Worker   }
296*9880d681SAndroid Build Coastguard Worker 
297*9880d681SAndroid Build Coastguard Worker   OutStreamer->EmitIntValue(RsrcReg, 4);
298*9880d681SAndroid Build Coastguard Worker   OutStreamer->EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
299*9880d681SAndroid Build Coastguard Worker                            S_STACK_SIZE(MFI->StackSize), 4);
300*9880d681SAndroid Build Coastguard Worker   OutStreamer->EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
301*9880d681SAndroid Build Coastguard Worker   OutStreamer->EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
302*9880d681SAndroid Build Coastguard Worker 
303*9880d681SAndroid Build Coastguard Worker   if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
304*9880d681SAndroid Build Coastguard Worker     OutStreamer->EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
305*9880d681SAndroid Build Coastguard Worker     OutStreamer->EmitIntValue(alignTo(MFI->LDSSize, 4) >> 2, 4);
306*9880d681SAndroid Build Coastguard Worker   }
307*9880d681SAndroid Build Coastguard Worker }
308*9880d681SAndroid Build Coastguard Worker 
getSIProgramInfo(SIProgramInfo & ProgInfo,const MachineFunction & MF) const309*9880d681SAndroid Build Coastguard Worker void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
310*9880d681SAndroid Build Coastguard Worker                                         const MachineFunction &MF) const {
311*9880d681SAndroid Build Coastguard Worker   const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
312*9880d681SAndroid Build Coastguard Worker   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
313*9880d681SAndroid Build Coastguard Worker   uint64_t CodeSize = 0;
314*9880d681SAndroid Build Coastguard Worker   unsigned MaxSGPR = 0;
315*9880d681SAndroid Build Coastguard Worker   unsigned MaxVGPR = 0;
316*9880d681SAndroid Build Coastguard Worker   bool VCCUsed = false;
317*9880d681SAndroid Build Coastguard Worker   bool FlatUsed = false;
318*9880d681SAndroid Build Coastguard Worker   const SIRegisterInfo *RI = STM.getRegisterInfo();
319*9880d681SAndroid Build Coastguard Worker   const SIInstrInfo *TII = STM.getInstrInfo();
320*9880d681SAndroid Build Coastguard Worker 
321*9880d681SAndroid Build Coastguard Worker   for (const MachineBasicBlock &MBB : MF) {
322*9880d681SAndroid Build Coastguard Worker     for (const MachineInstr &MI : MBB) {
323*9880d681SAndroid Build Coastguard Worker       // TODO: CodeSize should account for multiple functions.
324*9880d681SAndroid Build Coastguard Worker 
325*9880d681SAndroid Build Coastguard Worker       // TODO: Should we count size of debug info?
326*9880d681SAndroid Build Coastguard Worker       if (MI.isDebugValue())
327*9880d681SAndroid Build Coastguard Worker         continue;
328*9880d681SAndroid Build Coastguard Worker 
329*9880d681SAndroid Build Coastguard Worker       CodeSize += TII->getInstSizeInBytes(MI);
330*9880d681SAndroid Build Coastguard Worker 
331*9880d681SAndroid Build Coastguard Worker       unsigned numOperands = MI.getNumOperands();
332*9880d681SAndroid Build Coastguard Worker       for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
333*9880d681SAndroid Build Coastguard Worker         const MachineOperand &MO = MI.getOperand(op_idx);
334*9880d681SAndroid Build Coastguard Worker         unsigned width = 0;
335*9880d681SAndroid Build Coastguard Worker         bool isSGPR = false;
336*9880d681SAndroid Build Coastguard Worker 
337*9880d681SAndroid Build Coastguard Worker         if (!MO.isReg())
338*9880d681SAndroid Build Coastguard Worker           continue;
339*9880d681SAndroid Build Coastguard Worker 
340*9880d681SAndroid Build Coastguard Worker         unsigned reg = MO.getReg();
341*9880d681SAndroid Build Coastguard Worker         switch (reg) {
342*9880d681SAndroid Build Coastguard Worker         case AMDGPU::EXEC:
343*9880d681SAndroid Build Coastguard Worker         case AMDGPU::EXEC_LO:
344*9880d681SAndroid Build Coastguard Worker         case AMDGPU::EXEC_HI:
345*9880d681SAndroid Build Coastguard Worker         case AMDGPU::SCC:
346*9880d681SAndroid Build Coastguard Worker         case AMDGPU::M0:
347*9880d681SAndroid Build Coastguard Worker           continue;
348*9880d681SAndroid Build Coastguard Worker 
349*9880d681SAndroid Build Coastguard Worker         case AMDGPU::VCC:
350*9880d681SAndroid Build Coastguard Worker         case AMDGPU::VCC_LO:
351*9880d681SAndroid Build Coastguard Worker         case AMDGPU::VCC_HI:
352*9880d681SAndroid Build Coastguard Worker           VCCUsed = true;
353*9880d681SAndroid Build Coastguard Worker           continue;
354*9880d681SAndroid Build Coastguard Worker 
355*9880d681SAndroid Build Coastguard Worker         case AMDGPU::FLAT_SCR:
356*9880d681SAndroid Build Coastguard Worker         case AMDGPU::FLAT_SCR_LO:
357*9880d681SAndroid Build Coastguard Worker         case AMDGPU::FLAT_SCR_HI:
358*9880d681SAndroid Build Coastguard Worker           FlatUsed = true;
359*9880d681SAndroid Build Coastguard Worker           continue;
360*9880d681SAndroid Build Coastguard Worker 
361*9880d681SAndroid Build Coastguard Worker         case AMDGPU::TBA:
362*9880d681SAndroid Build Coastguard Worker         case AMDGPU::TBA_LO:
363*9880d681SAndroid Build Coastguard Worker         case AMDGPU::TBA_HI:
364*9880d681SAndroid Build Coastguard Worker         case AMDGPU::TMA:
365*9880d681SAndroid Build Coastguard Worker         case AMDGPU::TMA_LO:
366*9880d681SAndroid Build Coastguard Worker         case AMDGPU::TMA_HI:
367*9880d681SAndroid Build Coastguard Worker           llvm_unreachable("Trap Handler registers should not be used");
368*9880d681SAndroid Build Coastguard Worker           continue;
369*9880d681SAndroid Build Coastguard Worker 
370*9880d681SAndroid Build Coastguard Worker         default:
371*9880d681SAndroid Build Coastguard Worker           break;
372*9880d681SAndroid Build Coastguard Worker         }
373*9880d681SAndroid Build Coastguard Worker 
374*9880d681SAndroid Build Coastguard Worker         if (AMDGPU::SReg_32RegClass.contains(reg)) {
375*9880d681SAndroid Build Coastguard Worker           if (AMDGPU::TTMP_32RegClass.contains(reg)) {
376*9880d681SAndroid Build Coastguard Worker             llvm_unreachable("Trap Handler registers should not be used");
377*9880d681SAndroid Build Coastguard Worker           }
378*9880d681SAndroid Build Coastguard Worker           isSGPR = true;
379*9880d681SAndroid Build Coastguard Worker           width = 1;
380*9880d681SAndroid Build Coastguard Worker         } else if (AMDGPU::VGPR_32RegClass.contains(reg)) {
381*9880d681SAndroid Build Coastguard Worker           isSGPR = false;
382*9880d681SAndroid Build Coastguard Worker           width = 1;
383*9880d681SAndroid Build Coastguard Worker         } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
384*9880d681SAndroid Build Coastguard Worker           if (AMDGPU::TTMP_64RegClass.contains(reg)) {
385*9880d681SAndroid Build Coastguard Worker             llvm_unreachable("Trap Handler registers should not be used");
386*9880d681SAndroid Build Coastguard Worker           }
387*9880d681SAndroid Build Coastguard Worker           isSGPR = true;
388*9880d681SAndroid Build Coastguard Worker           width = 2;
389*9880d681SAndroid Build Coastguard Worker         } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
390*9880d681SAndroid Build Coastguard Worker           isSGPR = false;
391*9880d681SAndroid Build Coastguard Worker           width = 2;
392*9880d681SAndroid Build Coastguard Worker         } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
393*9880d681SAndroid Build Coastguard Worker           isSGPR = false;
394*9880d681SAndroid Build Coastguard Worker           width = 3;
395*9880d681SAndroid Build Coastguard Worker         } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
396*9880d681SAndroid Build Coastguard Worker           isSGPR = true;
397*9880d681SAndroid Build Coastguard Worker           width = 4;
398*9880d681SAndroid Build Coastguard Worker         } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
399*9880d681SAndroid Build Coastguard Worker           isSGPR = false;
400*9880d681SAndroid Build Coastguard Worker           width = 4;
401*9880d681SAndroid Build Coastguard Worker         } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
402*9880d681SAndroid Build Coastguard Worker           isSGPR = true;
403*9880d681SAndroid Build Coastguard Worker           width = 8;
404*9880d681SAndroid Build Coastguard Worker         } else if (AMDGPU::VReg_256RegClass.contains(reg)) {
405*9880d681SAndroid Build Coastguard Worker           isSGPR = false;
406*9880d681SAndroid Build Coastguard Worker           width = 8;
407*9880d681SAndroid Build Coastguard Worker         } else if (AMDGPU::SReg_512RegClass.contains(reg)) {
408*9880d681SAndroid Build Coastguard Worker           isSGPR = true;
409*9880d681SAndroid Build Coastguard Worker           width = 16;
410*9880d681SAndroid Build Coastguard Worker         } else if (AMDGPU::VReg_512RegClass.contains(reg)) {
411*9880d681SAndroid Build Coastguard Worker           isSGPR = false;
412*9880d681SAndroid Build Coastguard Worker           width = 16;
413*9880d681SAndroid Build Coastguard Worker         } else {
414*9880d681SAndroid Build Coastguard Worker           llvm_unreachable("Unknown register class");
415*9880d681SAndroid Build Coastguard Worker         }
416*9880d681SAndroid Build Coastguard Worker         unsigned hwReg = RI->getEncodingValue(reg) & 0xff;
417*9880d681SAndroid Build Coastguard Worker         unsigned maxUsed = hwReg + width - 1;
418*9880d681SAndroid Build Coastguard Worker         if (isSGPR) {
419*9880d681SAndroid Build Coastguard Worker           MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
420*9880d681SAndroid Build Coastguard Worker         } else {
421*9880d681SAndroid Build Coastguard Worker           MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
422*9880d681SAndroid Build Coastguard Worker         }
423*9880d681SAndroid Build Coastguard Worker       }
424*9880d681SAndroid Build Coastguard Worker     }
425*9880d681SAndroid Build Coastguard Worker   }
426*9880d681SAndroid Build Coastguard Worker 
427*9880d681SAndroid Build Coastguard Worker   unsigned ExtraSGPRs = 0;
428*9880d681SAndroid Build Coastguard Worker 
429*9880d681SAndroid Build Coastguard Worker   if (VCCUsed)
430*9880d681SAndroid Build Coastguard Worker     ExtraSGPRs = 2;
431*9880d681SAndroid Build Coastguard Worker 
432*9880d681SAndroid Build Coastguard Worker   if (STM.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) {
433*9880d681SAndroid Build Coastguard Worker     if (FlatUsed)
434*9880d681SAndroid Build Coastguard Worker       ExtraSGPRs = 4;
435*9880d681SAndroid Build Coastguard Worker   } else {
436*9880d681SAndroid Build Coastguard Worker     if (STM.isXNACKEnabled())
437*9880d681SAndroid Build Coastguard Worker       ExtraSGPRs = 4;
438*9880d681SAndroid Build Coastguard Worker 
439*9880d681SAndroid Build Coastguard Worker     if (FlatUsed)
440*9880d681SAndroid Build Coastguard Worker       ExtraSGPRs = 6;
441*9880d681SAndroid Build Coastguard Worker   }
442*9880d681SAndroid Build Coastguard Worker 
443*9880d681SAndroid Build Coastguard Worker   MaxSGPR += ExtraSGPRs;
444*9880d681SAndroid Build Coastguard Worker 
445*9880d681SAndroid Build Coastguard Worker   // Record first reserved register and reserved register count fields, and
446*9880d681SAndroid Build Coastguard Worker   // update max register counts if "amdgpu-debugger-reserve-regs" attribute was
447*9880d681SAndroid Build Coastguard Worker   // specified.
448*9880d681SAndroid Build Coastguard Worker   if (STM.debuggerReserveRegs()) {
449*9880d681SAndroid Build Coastguard Worker     ProgInfo.ReservedVGPRFirst = MaxVGPR + 1;
450*9880d681SAndroid Build Coastguard Worker     ProgInfo.ReservedVGPRCount = MFI->getDebuggerReservedVGPRCount();
451*9880d681SAndroid Build Coastguard Worker     MaxVGPR += MFI->getDebuggerReservedVGPRCount();
452*9880d681SAndroid Build Coastguard Worker   }
453*9880d681SAndroid Build Coastguard Worker 
454*9880d681SAndroid Build Coastguard Worker   // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
455*9880d681SAndroid Build Coastguard Worker   // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
456*9880d681SAndroid Build Coastguard Worker   // attribute was specified.
457*9880d681SAndroid Build Coastguard Worker   if (STM.debuggerEmitPrologue()) {
458*9880d681SAndroid Build Coastguard Worker     ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =
459*9880d681SAndroid Build Coastguard Worker       RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());
460*9880d681SAndroid Build Coastguard Worker     ProgInfo.DebuggerPrivateSegmentBufferSGPR =
461*9880d681SAndroid Build Coastguard Worker       RI->getHWRegIndex(MFI->getScratchRSrcReg());
462*9880d681SAndroid Build Coastguard Worker   }
463*9880d681SAndroid Build Coastguard Worker 
464*9880d681SAndroid Build Coastguard Worker   // We found the maximum register index. They start at 0, so add one to get the
465*9880d681SAndroid Build Coastguard Worker   // number of registers.
466*9880d681SAndroid Build Coastguard Worker   ProgInfo.NumVGPR = MaxVGPR + 1;
467*9880d681SAndroid Build Coastguard Worker   ProgInfo.NumSGPR = MaxSGPR + 1;
468*9880d681SAndroid Build Coastguard Worker 
469*9880d681SAndroid Build Coastguard Worker   if (STM.hasSGPRInitBug()) {
470*9880d681SAndroid Build Coastguard Worker     if (ProgInfo.NumSGPR > SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG) {
471*9880d681SAndroid Build Coastguard Worker       LLVMContext &Ctx = MF.getFunction()->getContext();
472*9880d681SAndroid Build Coastguard Worker       DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
473*9880d681SAndroid Build Coastguard Worker                                        "SGPRs with SGPR init bug",
474*9880d681SAndroid Build Coastguard Worker                                        ProgInfo.NumSGPR, DS_Error);
475*9880d681SAndroid Build Coastguard Worker       Ctx.diagnose(Diag);
476*9880d681SAndroid Build Coastguard Worker     }
477*9880d681SAndroid Build Coastguard Worker 
478*9880d681SAndroid Build Coastguard Worker     ProgInfo.NumSGPR = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
479*9880d681SAndroid Build Coastguard Worker   }
480*9880d681SAndroid Build Coastguard Worker 
481*9880d681SAndroid Build Coastguard Worker   if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) {
482*9880d681SAndroid Build Coastguard Worker     LLVMContext &Ctx = MF.getFunction()->getContext();
483*9880d681SAndroid Build Coastguard Worker     DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "user SGPRs",
484*9880d681SAndroid Build Coastguard Worker                                      MFI->NumUserSGPRs, DS_Error);
485*9880d681SAndroid Build Coastguard Worker     Ctx.diagnose(Diag);
486*9880d681SAndroid Build Coastguard Worker   }
487*9880d681SAndroid Build Coastguard Worker 
488*9880d681SAndroid Build Coastguard Worker   if (MFI->LDSSize > static_cast<unsigned>(STM.getLocalMemorySize())) {
489*9880d681SAndroid Build Coastguard Worker     LLVMContext &Ctx = MF.getFunction()->getContext();
490*9880d681SAndroid Build Coastguard Worker     DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "local memory",
491*9880d681SAndroid Build Coastguard Worker                                      MFI->LDSSize, DS_Error);
492*9880d681SAndroid Build Coastguard Worker     Ctx.diagnose(Diag);
493*9880d681SAndroid Build Coastguard Worker   }
494*9880d681SAndroid Build Coastguard Worker 
495*9880d681SAndroid Build Coastguard Worker   ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4;
496*9880d681SAndroid Build Coastguard Worker   ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8;
497*9880d681SAndroid Build Coastguard Worker   // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
498*9880d681SAndroid Build Coastguard Worker   // register.
499*9880d681SAndroid Build Coastguard Worker   ProgInfo.FloatMode = getFPMode(MF);
500*9880d681SAndroid Build Coastguard Worker 
501*9880d681SAndroid Build Coastguard Worker   ProgInfo.IEEEMode = 0;
502*9880d681SAndroid Build Coastguard Worker 
503*9880d681SAndroid Build Coastguard Worker   // Make clamp modifier on NaN input returns 0.
504*9880d681SAndroid Build Coastguard Worker   ProgInfo.DX10Clamp = 1;
505*9880d681SAndroid Build Coastguard Worker 
506*9880d681SAndroid Build Coastguard Worker   const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
507*9880d681SAndroid Build Coastguard Worker   ProgInfo.ScratchSize = FrameInfo->getStackSize();
508*9880d681SAndroid Build Coastguard Worker 
509*9880d681SAndroid Build Coastguard Worker   ProgInfo.FlatUsed = FlatUsed;
510*9880d681SAndroid Build Coastguard Worker   ProgInfo.VCCUsed = VCCUsed;
511*9880d681SAndroid Build Coastguard Worker   ProgInfo.CodeLen = CodeSize;
512*9880d681SAndroid Build Coastguard Worker 
513*9880d681SAndroid Build Coastguard Worker   unsigned LDSAlignShift;
514*9880d681SAndroid Build Coastguard Worker   if (STM.getGeneration() < SISubtarget::SEA_ISLANDS) {
515*9880d681SAndroid Build Coastguard Worker     // LDS is allocated in 64 dword blocks.
516*9880d681SAndroid Build Coastguard Worker     LDSAlignShift = 8;
517*9880d681SAndroid Build Coastguard Worker   } else {
518*9880d681SAndroid Build Coastguard Worker     // LDS is allocated in 128 dword blocks.
519*9880d681SAndroid Build Coastguard Worker     LDSAlignShift = 9;
520*9880d681SAndroid Build Coastguard Worker   }
521*9880d681SAndroid Build Coastguard Worker 
522*9880d681SAndroid Build Coastguard Worker   unsigned LDSSpillSize = MFI->LDSWaveSpillSize *
523*9880d681SAndroid Build Coastguard Worker                           MFI->getMaximumWorkGroupSize(MF);
524*9880d681SAndroid Build Coastguard Worker 
525*9880d681SAndroid Build Coastguard Worker   ProgInfo.LDSSize = MFI->LDSSize + LDSSpillSize;
526*9880d681SAndroid Build Coastguard Worker   ProgInfo.LDSBlocks =
527*9880d681SAndroid Build Coastguard Worker       alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
528*9880d681SAndroid Build Coastguard Worker 
529*9880d681SAndroid Build Coastguard Worker   // Scratch is allocated in 256 dword blocks.
530*9880d681SAndroid Build Coastguard Worker   unsigned ScratchAlignShift = 10;
531*9880d681SAndroid Build Coastguard Worker   // We need to program the hardware with the amount of scratch memory that
532*9880d681SAndroid Build Coastguard Worker   // is used by the entire wave.  ProgInfo.ScratchSize is the amount of
533*9880d681SAndroid Build Coastguard Worker   // scratch memory used per thread.
534*9880d681SAndroid Build Coastguard Worker   ProgInfo.ScratchBlocks =
535*9880d681SAndroid Build Coastguard Worker       alignTo(ProgInfo.ScratchSize * STM.getWavefrontSize(),
536*9880d681SAndroid Build Coastguard Worker               1ULL << ScratchAlignShift) >>
537*9880d681SAndroid Build Coastguard Worker       ScratchAlignShift;
538*9880d681SAndroid Build Coastguard Worker 
539*9880d681SAndroid Build Coastguard Worker   ProgInfo.ComputePGMRSrc1 =
540*9880d681SAndroid Build Coastguard Worker       S_00B848_VGPRS(ProgInfo.VGPRBlocks) |
541*9880d681SAndroid Build Coastguard Worker       S_00B848_SGPRS(ProgInfo.SGPRBlocks) |
542*9880d681SAndroid Build Coastguard Worker       S_00B848_PRIORITY(ProgInfo.Priority) |
543*9880d681SAndroid Build Coastguard Worker       S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
544*9880d681SAndroid Build Coastguard Worker       S_00B848_PRIV(ProgInfo.Priv) |
545*9880d681SAndroid Build Coastguard Worker       S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
546*9880d681SAndroid Build Coastguard Worker       S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
547*9880d681SAndroid Build Coastguard Worker       S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
548*9880d681SAndroid Build Coastguard Worker 
549*9880d681SAndroid Build Coastguard Worker   // 0 = X, 1 = XY, 2 = XYZ
550*9880d681SAndroid Build Coastguard Worker   unsigned TIDIGCompCnt = 0;
551*9880d681SAndroid Build Coastguard Worker   if (MFI->hasWorkItemIDZ())
552*9880d681SAndroid Build Coastguard Worker     TIDIGCompCnt = 2;
553*9880d681SAndroid Build Coastguard Worker   else if (MFI->hasWorkItemIDY())
554*9880d681SAndroid Build Coastguard Worker     TIDIGCompCnt = 1;
555*9880d681SAndroid Build Coastguard Worker 
556*9880d681SAndroid Build Coastguard Worker   ProgInfo.ComputePGMRSrc2 =
557*9880d681SAndroid Build Coastguard Worker       S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
558*9880d681SAndroid Build Coastguard Worker       S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |
559*9880d681SAndroid Build Coastguard Worker       S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) |
560*9880d681SAndroid Build Coastguard Worker       S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) |
561*9880d681SAndroid Build Coastguard Worker       S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) |
562*9880d681SAndroid Build Coastguard Worker       S_00B84C_TG_SIZE_EN(MFI->hasWorkGroupInfo()) |
563*9880d681SAndroid Build Coastguard Worker       S_00B84C_TIDIG_COMP_CNT(TIDIGCompCnt) |
564*9880d681SAndroid Build Coastguard Worker       S_00B84C_EXCP_EN_MSB(0) |
565*9880d681SAndroid Build Coastguard Worker       S_00B84C_LDS_SIZE(ProgInfo.LDSBlocks) |
566*9880d681SAndroid Build Coastguard Worker       S_00B84C_EXCP_EN(0);
567*9880d681SAndroid Build Coastguard Worker }
568*9880d681SAndroid Build Coastguard Worker 
getRsrcReg(CallingConv::ID CallConv)569*9880d681SAndroid Build Coastguard Worker static unsigned getRsrcReg(CallingConv::ID CallConv) {
570*9880d681SAndroid Build Coastguard Worker   switch (CallConv) {
571*9880d681SAndroid Build Coastguard Worker   default: // Fall through
572*9880d681SAndroid Build Coastguard Worker   case CallingConv::AMDGPU_CS: return R_00B848_COMPUTE_PGM_RSRC1;
573*9880d681SAndroid Build Coastguard Worker   case CallingConv::AMDGPU_GS: return R_00B228_SPI_SHADER_PGM_RSRC1_GS;
574*9880d681SAndroid Build Coastguard Worker   case CallingConv::AMDGPU_PS: return R_00B028_SPI_SHADER_PGM_RSRC1_PS;
575*9880d681SAndroid Build Coastguard Worker   case CallingConv::AMDGPU_VS: return R_00B128_SPI_SHADER_PGM_RSRC1_VS;
576*9880d681SAndroid Build Coastguard Worker   }
577*9880d681SAndroid Build Coastguard Worker }
578*9880d681SAndroid Build Coastguard Worker 
EmitProgramInfoSI(const MachineFunction & MF,const SIProgramInfo & KernelInfo)579*9880d681SAndroid Build Coastguard Worker void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
580*9880d681SAndroid Build Coastguard Worker                                          const SIProgramInfo &KernelInfo) {
581*9880d681SAndroid Build Coastguard Worker   const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
582*9880d681SAndroid Build Coastguard Worker   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
583*9880d681SAndroid Build Coastguard Worker   unsigned RsrcReg = getRsrcReg(MF.getFunction()->getCallingConv());
584*9880d681SAndroid Build Coastguard Worker 
585*9880d681SAndroid Build Coastguard Worker   if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
586*9880d681SAndroid Build Coastguard Worker     OutStreamer->EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
587*9880d681SAndroid Build Coastguard Worker 
588*9880d681SAndroid Build Coastguard Worker     OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc1, 4);
589*9880d681SAndroid Build Coastguard Worker 
590*9880d681SAndroid Build Coastguard Worker     OutStreamer->EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
591*9880d681SAndroid Build Coastguard Worker     OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc2, 4);
592*9880d681SAndroid Build Coastguard Worker 
593*9880d681SAndroid Build Coastguard Worker     OutStreamer->EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
594*9880d681SAndroid Build Coastguard Worker     OutStreamer->EmitIntValue(S_00B860_WAVESIZE(KernelInfo.ScratchBlocks), 4);
595*9880d681SAndroid Build Coastguard Worker 
596*9880d681SAndroid Build Coastguard Worker     // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
597*9880d681SAndroid Build Coastguard Worker     // 0" comment but I don't see a corresponding field in the register spec.
598*9880d681SAndroid Build Coastguard Worker   } else {
599*9880d681SAndroid Build Coastguard Worker     OutStreamer->EmitIntValue(RsrcReg, 4);
600*9880d681SAndroid Build Coastguard Worker     OutStreamer->EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |
601*9880d681SAndroid Build Coastguard Worker                               S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4);
602*9880d681SAndroid Build Coastguard Worker     if (STM.isVGPRSpillingEnabled(*MF.getFunction())) {
603*9880d681SAndroid Build Coastguard Worker       OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
604*9880d681SAndroid Build Coastguard Worker       OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4);
605*9880d681SAndroid Build Coastguard Worker     }
606*9880d681SAndroid Build Coastguard Worker   }
607*9880d681SAndroid Build Coastguard Worker 
608*9880d681SAndroid Build Coastguard Worker   if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_PS) {
609*9880d681SAndroid Build Coastguard Worker     OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
610*9880d681SAndroid Build Coastguard Worker     OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
611*9880d681SAndroid Build Coastguard Worker     OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
612*9880d681SAndroid Build Coastguard Worker     OutStreamer->EmitIntValue(MFI->PSInputEna, 4);
613*9880d681SAndroid Build Coastguard Worker     OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4);
614*9880d681SAndroid Build Coastguard Worker     OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4);
615*9880d681SAndroid Build Coastguard Worker   }
616*9880d681SAndroid Build Coastguard Worker 
617*9880d681SAndroid Build Coastguard Worker   OutStreamer->EmitIntValue(R_SPILLED_SGPRS, 4);
618*9880d681SAndroid Build Coastguard Worker   OutStreamer->EmitIntValue(MFI->getNumSpilledSGPRs(), 4);
619*9880d681SAndroid Build Coastguard Worker   OutStreamer->EmitIntValue(R_SPILLED_VGPRS, 4);
620*9880d681SAndroid Build Coastguard Worker   OutStreamer->EmitIntValue(MFI->getNumSpilledVGPRs(), 4);
621*9880d681SAndroid Build Coastguard Worker }
622*9880d681SAndroid Build Coastguard Worker 
623*9880d681SAndroid Build Coastguard Worker // This is supposed to be log2(Size)
getElementByteSizeValue(unsigned Size)624*9880d681SAndroid Build Coastguard Worker static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
625*9880d681SAndroid Build Coastguard Worker   switch (Size) {
626*9880d681SAndroid Build Coastguard Worker   case 4:
627*9880d681SAndroid Build Coastguard Worker     return AMD_ELEMENT_4_BYTES;
628*9880d681SAndroid Build Coastguard Worker   case 8:
629*9880d681SAndroid Build Coastguard Worker     return AMD_ELEMENT_8_BYTES;
630*9880d681SAndroid Build Coastguard Worker   case 16:
631*9880d681SAndroid Build Coastguard Worker     return AMD_ELEMENT_16_BYTES;
632*9880d681SAndroid Build Coastguard Worker   default:
633*9880d681SAndroid Build Coastguard Worker     llvm_unreachable("invalid private_element_size");
634*9880d681SAndroid Build Coastguard Worker   }
635*9880d681SAndroid Build Coastguard Worker }
636*9880d681SAndroid Build Coastguard Worker 
EmitAmdKernelCodeT(const MachineFunction & MF,const SIProgramInfo & KernelInfo) const637*9880d681SAndroid Build Coastguard Worker void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
638*9880d681SAndroid Build Coastguard Worker                                          const SIProgramInfo &KernelInfo) const {
639*9880d681SAndroid Build Coastguard Worker   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
640*9880d681SAndroid Build Coastguard Worker   const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
641*9880d681SAndroid Build Coastguard Worker   amd_kernel_code_t header;
642*9880d681SAndroid Build Coastguard Worker 
643*9880d681SAndroid Build Coastguard Worker   AMDGPU::initDefaultAMDKernelCodeT(header, STM.getFeatureBits());
644*9880d681SAndroid Build Coastguard Worker 
645*9880d681SAndroid Build Coastguard Worker   header.compute_pgm_resource_registers =
646*9880d681SAndroid Build Coastguard Worker       KernelInfo.ComputePGMRSrc1 |
647*9880d681SAndroid Build Coastguard Worker       (KernelInfo.ComputePGMRSrc2 << 32);
648*9880d681SAndroid Build Coastguard Worker   header.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
649*9880d681SAndroid Build Coastguard Worker 
650*9880d681SAndroid Build Coastguard Worker 
651*9880d681SAndroid Build Coastguard Worker   AMD_HSA_BITS_SET(header.code_properties,
652*9880d681SAndroid Build Coastguard Worker                    AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
653*9880d681SAndroid Build Coastguard Worker                    getElementByteSizeValue(STM.getMaxPrivateElementSize()));
654*9880d681SAndroid Build Coastguard Worker 
655*9880d681SAndroid Build Coastguard Worker   if (MFI->hasPrivateSegmentBuffer()) {
656*9880d681SAndroid Build Coastguard Worker     header.code_properties |=
657*9880d681SAndroid Build Coastguard Worker       AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
658*9880d681SAndroid Build Coastguard Worker   }
659*9880d681SAndroid Build Coastguard Worker 
660*9880d681SAndroid Build Coastguard Worker   if (MFI->hasDispatchPtr())
661*9880d681SAndroid Build Coastguard Worker     header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
662*9880d681SAndroid Build Coastguard Worker 
663*9880d681SAndroid Build Coastguard Worker   if (MFI->hasQueuePtr())
664*9880d681SAndroid Build Coastguard Worker     header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
665*9880d681SAndroid Build Coastguard Worker 
666*9880d681SAndroid Build Coastguard Worker   if (MFI->hasKernargSegmentPtr())
667*9880d681SAndroid Build Coastguard Worker     header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
668*9880d681SAndroid Build Coastguard Worker 
669*9880d681SAndroid Build Coastguard Worker   if (MFI->hasDispatchID())
670*9880d681SAndroid Build Coastguard Worker     header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
671*9880d681SAndroid Build Coastguard Worker 
672*9880d681SAndroid Build Coastguard Worker   if (MFI->hasFlatScratchInit())
673*9880d681SAndroid Build Coastguard Worker     header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
674*9880d681SAndroid Build Coastguard Worker 
675*9880d681SAndroid Build Coastguard Worker   // TODO: Private segment size
676*9880d681SAndroid Build Coastguard Worker 
677*9880d681SAndroid Build Coastguard Worker   if (MFI->hasGridWorkgroupCountX()) {
678*9880d681SAndroid Build Coastguard Worker     header.code_properties |=
679*9880d681SAndroid Build Coastguard Worker       AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X;
680*9880d681SAndroid Build Coastguard Worker   }
681*9880d681SAndroid Build Coastguard Worker 
682*9880d681SAndroid Build Coastguard Worker   if (MFI->hasGridWorkgroupCountY()) {
683*9880d681SAndroid Build Coastguard Worker     header.code_properties |=
684*9880d681SAndroid Build Coastguard Worker       AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y;
685*9880d681SAndroid Build Coastguard Worker   }
686*9880d681SAndroid Build Coastguard Worker 
687*9880d681SAndroid Build Coastguard Worker   if (MFI->hasGridWorkgroupCountZ()) {
688*9880d681SAndroid Build Coastguard Worker     header.code_properties |=
689*9880d681SAndroid Build Coastguard Worker       AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z;
690*9880d681SAndroid Build Coastguard Worker   }
691*9880d681SAndroid Build Coastguard Worker 
692*9880d681SAndroid Build Coastguard Worker   if (MFI->hasDispatchPtr())
693*9880d681SAndroid Build Coastguard Worker     header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
694*9880d681SAndroid Build Coastguard Worker 
695*9880d681SAndroid Build Coastguard Worker   if (STM.debuggerSupported())
696*9880d681SAndroid Build Coastguard Worker     header.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED;
697*9880d681SAndroid Build Coastguard Worker 
698*9880d681SAndroid Build Coastguard Worker   if (STM.isXNACKEnabled())
699*9880d681SAndroid Build Coastguard Worker     header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
700*9880d681SAndroid Build Coastguard Worker 
701*9880d681SAndroid Build Coastguard Worker   header.kernarg_segment_byte_size = MFI->ABIArgOffset;
702*9880d681SAndroid Build Coastguard Worker   header.wavefront_sgpr_count = KernelInfo.NumSGPR;
703*9880d681SAndroid Build Coastguard Worker   header.workitem_vgpr_count = KernelInfo.NumVGPR;
704*9880d681SAndroid Build Coastguard Worker   header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
705*9880d681SAndroid Build Coastguard Worker   header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
706*9880d681SAndroid Build Coastguard Worker   header.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;
707*9880d681SAndroid Build Coastguard Worker   header.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;
708*9880d681SAndroid Build Coastguard Worker 
709*9880d681SAndroid Build Coastguard Worker   if (STM.debuggerEmitPrologue()) {
710*9880d681SAndroid Build Coastguard Worker     header.debug_wavefront_private_segment_offset_sgpr =
711*9880d681SAndroid Build Coastguard Worker       KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
712*9880d681SAndroid Build Coastguard Worker     header.debug_private_segment_buffer_sgpr =
713*9880d681SAndroid Build Coastguard Worker       KernelInfo.DebuggerPrivateSegmentBufferSGPR;
714*9880d681SAndroid Build Coastguard Worker   }
715*9880d681SAndroid Build Coastguard Worker 
716*9880d681SAndroid Build Coastguard Worker   AMDGPUTargetStreamer *TS =
717*9880d681SAndroid Build Coastguard Worker       static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
718*9880d681SAndroid Build Coastguard Worker 
719*9880d681SAndroid Build Coastguard Worker   OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
720*9880d681SAndroid Build Coastguard Worker   TS->EmitAMDKernelCodeT(header);
721*9880d681SAndroid Build Coastguard Worker }
722*9880d681SAndroid Build Coastguard Worker 
PrintAsmOperand(const MachineInstr * MI,unsigned OpNo,unsigned AsmVariant,const char * ExtraCode,raw_ostream & O)723*9880d681SAndroid Build Coastguard Worker bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
724*9880d681SAndroid Build Coastguard Worker                                        unsigned AsmVariant,
725*9880d681SAndroid Build Coastguard Worker                                        const char *ExtraCode, raw_ostream &O) {
726*9880d681SAndroid Build Coastguard Worker   if (ExtraCode && ExtraCode[0]) {
727*9880d681SAndroid Build Coastguard Worker     if (ExtraCode[1] != 0)
728*9880d681SAndroid Build Coastguard Worker       return true; // Unknown modifier.
729*9880d681SAndroid Build Coastguard Worker 
730*9880d681SAndroid Build Coastguard Worker     switch (ExtraCode[0]) {
731*9880d681SAndroid Build Coastguard Worker     default:
732*9880d681SAndroid Build Coastguard Worker       // See if this is a generic print operand
733*9880d681SAndroid Build Coastguard Worker       return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
734*9880d681SAndroid Build Coastguard Worker     case 'r':
735*9880d681SAndroid Build Coastguard Worker       break;
736*9880d681SAndroid Build Coastguard Worker     }
737*9880d681SAndroid Build Coastguard Worker   }
738*9880d681SAndroid Build Coastguard Worker 
739*9880d681SAndroid Build Coastguard Worker   AMDGPUInstPrinter::printRegOperand(MI->getOperand(OpNo).getReg(), O,
740*9880d681SAndroid Build Coastguard Worker                    *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
741*9880d681SAndroid Build Coastguard Worker   return false;
742*9880d681SAndroid Build Coastguard Worker }
743