xref: /aosp_15_r20/external/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker //===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===//
2*9880d681SAndroid Build Coastguard Worker //
3*9880d681SAndroid Build Coastguard Worker //                     The LLVM Compiler Infrastructure
4*9880d681SAndroid Build Coastguard Worker //
5*9880d681SAndroid Build Coastguard Worker // This file is distributed under the University of Illinois Open Source
6*9880d681SAndroid Build Coastguard Worker // License. See LICENSE.TXT for details.
7*9880d681SAndroid Build Coastguard Worker //
8*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
9*9880d681SAndroid Build Coastguard Worker //
10*9880d681SAndroid Build Coastguard Worker // Top-level implementation for the NVPTX target.
11*9880d681SAndroid Build Coastguard Worker //
12*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
13*9880d681SAndroid Build Coastguard Worker 
14*9880d681SAndroid Build Coastguard Worker #include "NVPTXTargetMachine.h"
15*9880d681SAndroid Build Coastguard Worker #include "MCTargetDesc/NVPTXMCAsmInfo.h"
16*9880d681SAndroid Build Coastguard Worker #include "NVPTX.h"
17*9880d681SAndroid Build Coastguard Worker #include "NVPTXAllocaHoisting.h"
18*9880d681SAndroid Build Coastguard Worker #include "NVPTXLowerAggrCopies.h"
19*9880d681SAndroid Build Coastguard Worker #include "NVPTXTargetObjectFile.h"
20*9880d681SAndroid Build Coastguard Worker #include "NVPTXTargetTransformInfo.h"
21*9880d681SAndroid Build Coastguard Worker #include "llvm/Analysis/Passes.h"
22*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/AsmPrinter.h"
23*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFunctionAnalysis.h"
24*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineModuleInfo.h"
25*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/Passes.h"
26*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/TargetPassConfig.h"
27*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/DataLayout.h"
28*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/IRPrintingPasses.h"
29*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/LegacyPassManager.h"
30*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/Verifier.h"
31*9880d681SAndroid Build Coastguard Worker #include "llvm/MC/MCAsmInfo.h"
32*9880d681SAndroid Build Coastguard Worker #include "llvm/MC/MCInstrInfo.h"
33*9880d681SAndroid Build Coastguard Worker #include "llvm/MC/MCStreamer.h"
34*9880d681SAndroid Build Coastguard Worker #include "llvm/MC/MCSubtargetInfo.h"
35*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/CommandLine.h"
36*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/Debug.h"
37*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/FormattedStream.h"
38*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/TargetRegistry.h"
39*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/raw_ostream.h"
40*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/TargetInstrInfo.h"
41*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/TargetLowering.h"
42*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/TargetLoweringObjectFile.h"
43*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/TargetMachine.h"
44*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/TargetOptions.h"
45*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/TargetRegisterInfo.h"
46*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/TargetSubtargetInfo.h"
47*9880d681SAndroid Build Coastguard Worker #include "llvm/Transforms/Scalar.h"
48*9880d681SAndroid Build Coastguard Worker #include "llvm/Transforms/Scalar/GVN.h"
49*9880d681SAndroid Build Coastguard Worker 
50*9880d681SAndroid Build Coastguard Worker using namespace llvm;
51*9880d681SAndroid Build Coastguard Worker 
52*9880d681SAndroid Build Coastguard Worker static cl::opt<bool> UseInferAddressSpaces(
53*9880d681SAndroid Build Coastguard Worker     "nvptx-use-infer-addrspace", cl::init(false), cl::Hidden,
54*9880d681SAndroid Build Coastguard Worker     cl::desc("Optimize address spaces using NVPTXInferAddressSpaces instead of "
55*9880d681SAndroid Build Coastguard Worker              "NVPTXFavorNonGenericAddrSpaces"));
56*9880d681SAndroid Build Coastguard Worker 
57*9880d681SAndroid Build Coastguard Worker namespace llvm {
58*9880d681SAndroid Build Coastguard Worker void initializeNVVMIntrRangePass(PassRegistry&);
59*9880d681SAndroid Build Coastguard Worker void initializeNVVMReflectPass(PassRegistry&);
60*9880d681SAndroid Build Coastguard Worker void initializeGenericToNVVMPass(PassRegistry&);
61*9880d681SAndroid Build Coastguard Worker void initializeNVPTXAllocaHoistingPass(PassRegistry &);
62*9880d681SAndroid Build Coastguard Worker void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
63*9880d681SAndroid Build Coastguard Worker void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
64*9880d681SAndroid Build Coastguard Worker void initializeNVPTXInferAddressSpacesPass(PassRegistry &);
65*9880d681SAndroid Build Coastguard Worker void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
66*9880d681SAndroid Build Coastguard Worker void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
67*9880d681SAndroid Build Coastguard Worker void initializeNVPTXLowerAllocaPass(PassRegistry &);
68*9880d681SAndroid Build Coastguard Worker }
69*9880d681SAndroid Build Coastguard Worker 
LLVMInitializeNVPTXTarget()70*9880d681SAndroid Build Coastguard Worker extern "C" void LLVMInitializeNVPTXTarget() {
71*9880d681SAndroid Build Coastguard Worker   // Register the target.
72*9880d681SAndroid Build Coastguard Worker   RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32);
73*9880d681SAndroid Build Coastguard Worker   RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64);
74*9880d681SAndroid Build Coastguard Worker 
75*9880d681SAndroid Build Coastguard Worker   // FIXME: This pass is really intended to be invoked during IR optimization,
76*9880d681SAndroid Build Coastguard Worker   // but it's very NVPTX-specific.
77*9880d681SAndroid Build Coastguard Worker   PassRegistry &PR = *PassRegistry::getPassRegistry();
78*9880d681SAndroid Build Coastguard Worker   initializeNVVMReflectPass(PR);
79*9880d681SAndroid Build Coastguard Worker   initializeNVVMIntrRangePass(PR);
80*9880d681SAndroid Build Coastguard Worker   initializeGenericToNVVMPass(PR);
81*9880d681SAndroid Build Coastguard Worker   initializeNVPTXAllocaHoistingPass(PR);
82*9880d681SAndroid Build Coastguard Worker   initializeNVPTXAssignValidGlobalNamesPass(PR);
83*9880d681SAndroid Build Coastguard Worker   initializeNVPTXFavorNonGenericAddrSpacesPass(PR);
84*9880d681SAndroid Build Coastguard Worker   initializeNVPTXInferAddressSpacesPass(PR);
85*9880d681SAndroid Build Coastguard Worker   initializeNVPTXLowerKernelArgsPass(PR);
86*9880d681SAndroid Build Coastguard Worker   initializeNVPTXLowerAllocaPass(PR);
87*9880d681SAndroid Build Coastguard Worker   initializeNVPTXLowerAggrCopiesPass(PR);
88*9880d681SAndroid Build Coastguard Worker }
89*9880d681SAndroid Build Coastguard Worker 
/// Builds the data layout string handed to LLVMTargetMachine.
///
/// \param is64Bit  true for the 64-bit target, false for the 32-bit target.
/// \returns "e" followed, on the 32-bit target only, by an explicit
///          "-p:32:32" pointer specification, followed by the integer,
///          vector and native-width specifications shared by both targets.
static std::string computeDataLayout(bool is64Bit) {
  std::string Ret = "e";

  // Only the 32-bit target spells out its pointer size/alignment; the 64-bit
  // target adds no pointer specification here.
  if (!is64Bit)
    Ret += "-p:32:32";

  Ret += "-i64:64-v16:16-v32:32-n16:32:64";

  return Ret;
}
100*9880d681SAndroid Build Coastguard Worker 
NVPTXTargetMachine(const Target & T,const Triple & TT,StringRef CPU,StringRef FS,const TargetOptions & Options,Optional<Reloc::Model> RM,CodeModel::Model CM,CodeGenOpt::Level OL,bool is64bit)101*9880d681SAndroid Build Coastguard Worker NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
102*9880d681SAndroid Build Coastguard Worker                                        StringRef CPU, StringRef FS,
103*9880d681SAndroid Build Coastguard Worker                                        const TargetOptions &Options,
104*9880d681SAndroid Build Coastguard Worker                                        Optional<Reloc::Model> RM,
105*9880d681SAndroid Build Coastguard Worker                                        CodeModel::Model CM,
106*9880d681SAndroid Build Coastguard Worker                                        CodeGenOpt::Level OL, bool is64bit)
107*9880d681SAndroid Build Coastguard Worker     // The pic relocation model is used regardless of what the client has
108*9880d681SAndroid Build Coastguard Worker     // specified, as it is the only relocation model currently supported.
109*9880d681SAndroid Build Coastguard Worker     : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options,
110*9880d681SAndroid Build Coastguard Worker                         Reloc::PIC_, CM, OL),
111*9880d681SAndroid Build Coastguard Worker       is64bit(is64bit),
112*9880d681SAndroid Build Coastguard Worker       TLOF(make_unique<NVPTXTargetObjectFile>()),
113*9880d681SAndroid Build Coastguard Worker       Subtarget(TT, CPU, FS, *this) {
114*9880d681SAndroid Build Coastguard Worker   if (TT.getOS() == Triple::NVCL)
115*9880d681SAndroid Build Coastguard Worker     drvInterface = NVPTX::NVCL;
116*9880d681SAndroid Build Coastguard Worker   else
117*9880d681SAndroid Build Coastguard Worker     drvInterface = NVPTX::CUDA;
118*9880d681SAndroid Build Coastguard Worker   initAsmInfo();
119*9880d681SAndroid Build Coastguard Worker }
120*9880d681SAndroid Build Coastguard Worker 
~NVPTXTargetMachine()121*9880d681SAndroid Build Coastguard Worker NVPTXTargetMachine::~NVPTXTargetMachine() {}
122*9880d681SAndroid Build Coastguard Worker 
anchor()123*9880d681SAndroid Build Coastguard Worker void NVPTXTargetMachine32::anchor() {}
124*9880d681SAndroid Build Coastguard Worker 
NVPTXTargetMachine32(const Target & T,const Triple & TT,StringRef CPU,StringRef FS,const TargetOptions & Options,Optional<Reloc::Model> RM,CodeModel::Model CM,CodeGenOpt::Level OL)125*9880d681SAndroid Build Coastguard Worker NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT,
126*9880d681SAndroid Build Coastguard Worker                                            StringRef CPU, StringRef FS,
127*9880d681SAndroid Build Coastguard Worker                                            const TargetOptions &Options,
128*9880d681SAndroid Build Coastguard Worker                                            Optional<Reloc::Model> RM,
129*9880d681SAndroid Build Coastguard Worker                                            CodeModel::Model CM,
130*9880d681SAndroid Build Coastguard Worker                                            CodeGenOpt::Level OL)
131*9880d681SAndroid Build Coastguard Worker     : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
132*9880d681SAndroid Build Coastguard Worker 
anchor()133*9880d681SAndroid Build Coastguard Worker void NVPTXTargetMachine64::anchor() {}
134*9880d681SAndroid Build Coastguard Worker 
NVPTXTargetMachine64(const Target & T,const Triple & TT,StringRef CPU,StringRef FS,const TargetOptions & Options,Optional<Reloc::Model> RM,CodeModel::Model CM,CodeGenOpt::Level OL)135*9880d681SAndroid Build Coastguard Worker NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT,
136*9880d681SAndroid Build Coastguard Worker                                            StringRef CPU, StringRef FS,
137*9880d681SAndroid Build Coastguard Worker                                            const TargetOptions &Options,
138*9880d681SAndroid Build Coastguard Worker                                            Optional<Reloc::Model> RM,
139*9880d681SAndroid Build Coastguard Worker                                            CodeModel::Model CM,
140*9880d681SAndroid Build Coastguard Worker                                            CodeGenOpt::Level OL)
141*9880d681SAndroid Build Coastguard Worker     : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
142*9880d681SAndroid Build Coastguard Worker 
143*9880d681SAndroid Build Coastguard Worker namespace {
144*9880d681SAndroid Build Coastguard Worker class NVPTXPassConfig : public TargetPassConfig {
145*9880d681SAndroid Build Coastguard Worker public:
NVPTXPassConfig(NVPTXTargetMachine * TM,PassManagerBase & PM)146*9880d681SAndroid Build Coastguard Worker   NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM)
147*9880d681SAndroid Build Coastguard Worker       : TargetPassConfig(TM, PM) {}
148*9880d681SAndroid Build Coastguard Worker 
getNVPTXTargetMachine() const149*9880d681SAndroid Build Coastguard Worker   NVPTXTargetMachine &getNVPTXTargetMachine() const {
150*9880d681SAndroid Build Coastguard Worker     return getTM<NVPTXTargetMachine>();
151*9880d681SAndroid Build Coastguard Worker   }
152*9880d681SAndroid Build Coastguard Worker 
153*9880d681SAndroid Build Coastguard Worker   void addIRPasses() override;
154*9880d681SAndroid Build Coastguard Worker   bool addInstSelector() override;
155*9880d681SAndroid Build Coastguard Worker   void addPostRegAlloc() override;
156*9880d681SAndroid Build Coastguard Worker   void addMachineSSAOptimization() override;
157*9880d681SAndroid Build Coastguard Worker 
158*9880d681SAndroid Build Coastguard Worker   FunctionPass *createTargetRegisterAllocator(bool) override;
159*9880d681SAndroid Build Coastguard Worker   void addFastRegAlloc(FunctionPass *RegAllocPass) override;
160*9880d681SAndroid Build Coastguard Worker   void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
161*9880d681SAndroid Build Coastguard Worker 
162*9880d681SAndroid Build Coastguard Worker private:
163*9880d681SAndroid Build Coastguard Worker   // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This
164*9880d681SAndroid Build Coastguard Worker   // function is only called in opt mode.
165*9880d681SAndroid Build Coastguard Worker   void addEarlyCSEOrGVNPass();
166*9880d681SAndroid Build Coastguard Worker 
167*9880d681SAndroid Build Coastguard Worker   // Add passes that propagate special memory spaces.
168*9880d681SAndroid Build Coastguard Worker   void addAddressSpaceInferencePasses();
169*9880d681SAndroid Build Coastguard Worker 
170*9880d681SAndroid Build Coastguard Worker   // Add passes that perform straight-line scalar optimizations.
171*9880d681SAndroid Build Coastguard Worker   void addStraightLineScalarOptimizationPasses();
172*9880d681SAndroid Build Coastguard Worker };
173*9880d681SAndroid Build Coastguard Worker } // end anonymous namespace
174*9880d681SAndroid Build Coastguard Worker 
createPassConfig(PassManagerBase & PM)175*9880d681SAndroid Build Coastguard Worker TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
176*9880d681SAndroid Build Coastguard Worker   return new NVPTXPassConfig(this, PM);
177*9880d681SAndroid Build Coastguard Worker }
178*9880d681SAndroid Build Coastguard Worker 
addEarlyAsPossiblePasses(PassManagerBase & PM)179*9880d681SAndroid Build Coastguard Worker void NVPTXTargetMachine::addEarlyAsPossiblePasses(PassManagerBase &PM) {
180*9880d681SAndroid Build Coastguard Worker   PM.add(createNVVMReflectPass());
181*9880d681SAndroid Build Coastguard Worker   PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
182*9880d681SAndroid Build Coastguard Worker }
183*9880d681SAndroid Build Coastguard Worker 
getTargetIRAnalysis()184*9880d681SAndroid Build Coastguard Worker TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
185*9880d681SAndroid Build Coastguard Worker   return TargetIRAnalysis([this](const Function &F) {
186*9880d681SAndroid Build Coastguard Worker     return TargetTransformInfo(NVPTXTTIImpl(this, F));
187*9880d681SAndroid Build Coastguard Worker   });
188*9880d681SAndroid Build Coastguard Worker }
189*9880d681SAndroid Build Coastguard Worker 
addEarlyCSEOrGVNPass()190*9880d681SAndroid Build Coastguard Worker void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
191*9880d681SAndroid Build Coastguard Worker   if (getOptLevel() == CodeGenOpt::Aggressive)
192*9880d681SAndroid Build Coastguard Worker     addPass(createGVNPass());
193*9880d681SAndroid Build Coastguard Worker   else
194*9880d681SAndroid Build Coastguard Worker     addPass(createEarlyCSEPass());
195*9880d681SAndroid Build Coastguard Worker }
196*9880d681SAndroid Build Coastguard Worker 
addAddressSpaceInferencePasses()197*9880d681SAndroid Build Coastguard Worker void NVPTXPassConfig::addAddressSpaceInferencePasses() {
198*9880d681SAndroid Build Coastguard Worker   // NVPTXLowerKernelArgs emits alloca for byval parameters which can often
199*9880d681SAndroid Build Coastguard Worker   // be eliminated by SROA.
200*9880d681SAndroid Build Coastguard Worker   addPass(createSROAPass());
201*9880d681SAndroid Build Coastguard Worker   addPass(createNVPTXLowerAllocaPass());
202*9880d681SAndroid Build Coastguard Worker   if (UseInferAddressSpaces) {
203*9880d681SAndroid Build Coastguard Worker     addPass(createNVPTXInferAddressSpacesPass());
204*9880d681SAndroid Build Coastguard Worker   } else {
205*9880d681SAndroid Build Coastguard Worker     addPass(createNVPTXFavorNonGenericAddrSpacesPass());
206*9880d681SAndroid Build Coastguard Worker     // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave
207*9880d681SAndroid Build Coastguard Worker     // them unused. We could remove dead code in an ad-hoc manner, but that
208*9880d681SAndroid Build Coastguard Worker     // requires manual work and might be error-prone.
209*9880d681SAndroid Build Coastguard Worker     addPass(createDeadCodeEliminationPass());
210*9880d681SAndroid Build Coastguard Worker   }
211*9880d681SAndroid Build Coastguard Worker }
212*9880d681SAndroid Build Coastguard Worker 
addStraightLineScalarOptimizationPasses()213*9880d681SAndroid Build Coastguard Worker void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
214*9880d681SAndroid Build Coastguard Worker   addPass(createSeparateConstOffsetFromGEPPass());
215*9880d681SAndroid Build Coastguard Worker   addPass(createSpeculativeExecutionPass());
216*9880d681SAndroid Build Coastguard Worker   // ReassociateGEPs exposes more opportunites for SLSR. See
217*9880d681SAndroid Build Coastguard Worker   // the example in reassociate-geps-and-slsr.ll.
218*9880d681SAndroid Build Coastguard Worker   addPass(createStraightLineStrengthReducePass());
219*9880d681SAndroid Build Coastguard Worker   // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
220*9880d681SAndroid Build Coastguard Worker   // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE
221*9880d681SAndroid Build Coastguard Worker   // for some of our benchmarks.
222*9880d681SAndroid Build Coastguard Worker   addEarlyCSEOrGVNPass();
223*9880d681SAndroid Build Coastguard Worker   // Run NaryReassociate after EarlyCSE/GVN to be more effective.
224*9880d681SAndroid Build Coastguard Worker   addPass(createNaryReassociatePass());
225*9880d681SAndroid Build Coastguard Worker   // NaryReassociate on GEPs creates redundant common expressions, so run
226*9880d681SAndroid Build Coastguard Worker   // EarlyCSE after it.
227*9880d681SAndroid Build Coastguard Worker   addPass(createEarlyCSEPass());
228*9880d681SAndroid Build Coastguard Worker }
229*9880d681SAndroid Build Coastguard Worker 
addIRPasses()230*9880d681SAndroid Build Coastguard Worker void NVPTXPassConfig::addIRPasses() {
231*9880d681SAndroid Build Coastguard Worker   // The following passes are known to not play well with virtual regs hanging
232*9880d681SAndroid Build Coastguard Worker   // around after register allocation (which in our case, is *all* registers).
233*9880d681SAndroid Build Coastguard Worker   // We explicitly disable them here.  We do, however, need some functionality
234*9880d681SAndroid Build Coastguard Worker   // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
235*9880d681SAndroid Build Coastguard Worker   // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
236*9880d681SAndroid Build Coastguard Worker   disablePass(&PrologEpilogCodeInserterID);
237*9880d681SAndroid Build Coastguard Worker   disablePass(&MachineCopyPropagationID);
238*9880d681SAndroid Build Coastguard Worker   disablePass(&TailDuplicateID);
239*9880d681SAndroid Build Coastguard Worker   disablePass(&StackMapLivenessID);
240*9880d681SAndroid Build Coastguard Worker   disablePass(&LiveDebugValuesID);
241*9880d681SAndroid Build Coastguard Worker   disablePass(&PostRASchedulerID);
242*9880d681SAndroid Build Coastguard Worker   disablePass(&FuncletLayoutID);
243*9880d681SAndroid Build Coastguard Worker   disablePass(&PatchableFunctionID);
244*9880d681SAndroid Build Coastguard Worker 
245*9880d681SAndroid Build Coastguard Worker   // NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running
246*9880d681SAndroid Build Coastguard Worker   // it here does nothing.  But since we need it for correctness when lowering
247*9880d681SAndroid Build Coastguard Worker   // to NVPTX, run it here too, in case whoever built our pass pipeline didn't
248*9880d681SAndroid Build Coastguard Worker   // call addEarlyAsPossiblePasses.
249*9880d681SAndroid Build Coastguard Worker   addPass(createNVVMReflectPass());
250*9880d681SAndroid Build Coastguard Worker 
251*9880d681SAndroid Build Coastguard Worker   if (getOptLevel() != CodeGenOpt::None)
252*9880d681SAndroid Build Coastguard Worker     addPass(createNVPTXImageOptimizerPass());
253*9880d681SAndroid Build Coastguard Worker   addPass(createNVPTXAssignValidGlobalNamesPass());
254*9880d681SAndroid Build Coastguard Worker   addPass(createGenericToNVVMPass());
255*9880d681SAndroid Build Coastguard Worker 
256*9880d681SAndroid Build Coastguard Worker   // NVPTXLowerKernelArgs is required for correctness and should be run right
257*9880d681SAndroid Build Coastguard Worker   // before the address space inference passes.
258*9880d681SAndroid Build Coastguard Worker   addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
259*9880d681SAndroid Build Coastguard Worker   if (getOptLevel() != CodeGenOpt::None) {
260*9880d681SAndroid Build Coastguard Worker     addAddressSpaceInferencePasses();
261*9880d681SAndroid Build Coastguard Worker     addStraightLineScalarOptimizationPasses();
262*9880d681SAndroid Build Coastguard Worker   }
263*9880d681SAndroid Build Coastguard Worker 
264*9880d681SAndroid Build Coastguard Worker   // === LSR and other generic IR passes ===
265*9880d681SAndroid Build Coastguard Worker   TargetPassConfig::addIRPasses();
266*9880d681SAndroid Build Coastguard Worker   // EarlyCSE is not always strong enough to clean up what LSR produces. For
267*9880d681SAndroid Build Coastguard Worker   // example, GVN can combine
268*9880d681SAndroid Build Coastguard Worker   //
269*9880d681SAndroid Build Coastguard Worker   //   %0 = add %a, %b
270*9880d681SAndroid Build Coastguard Worker   //   %1 = add %b, %a
271*9880d681SAndroid Build Coastguard Worker   //
272*9880d681SAndroid Build Coastguard Worker   // and
273*9880d681SAndroid Build Coastguard Worker   //
274*9880d681SAndroid Build Coastguard Worker   //   %0 = shl nsw %a, 2
275*9880d681SAndroid Build Coastguard Worker   //   %1 = shl %a, 2
276*9880d681SAndroid Build Coastguard Worker   //
277*9880d681SAndroid Build Coastguard Worker   // but EarlyCSE can do neither of them.
278*9880d681SAndroid Build Coastguard Worker   if (getOptLevel() != CodeGenOpt::None)
279*9880d681SAndroid Build Coastguard Worker     addEarlyCSEOrGVNPass();
280*9880d681SAndroid Build Coastguard Worker }
281*9880d681SAndroid Build Coastguard Worker 
addInstSelector()282*9880d681SAndroid Build Coastguard Worker bool NVPTXPassConfig::addInstSelector() {
283*9880d681SAndroid Build Coastguard Worker   const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
284*9880d681SAndroid Build Coastguard Worker 
285*9880d681SAndroid Build Coastguard Worker   addPass(createLowerAggrCopies());
286*9880d681SAndroid Build Coastguard Worker   addPass(createAllocaHoisting());
287*9880d681SAndroid Build Coastguard Worker   addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
288*9880d681SAndroid Build Coastguard Worker 
289*9880d681SAndroid Build Coastguard Worker   if (!ST.hasImageHandles())
290*9880d681SAndroid Build Coastguard Worker     addPass(createNVPTXReplaceImageHandlesPass());
291*9880d681SAndroid Build Coastguard Worker 
292*9880d681SAndroid Build Coastguard Worker   return false;
293*9880d681SAndroid Build Coastguard Worker }
294*9880d681SAndroid Build Coastguard Worker 
addPostRegAlloc()295*9880d681SAndroid Build Coastguard Worker void NVPTXPassConfig::addPostRegAlloc() {
296*9880d681SAndroid Build Coastguard Worker   addPass(createNVPTXPrologEpilogPass(), false);
297*9880d681SAndroid Build Coastguard Worker   if (getOptLevel() != CodeGenOpt::None) {
298*9880d681SAndroid Build Coastguard Worker     // NVPTXPrologEpilogPass calculates frame object offset and replace frame
299*9880d681SAndroid Build Coastguard Worker     // index with VRFrame register. NVPTXPeephole need to be run after that and
300*9880d681SAndroid Build Coastguard Worker     // will replace VRFrame with VRFrameLocal when possible.
301*9880d681SAndroid Build Coastguard Worker     addPass(createNVPTXPeephole());
302*9880d681SAndroid Build Coastguard Worker   }
303*9880d681SAndroid Build Coastguard Worker }
304*9880d681SAndroid Build Coastguard Worker 
createTargetRegisterAllocator(bool)305*9880d681SAndroid Build Coastguard Worker FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
306*9880d681SAndroid Build Coastguard Worker   return nullptr; // No reg alloc
307*9880d681SAndroid Build Coastguard Worker }
308*9880d681SAndroid Build Coastguard Worker 
addFastRegAlloc(FunctionPass * RegAllocPass)309*9880d681SAndroid Build Coastguard Worker void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
310*9880d681SAndroid Build Coastguard Worker   assert(!RegAllocPass && "NVPTX uses no regalloc!");
311*9880d681SAndroid Build Coastguard Worker   addPass(&PHIEliminationID);
312*9880d681SAndroid Build Coastguard Worker   addPass(&TwoAddressInstructionPassID);
313*9880d681SAndroid Build Coastguard Worker }
314*9880d681SAndroid Build Coastguard Worker 
addOptimizedRegAlloc(FunctionPass * RegAllocPass)315*9880d681SAndroid Build Coastguard Worker void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
316*9880d681SAndroid Build Coastguard Worker   assert(!RegAllocPass && "NVPTX uses no regalloc!");
317*9880d681SAndroid Build Coastguard Worker 
318*9880d681SAndroid Build Coastguard Worker   addPass(&ProcessImplicitDefsID);
319*9880d681SAndroid Build Coastguard Worker   addPass(&LiveVariablesID);
320*9880d681SAndroid Build Coastguard Worker   addPass(&MachineLoopInfoID);
321*9880d681SAndroid Build Coastguard Worker   addPass(&PHIEliminationID);
322*9880d681SAndroid Build Coastguard Worker 
323*9880d681SAndroid Build Coastguard Worker   addPass(&TwoAddressInstructionPassID);
324*9880d681SAndroid Build Coastguard Worker   addPass(&RegisterCoalescerID);
325*9880d681SAndroid Build Coastguard Worker 
326*9880d681SAndroid Build Coastguard Worker   // PreRA instruction scheduling.
327*9880d681SAndroid Build Coastguard Worker   if (addPass(&MachineSchedulerID))
328*9880d681SAndroid Build Coastguard Worker     printAndVerify("After Machine Scheduling");
329*9880d681SAndroid Build Coastguard Worker 
330*9880d681SAndroid Build Coastguard Worker 
331*9880d681SAndroid Build Coastguard Worker   addPass(&StackSlotColoringID);
332*9880d681SAndroid Build Coastguard Worker 
333*9880d681SAndroid Build Coastguard Worker   // FIXME: Needs physical registers
334*9880d681SAndroid Build Coastguard Worker   //addPass(&PostRAMachineLICMID);
335*9880d681SAndroid Build Coastguard Worker 
336*9880d681SAndroid Build Coastguard Worker   printAndVerify("After StackSlotColoring");
337*9880d681SAndroid Build Coastguard Worker }
338*9880d681SAndroid Build Coastguard Worker 
addMachineSSAOptimization()339*9880d681SAndroid Build Coastguard Worker void NVPTXPassConfig::addMachineSSAOptimization() {
340*9880d681SAndroid Build Coastguard Worker   // Pre-ra tail duplication.
341*9880d681SAndroid Build Coastguard Worker   if (addPass(&EarlyTailDuplicateID))
342*9880d681SAndroid Build Coastguard Worker     printAndVerify("After Pre-RegAlloc TailDuplicate");
343*9880d681SAndroid Build Coastguard Worker 
344*9880d681SAndroid Build Coastguard Worker   // Optimize PHIs before DCE: removing dead PHI cycles may make more
345*9880d681SAndroid Build Coastguard Worker   // instructions dead.
346*9880d681SAndroid Build Coastguard Worker   addPass(&OptimizePHIsID);
347*9880d681SAndroid Build Coastguard Worker 
348*9880d681SAndroid Build Coastguard Worker   // This pass merges large allocas. StackSlotColoring is a different pass
349*9880d681SAndroid Build Coastguard Worker   // which merges spill slots.
350*9880d681SAndroid Build Coastguard Worker   addPass(&StackColoringID);
351*9880d681SAndroid Build Coastguard Worker 
352*9880d681SAndroid Build Coastguard Worker   // If the target requests it, assign local variables to stack slots relative
353*9880d681SAndroid Build Coastguard Worker   // to one another and simplify frame index references where possible.
354*9880d681SAndroid Build Coastguard Worker   addPass(&LocalStackSlotAllocationID);
355*9880d681SAndroid Build Coastguard Worker 
356*9880d681SAndroid Build Coastguard Worker   // With optimization, dead code should already be eliminated. However
357*9880d681SAndroid Build Coastguard Worker   // there is one known exception: lowered code for arguments that are only
358*9880d681SAndroid Build Coastguard Worker   // used by tail calls, where the tail calls reuse the incoming stack
359*9880d681SAndroid Build Coastguard Worker   // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
360*9880d681SAndroid Build Coastguard Worker   addPass(&DeadMachineInstructionElimID);
361*9880d681SAndroid Build Coastguard Worker   printAndVerify("After codegen DCE pass");
362*9880d681SAndroid Build Coastguard Worker 
363*9880d681SAndroid Build Coastguard Worker   // Allow targets to insert passes that improve instruction level parallelism,
364*9880d681SAndroid Build Coastguard Worker   // like if-conversion. Such passes will typically need dominator trees and
365*9880d681SAndroid Build Coastguard Worker   // loop info, just like LICM and CSE below.
366*9880d681SAndroid Build Coastguard Worker   if (addILPOpts())
367*9880d681SAndroid Build Coastguard Worker     printAndVerify("After ILP optimizations");
368*9880d681SAndroid Build Coastguard Worker 
369*9880d681SAndroid Build Coastguard Worker   addPass(&MachineLICMID);
370*9880d681SAndroid Build Coastguard Worker   addPass(&MachineCSEID);
371*9880d681SAndroid Build Coastguard Worker 
372*9880d681SAndroid Build Coastguard Worker   addPass(&MachineSinkingID);
373*9880d681SAndroid Build Coastguard Worker   printAndVerify("After Machine LICM, CSE and Sinking passes");
374*9880d681SAndroid Build Coastguard Worker 
375*9880d681SAndroid Build Coastguard Worker   addPass(&PeepholeOptimizerID);
376*9880d681SAndroid Build Coastguard Worker   printAndVerify("After codegen peephole optimization pass");
377*9880d681SAndroid Build Coastguard Worker }
378