xref: /aosp_15_r20/external/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker //===-- AMDGPUPromoteAlloca.cpp - Promote Allocas -------------------------===//
2*9880d681SAndroid Build Coastguard Worker //
3*9880d681SAndroid Build Coastguard Worker //                     The LLVM Compiler Infrastructure
4*9880d681SAndroid Build Coastguard Worker //
5*9880d681SAndroid Build Coastguard Worker // This file is distributed under the University of Illinois Open Source
6*9880d681SAndroid Build Coastguard Worker // License. See LICENSE.TXT for details.
7*9880d681SAndroid Build Coastguard Worker //
8*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
9*9880d681SAndroid Build Coastguard Worker //
10*9880d681SAndroid Build Coastguard Worker // This pass eliminates allocas by either converting them into vectors or
11*9880d681SAndroid Build Coastguard Worker // by migrating them to local address space.
12*9880d681SAndroid Build Coastguard Worker //
13*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
14*9880d681SAndroid Build Coastguard Worker 
15*9880d681SAndroid Build Coastguard Worker #include "AMDGPU.h"
16*9880d681SAndroid Build Coastguard Worker #include "AMDGPUSubtarget.h"
17*9880d681SAndroid Build Coastguard Worker #include "llvm/Analysis/ValueTracking.h"
18*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/IRBuilder.h"
19*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/IntrinsicInst.h"
20*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/MDBuilder.h"
21*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/Debug.h"
22*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/raw_ostream.h"
23*9880d681SAndroid Build Coastguard Worker 
24*9880d681SAndroid Build Coastguard Worker #define DEBUG_TYPE "amdgpu-promote-alloca"
25*9880d681SAndroid Build Coastguard Worker 
26*9880d681SAndroid Build Coastguard Worker using namespace llvm;
27*9880d681SAndroid Build Coastguard Worker 
28*9880d681SAndroid Build Coastguard Worker namespace {
29*9880d681SAndroid Build Coastguard Worker 
30*9880d681SAndroid Build Coastguard Worker // FIXME: This can create globals so should be a module pass.
31*9880d681SAndroid Build Coastguard Worker class AMDGPUPromoteAlloca : public FunctionPass {
32*9880d681SAndroid Build Coastguard Worker private:
33*9880d681SAndroid Build Coastguard Worker   const TargetMachine *TM;
34*9880d681SAndroid Build Coastguard Worker   Module *Mod;
35*9880d681SAndroid Build Coastguard Worker   const DataLayout *DL;
36*9880d681SAndroid Build Coastguard Worker   MDNode *MaxWorkGroupSizeRange;
37*9880d681SAndroid Build Coastguard Worker 
38*9880d681SAndroid Build Coastguard Worker   // FIXME: This should be per-kernel.
39*9880d681SAndroid Build Coastguard Worker   uint32_t LocalMemLimit;
40*9880d681SAndroid Build Coastguard Worker   uint32_t CurrentLocalMemUsage;
41*9880d681SAndroid Build Coastguard Worker 
42*9880d681SAndroid Build Coastguard Worker   bool IsAMDGCN;
43*9880d681SAndroid Build Coastguard Worker   bool IsAMDHSA;
44*9880d681SAndroid Build Coastguard Worker 
45*9880d681SAndroid Build Coastguard Worker   std::pair<Value *, Value *> getLocalSizeYZ(IRBuilder<> &Builder);
46*9880d681SAndroid Build Coastguard Worker   Value *getWorkitemID(IRBuilder<> &Builder, unsigned N);
47*9880d681SAndroid Build Coastguard Worker 
48*9880d681SAndroid Build Coastguard Worker   /// BaseAlloca is the alloca root the search started from.
49*9880d681SAndroid Build Coastguard Worker   /// Val may be that alloca or a recursive user of it.
50*9880d681SAndroid Build Coastguard Worker   bool collectUsesWithPtrTypes(Value *BaseAlloca,
51*9880d681SAndroid Build Coastguard Worker                                Value *Val,
52*9880d681SAndroid Build Coastguard Worker                                std::vector<Value*> &WorkList) const;
53*9880d681SAndroid Build Coastguard Worker 
54*9880d681SAndroid Build Coastguard Worker   /// Val is a derived pointer from Alloca. OpIdx0/OpIdx1 are the operand
55*9880d681SAndroid Build Coastguard Worker   /// indices to an instruction with 2 pointer inputs (e.g. select, icmp).
56*9880d681SAndroid Build Coastguard Worker   /// Returns true if both operands are derived from the same alloca. Val should
57*9880d681SAndroid Build Coastguard Worker   /// be the same value as one of the input operands of UseInst.
58*9880d681SAndroid Build Coastguard Worker   bool binaryOpIsDerivedFromSameAlloca(Value *Alloca, Value *Val,
59*9880d681SAndroid Build Coastguard Worker                                        Instruction *UseInst,
60*9880d681SAndroid Build Coastguard Worker                                        int OpIdx0, int OpIdx1) const;
61*9880d681SAndroid Build Coastguard Worker 
62*9880d681SAndroid Build Coastguard Worker public:
63*9880d681SAndroid Build Coastguard Worker   static char ID;
64*9880d681SAndroid Build Coastguard Worker 
AMDGPUPromoteAlloca(const TargetMachine * TM_=nullptr)65*9880d681SAndroid Build Coastguard Worker   AMDGPUPromoteAlloca(const TargetMachine *TM_ = nullptr) :
66*9880d681SAndroid Build Coastguard Worker     FunctionPass(ID),
67*9880d681SAndroid Build Coastguard Worker     TM(TM_),
68*9880d681SAndroid Build Coastguard Worker     Mod(nullptr),
69*9880d681SAndroid Build Coastguard Worker     DL(nullptr),
70*9880d681SAndroid Build Coastguard Worker     MaxWorkGroupSizeRange(nullptr),
71*9880d681SAndroid Build Coastguard Worker     LocalMemLimit(0),
72*9880d681SAndroid Build Coastguard Worker     CurrentLocalMemUsage(0),
73*9880d681SAndroid Build Coastguard Worker     IsAMDGCN(false),
74*9880d681SAndroid Build Coastguard Worker     IsAMDHSA(false) { }
75*9880d681SAndroid Build Coastguard Worker 
76*9880d681SAndroid Build Coastguard Worker   bool doInitialization(Module &M) override;
77*9880d681SAndroid Build Coastguard Worker   bool runOnFunction(Function &F) override;
78*9880d681SAndroid Build Coastguard Worker 
getPassName() const79*9880d681SAndroid Build Coastguard Worker   const char *getPassName() const override {
80*9880d681SAndroid Build Coastguard Worker     return "AMDGPU Promote Alloca";
81*9880d681SAndroid Build Coastguard Worker   }
82*9880d681SAndroid Build Coastguard Worker 
83*9880d681SAndroid Build Coastguard Worker   void handleAlloca(AllocaInst &I);
84*9880d681SAndroid Build Coastguard Worker 
getAnalysisUsage(AnalysisUsage & AU) const85*9880d681SAndroid Build Coastguard Worker   void getAnalysisUsage(AnalysisUsage &AU) const override {
86*9880d681SAndroid Build Coastguard Worker     AU.setPreservesCFG();
87*9880d681SAndroid Build Coastguard Worker     FunctionPass::getAnalysisUsage(AU);
88*9880d681SAndroid Build Coastguard Worker   }
89*9880d681SAndroid Build Coastguard Worker };
90*9880d681SAndroid Build Coastguard Worker 
91*9880d681SAndroid Build Coastguard Worker } // End anonymous namespace
92*9880d681SAndroid Build Coastguard Worker 
93*9880d681SAndroid Build Coastguard Worker char AMDGPUPromoteAlloca::ID = 0;
94*9880d681SAndroid Build Coastguard Worker 
95*9880d681SAndroid Build Coastguard Worker INITIALIZE_TM_PASS(AMDGPUPromoteAlloca, DEBUG_TYPE,
96*9880d681SAndroid Build Coastguard Worker                    "AMDGPU promote alloca to vector or LDS", false, false)
97*9880d681SAndroid Build Coastguard Worker 
98*9880d681SAndroid Build Coastguard Worker char &llvm::AMDGPUPromoteAllocaID = AMDGPUPromoteAlloca::ID;
99*9880d681SAndroid Build Coastguard Worker 
100*9880d681SAndroid Build Coastguard Worker 
doInitialization(Module & M)101*9880d681SAndroid Build Coastguard Worker bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
102*9880d681SAndroid Build Coastguard Worker   if (!TM)
103*9880d681SAndroid Build Coastguard Worker     return false;
104*9880d681SAndroid Build Coastguard Worker 
105*9880d681SAndroid Build Coastguard Worker   Mod = &M;
106*9880d681SAndroid Build Coastguard Worker   DL = &Mod->getDataLayout();
107*9880d681SAndroid Build Coastguard Worker 
108*9880d681SAndroid Build Coastguard Worker   // The maximum workitem id.
109*9880d681SAndroid Build Coastguard Worker   //
110*9880d681SAndroid Build Coastguard Worker   // FIXME: Should get as subtarget property. Usually runtime enforced max is
111*9880d681SAndroid Build Coastguard Worker   // 256.
112*9880d681SAndroid Build Coastguard Worker   MDBuilder MDB(Mod->getContext());
113*9880d681SAndroid Build Coastguard Worker   MaxWorkGroupSizeRange = MDB.createRange(APInt(32, 0), APInt(32, 2048));
114*9880d681SAndroid Build Coastguard Worker 
115*9880d681SAndroid Build Coastguard Worker   const Triple &TT = TM->getTargetTriple();
116*9880d681SAndroid Build Coastguard Worker 
117*9880d681SAndroid Build Coastguard Worker   IsAMDGCN = TT.getArch() == Triple::amdgcn;
118*9880d681SAndroid Build Coastguard Worker   IsAMDHSA = TT.getOS() == Triple::AMDHSA;
119*9880d681SAndroid Build Coastguard Worker 
120*9880d681SAndroid Build Coastguard Worker   return false;
121*9880d681SAndroid Build Coastguard Worker }
122*9880d681SAndroid Build Coastguard Worker 
runOnFunction(Function & F)123*9880d681SAndroid Build Coastguard Worker bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
124*9880d681SAndroid Build Coastguard Worker   if (!TM || skipFunction(F))
125*9880d681SAndroid Build Coastguard Worker     return false;
126*9880d681SAndroid Build Coastguard Worker 
127*9880d681SAndroid Build Coastguard Worker   const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
128*9880d681SAndroid Build Coastguard Worker   if (!ST.isPromoteAllocaEnabled())
129*9880d681SAndroid Build Coastguard Worker     return false;
130*9880d681SAndroid Build Coastguard Worker 
131*9880d681SAndroid Build Coastguard Worker   FunctionType *FTy = F.getFunctionType();
132*9880d681SAndroid Build Coastguard Worker 
133*9880d681SAndroid Build Coastguard Worker   // If the function has any arguments in the local address space, then it's
134*9880d681SAndroid Build Coastguard Worker   // possible these arguments require the entire local memory space, so
135*9880d681SAndroid Build Coastguard Worker   // we cannot use local memory in the pass.
136*9880d681SAndroid Build Coastguard Worker   for (Type *ParamTy : FTy->params()) {
137*9880d681SAndroid Build Coastguard Worker     PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
138*9880d681SAndroid Build Coastguard Worker     if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
139*9880d681SAndroid Build Coastguard Worker       LocalMemLimit = 0;
140*9880d681SAndroid Build Coastguard Worker       DEBUG(dbgs() << "Function has local memory argument. Promoting to "
141*9880d681SAndroid Build Coastguard Worker                       "local memory disabled.\n");
142*9880d681SAndroid Build Coastguard Worker       return false;
143*9880d681SAndroid Build Coastguard Worker     }
144*9880d681SAndroid Build Coastguard Worker   }
145*9880d681SAndroid Build Coastguard Worker 
146*9880d681SAndroid Build Coastguard Worker   LocalMemLimit = ST.getLocalMemorySize();
147*9880d681SAndroid Build Coastguard Worker   if (LocalMemLimit == 0)
148*9880d681SAndroid Build Coastguard Worker     return false;
149*9880d681SAndroid Build Coastguard Worker 
150*9880d681SAndroid Build Coastguard Worker   const DataLayout &DL = Mod->getDataLayout();
151*9880d681SAndroid Build Coastguard Worker 
152*9880d681SAndroid Build Coastguard Worker   // Check how much local memory is being used by global objects
153*9880d681SAndroid Build Coastguard Worker   CurrentLocalMemUsage = 0;
154*9880d681SAndroid Build Coastguard Worker   for (GlobalVariable &GV : Mod->globals()) {
155*9880d681SAndroid Build Coastguard Worker     if (GV.getType()->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
156*9880d681SAndroid Build Coastguard Worker       continue;
157*9880d681SAndroid Build Coastguard Worker 
158*9880d681SAndroid Build Coastguard Worker     for (const User *U : GV.users()) {
159*9880d681SAndroid Build Coastguard Worker       const Instruction *Use = dyn_cast<Instruction>(U);
160*9880d681SAndroid Build Coastguard Worker       if (!Use)
161*9880d681SAndroid Build Coastguard Worker         continue;
162*9880d681SAndroid Build Coastguard Worker 
163*9880d681SAndroid Build Coastguard Worker       if (Use->getParent()->getParent() == &F) {
164*9880d681SAndroid Build Coastguard Worker         unsigned Align = GV.getAlignment();
165*9880d681SAndroid Build Coastguard Worker         if (Align == 0)
166*9880d681SAndroid Build Coastguard Worker           Align = DL.getABITypeAlignment(GV.getValueType());
167*9880d681SAndroid Build Coastguard Worker 
168*9880d681SAndroid Build Coastguard Worker         // FIXME: Try to account for padding here. The padding is currently
169*9880d681SAndroid Build Coastguard Worker         // determined from the inverse order of uses in the function. I'm not
170*9880d681SAndroid Build Coastguard Worker         // sure if the use list order is in any way connected to this, so the
171*9880d681SAndroid Build Coastguard Worker         // total reported size is likely incorrect.
172*9880d681SAndroid Build Coastguard Worker         uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());
173*9880d681SAndroid Build Coastguard Worker         CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Align);
174*9880d681SAndroid Build Coastguard Worker         CurrentLocalMemUsage += AllocSize;
175*9880d681SAndroid Build Coastguard Worker         break;
176*9880d681SAndroid Build Coastguard Worker       }
177*9880d681SAndroid Build Coastguard Worker     }
178*9880d681SAndroid Build Coastguard Worker   }
179*9880d681SAndroid Build Coastguard Worker 
180*9880d681SAndroid Build Coastguard Worker   unsigned MaxOccupancy = ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage);
181*9880d681SAndroid Build Coastguard Worker 
182*9880d681SAndroid Build Coastguard Worker   // Restrict local memory usage so that we don't drastically reduce occupancy,
183*9880d681SAndroid Build Coastguard Worker   // unless it is already significantly reduced.
184*9880d681SAndroid Build Coastguard Worker 
185*9880d681SAndroid Build Coastguard Worker   // TODO: Have some sort of hint or other heuristics to guess occupancy based
186*9880d681SAndroid Build Coastguard Worker   // on other factors..
187*9880d681SAndroid Build Coastguard Worker   unsigned OccupancyHint
188*9880d681SAndroid Build Coastguard Worker     = AMDGPU::getIntegerAttribute(F, "amdgpu-max-waves-per-eu", 0);
189*9880d681SAndroid Build Coastguard Worker   if (OccupancyHint == 0)
190*9880d681SAndroid Build Coastguard Worker     OccupancyHint = 7;
191*9880d681SAndroid Build Coastguard Worker 
192*9880d681SAndroid Build Coastguard Worker   // Clamp to max value.
193*9880d681SAndroid Build Coastguard Worker   OccupancyHint = std::min(OccupancyHint, ST.getMaxWavesPerCU());
194*9880d681SAndroid Build Coastguard Worker 
195*9880d681SAndroid Build Coastguard Worker   // Check the hint but ignore it if it's obviously wrong from the existing LDS
196*9880d681SAndroid Build Coastguard Worker   // usage.
197*9880d681SAndroid Build Coastguard Worker   MaxOccupancy = std::min(OccupancyHint, MaxOccupancy);
198*9880d681SAndroid Build Coastguard Worker 
199*9880d681SAndroid Build Coastguard Worker 
200*9880d681SAndroid Build Coastguard Worker   // Round up to the next tier of usage.
201*9880d681SAndroid Build Coastguard Worker   unsigned MaxSizeWithWaveCount
202*9880d681SAndroid Build Coastguard Worker     = ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy);
203*9880d681SAndroid Build Coastguard Worker 
204*9880d681SAndroid Build Coastguard Worker   // Program is possibly broken by using more local mem than available.
205*9880d681SAndroid Build Coastguard Worker   if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
206*9880d681SAndroid Build Coastguard Worker     return false;
207*9880d681SAndroid Build Coastguard Worker 
208*9880d681SAndroid Build Coastguard Worker   LocalMemLimit = MaxSizeWithWaveCount;
209*9880d681SAndroid Build Coastguard Worker 
210*9880d681SAndroid Build Coastguard Worker   DEBUG(
211*9880d681SAndroid Build Coastguard Worker     dbgs() << F.getName() << " uses " << CurrentLocalMemUsage << " bytes of LDS\n"
212*9880d681SAndroid Build Coastguard Worker     << "  Rounding size to " << MaxSizeWithWaveCount
213*9880d681SAndroid Build Coastguard Worker     << " with a maximum occupancy of " << MaxOccupancy << '\n'
214*9880d681SAndroid Build Coastguard Worker     << " and " << (LocalMemLimit - CurrentLocalMemUsage)
215*9880d681SAndroid Build Coastguard Worker     << " available for promotion\n"
216*9880d681SAndroid Build Coastguard Worker   );
217*9880d681SAndroid Build Coastguard Worker 
218*9880d681SAndroid Build Coastguard Worker   BasicBlock &EntryBB = *F.begin();
219*9880d681SAndroid Build Coastguard Worker   for (auto I = EntryBB.begin(), E = EntryBB.end(); I != E; ) {
220*9880d681SAndroid Build Coastguard Worker     AllocaInst *AI = dyn_cast<AllocaInst>(I);
221*9880d681SAndroid Build Coastguard Worker 
222*9880d681SAndroid Build Coastguard Worker     ++I;
223*9880d681SAndroid Build Coastguard Worker     if (AI)
224*9880d681SAndroid Build Coastguard Worker       handleAlloca(*AI);
225*9880d681SAndroid Build Coastguard Worker   }
226*9880d681SAndroid Build Coastguard Worker 
227*9880d681SAndroid Build Coastguard Worker   return true;
228*9880d681SAndroid Build Coastguard Worker }
229*9880d681SAndroid Build Coastguard Worker 
230*9880d681SAndroid Build Coastguard Worker std::pair<Value *, Value *>
getLocalSizeYZ(IRBuilder<> & Builder)231*9880d681SAndroid Build Coastguard Worker AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
232*9880d681SAndroid Build Coastguard Worker   if (!IsAMDHSA) {
233*9880d681SAndroid Build Coastguard Worker     Function *LocalSizeYFn
234*9880d681SAndroid Build Coastguard Worker       = Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_y);
235*9880d681SAndroid Build Coastguard Worker     Function *LocalSizeZFn
236*9880d681SAndroid Build Coastguard Worker       = Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_z);
237*9880d681SAndroid Build Coastguard Worker 
238*9880d681SAndroid Build Coastguard Worker     CallInst *LocalSizeY = Builder.CreateCall(LocalSizeYFn, {});
239*9880d681SAndroid Build Coastguard Worker     CallInst *LocalSizeZ = Builder.CreateCall(LocalSizeZFn, {});
240*9880d681SAndroid Build Coastguard Worker 
241*9880d681SAndroid Build Coastguard Worker     LocalSizeY->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
242*9880d681SAndroid Build Coastguard Worker     LocalSizeZ->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
243*9880d681SAndroid Build Coastguard Worker 
244*9880d681SAndroid Build Coastguard Worker     return std::make_pair(LocalSizeY, LocalSizeZ);
245*9880d681SAndroid Build Coastguard Worker   }
246*9880d681SAndroid Build Coastguard Worker 
247*9880d681SAndroid Build Coastguard Worker   // We must read the size out of the dispatch pointer.
248*9880d681SAndroid Build Coastguard Worker   assert(IsAMDGCN);
249*9880d681SAndroid Build Coastguard Worker 
250*9880d681SAndroid Build Coastguard Worker   // We are indexing into this struct, and want to extract the workgroup_size_*
251*9880d681SAndroid Build Coastguard Worker   // fields.
252*9880d681SAndroid Build Coastguard Worker   //
253*9880d681SAndroid Build Coastguard Worker   //   typedef struct hsa_kernel_dispatch_packet_s {
254*9880d681SAndroid Build Coastguard Worker   //     uint16_t header;
255*9880d681SAndroid Build Coastguard Worker   //     uint16_t setup;
256*9880d681SAndroid Build Coastguard Worker   //     uint16_t workgroup_size_x ;
257*9880d681SAndroid Build Coastguard Worker   //     uint16_t workgroup_size_y;
258*9880d681SAndroid Build Coastguard Worker   //     uint16_t workgroup_size_z;
259*9880d681SAndroid Build Coastguard Worker   //     uint16_t reserved0;
260*9880d681SAndroid Build Coastguard Worker   //     uint32_t grid_size_x ;
261*9880d681SAndroid Build Coastguard Worker   //     uint32_t grid_size_y ;
262*9880d681SAndroid Build Coastguard Worker   //     uint32_t grid_size_z;
263*9880d681SAndroid Build Coastguard Worker   //
264*9880d681SAndroid Build Coastguard Worker   //     uint32_t private_segment_size;
265*9880d681SAndroid Build Coastguard Worker   //     uint32_t group_segment_size;
266*9880d681SAndroid Build Coastguard Worker   //     uint64_t kernel_object;
267*9880d681SAndroid Build Coastguard Worker   //
268*9880d681SAndroid Build Coastguard Worker   // #ifdef HSA_LARGE_MODEL
269*9880d681SAndroid Build Coastguard Worker   //     void *kernarg_address;
270*9880d681SAndroid Build Coastguard Worker   // #elif defined HSA_LITTLE_ENDIAN
271*9880d681SAndroid Build Coastguard Worker   //     void *kernarg_address;
272*9880d681SAndroid Build Coastguard Worker   //     uint32_t reserved1;
273*9880d681SAndroid Build Coastguard Worker   // #else
274*9880d681SAndroid Build Coastguard Worker   //     uint32_t reserved1;
275*9880d681SAndroid Build Coastguard Worker   //     void *kernarg_address;
276*9880d681SAndroid Build Coastguard Worker   // #endif
277*9880d681SAndroid Build Coastguard Worker   //     uint64_t reserved2;
278*9880d681SAndroid Build Coastguard Worker   //     hsa_signal_t completion_signal; // uint64_t wrapper
279*9880d681SAndroid Build Coastguard Worker   //   } hsa_kernel_dispatch_packet_t
280*9880d681SAndroid Build Coastguard Worker   //
281*9880d681SAndroid Build Coastguard Worker   Function *DispatchPtrFn
282*9880d681SAndroid Build Coastguard Worker     = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_dispatch_ptr);
283*9880d681SAndroid Build Coastguard Worker 
284*9880d681SAndroid Build Coastguard Worker   CallInst *DispatchPtr = Builder.CreateCall(DispatchPtrFn, {});
285*9880d681SAndroid Build Coastguard Worker   DispatchPtr->addAttribute(AttributeSet::ReturnIndex, Attribute::NoAlias);
286*9880d681SAndroid Build Coastguard Worker   DispatchPtr->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
287*9880d681SAndroid Build Coastguard Worker 
288*9880d681SAndroid Build Coastguard Worker   // Size of the dispatch packet struct.
289*9880d681SAndroid Build Coastguard Worker   DispatchPtr->addDereferenceableAttr(AttributeSet::ReturnIndex, 64);
290*9880d681SAndroid Build Coastguard Worker 
291*9880d681SAndroid Build Coastguard Worker   Type *I32Ty = Type::getInt32Ty(Mod->getContext());
292*9880d681SAndroid Build Coastguard Worker   Value *CastDispatchPtr = Builder.CreateBitCast(
293*9880d681SAndroid Build Coastguard Worker     DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS));
294*9880d681SAndroid Build Coastguard Worker 
295*9880d681SAndroid Build Coastguard Worker   // We could do a single 64-bit load here, but it's likely that the basic
296*9880d681SAndroid Build Coastguard Worker   // 32-bit and extract sequence is already present, and it is probably easier
297*9880d681SAndroid Build Coastguard Worker   // to CSE this. The loads should be mergable later anyway.
298*9880d681SAndroid Build Coastguard Worker   Value *GEPXY = Builder.CreateConstInBoundsGEP1_64(CastDispatchPtr, 1);
299*9880d681SAndroid Build Coastguard Worker   LoadInst *LoadXY = Builder.CreateAlignedLoad(GEPXY, 4);
300*9880d681SAndroid Build Coastguard Worker 
301*9880d681SAndroid Build Coastguard Worker   Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(CastDispatchPtr, 2);
302*9880d681SAndroid Build Coastguard Worker   LoadInst *LoadZU = Builder.CreateAlignedLoad(GEPZU, 4);
303*9880d681SAndroid Build Coastguard Worker 
304*9880d681SAndroid Build Coastguard Worker   MDNode *MD = llvm::MDNode::get(Mod->getContext(), None);
305*9880d681SAndroid Build Coastguard Worker   LoadXY->setMetadata(LLVMContext::MD_invariant_load, MD);
306*9880d681SAndroid Build Coastguard Worker   LoadZU->setMetadata(LLVMContext::MD_invariant_load, MD);
307*9880d681SAndroid Build Coastguard Worker   LoadZU->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
308*9880d681SAndroid Build Coastguard Worker 
309*9880d681SAndroid Build Coastguard Worker   // Extract y component. Upper half of LoadZU should be zero already.
310*9880d681SAndroid Build Coastguard Worker   Value *Y = Builder.CreateLShr(LoadXY, 16);
311*9880d681SAndroid Build Coastguard Worker 
312*9880d681SAndroid Build Coastguard Worker   return std::make_pair(Y, LoadZU);
313*9880d681SAndroid Build Coastguard Worker }
314*9880d681SAndroid Build Coastguard Worker 
getWorkitemID(IRBuilder<> & Builder,unsigned N)315*9880d681SAndroid Build Coastguard Worker Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) {
316*9880d681SAndroid Build Coastguard Worker   Intrinsic::ID IntrID = Intrinsic::ID::not_intrinsic;
317*9880d681SAndroid Build Coastguard Worker 
318*9880d681SAndroid Build Coastguard Worker   switch (N) {
319*9880d681SAndroid Build Coastguard Worker   case 0:
320*9880d681SAndroid Build Coastguard Worker     IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_x
321*9880d681SAndroid Build Coastguard Worker       : Intrinsic::r600_read_tidig_x;
322*9880d681SAndroid Build Coastguard Worker     break;
323*9880d681SAndroid Build Coastguard Worker   case 1:
324*9880d681SAndroid Build Coastguard Worker     IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_y
325*9880d681SAndroid Build Coastguard Worker       : Intrinsic::r600_read_tidig_y;
326*9880d681SAndroid Build Coastguard Worker     break;
327*9880d681SAndroid Build Coastguard Worker 
328*9880d681SAndroid Build Coastguard Worker   case 2:
329*9880d681SAndroid Build Coastguard Worker     IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_z
330*9880d681SAndroid Build Coastguard Worker       : Intrinsic::r600_read_tidig_z;
331*9880d681SAndroid Build Coastguard Worker     break;
332*9880d681SAndroid Build Coastguard Worker   default:
333*9880d681SAndroid Build Coastguard Worker     llvm_unreachable("invalid dimension");
334*9880d681SAndroid Build Coastguard Worker   }
335*9880d681SAndroid Build Coastguard Worker 
336*9880d681SAndroid Build Coastguard Worker   Function *WorkitemIdFn = Intrinsic::getDeclaration(Mod, IntrID);
337*9880d681SAndroid Build Coastguard Worker   CallInst *CI = Builder.CreateCall(WorkitemIdFn);
338*9880d681SAndroid Build Coastguard Worker   CI->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
339*9880d681SAndroid Build Coastguard Worker 
340*9880d681SAndroid Build Coastguard Worker   return CI;
341*9880d681SAndroid Build Coastguard Worker }
342*9880d681SAndroid Build Coastguard Worker 
arrayTypeToVecType(Type * ArrayTy)343*9880d681SAndroid Build Coastguard Worker static VectorType *arrayTypeToVecType(Type *ArrayTy) {
344*9880d681SAndroid Build Coastguard Worker   return VectorType::get(ArrayTy->getArrayElementType(),
345*9880d681SAndroid Build Coastguard Worker                          ArrayTy->getArrayNumElements());
346*9880d681SAndroid Build Coastguard Worker }
347*9880d681SAndroid Build Coastguard Worker 
348*9880d681SAndroid Build Coastguard Worker static Value *
calculateVectorIndex(Value * Ptr,const std::map<GetElementPtrInst *,Value * > & GEPIdx)349*9880d681SAndroid Build Coastguard Worker calculateVectorIndex(Value *Ptr,
350*9880d681SAndroid Build Coastguard Worker                      const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
351*9880d681SAndroid Build Coastguard Worker   if (isa<AllocaInst>(Ptr))
352*9880d681SAndroid Build Coastguard Worker     return Constant::getNullValue(Type::getInt32Ty(Ptr->getContext()));
353*9880d681SAndroid Build Coastguard Worker 
354*9880d681SAndroid Build Coastguard Worker   GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr);
355*9880d681SAndroid Build Coastguard Worker 
356*9880d681SAndroid Build Coastguard Worker   auto I = GEPIdx.find(GEP);
357*9880d681SAndroid Build Coastguard Worker   return I == GEPIdx.end() ? nullptr : I->second;
358*9880d681SAndroid Build Coastguard Worker }
359*9880d681SAndroid Build Coastguard Worker 
GEPToVectorIndex(GetElementPtrInst * GEP)360*9880d681SAndroid Build Coastguard Worker static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
361*9880d681SAndroid Build Coastguard Worker   // FIXME we only support simple cases
362*9880d681SAndroid Build Coastguard Worker   if (GEP->getNumOperands() != 3)
363*9880d681SAndroid Build Coastguard Worker     return NULL;
364*9880d681SAndroid Build Coastguard Worker 
365*9880d681SAndroid Build Coastguard Worker   ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1));
366*9880d681SAndroid Build Coastguard Worker   if (!I0 || !I0->isZero())
367*9880d681SAndroid Build Coastguard Worker     return NULL;
368*9880d681SAndroid Build Coastguard Worker 
369*9880d681SAndroid Build Coastguard Worker   return GEP->getOperand(2);
370*9880d681SAndroid Build Coastguard Worker }
371*9880d681SAndroid Build Coastguard Worker 
372*9880d681SAndroid Build Coastguard Worker // Not an instruction handled below to turn into a vector.
373*9880d681SAndroid Build Coastguard Worker //
374*9880d681SAndroid Build Coastguard Worker // TODO: Check isTriviallyVectorizable for calls and handle other
375*9880d681SAndroid Build Coastguard Worker // instructions.
canVectorizeInst(Instruction * Inst,User * User)376*9880d681SAndroid Build Coastguard Worker static bool canVectorizeInst(Instruction *Inst, User *User) {
377*9880d681SAndroid Build Coastguard Worker   switch (Inst->getOpcode()) {
378*9880d681SAndroid Build Coastguard Worker   case Instruction::Load:
379*9880d681SAndroid Build Coastguard Worker   case Instruction::BitCast:
380*9880d681SAndroid Build Coastguard Worker   case Instruction::AddrSpaceCast:
381*9880d681SAndroid Build Coastguard Worker     return true;
382*9880d681SAndroid Build Coastguard Worker   case Instruction::Store: {
383*9880d681SAndroid Build Coastguard Worker     // Must be the stored pointer operand, not a stored value.
384*9880d681SAndroid Build Coastguard Worker     StoreInst *SI = cast<StoreInst>(Inst);
385*9880d681SAndroid Build Coastguard Worker     return SI->getPointerOperand() == User;
386*9880d681SAndroid Build Coastguard Worker   }
387*9880d681SAndroid Build Coastguard Worker   default:
388*9880d681SAndroid Build Coastguard Worker     return false;
389*9880d681SAndroid Build Coastguard Worker   }
390*9880d681SAndroid Build Coastguard Worker }
391*9880d681SAndroid Build Coastguard Worker 
tryPromoteAllocaToVector(AllocaInst * Alloca)392*9880d681SAndroid Build Coastguard Worker static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
393*9880d681SAndroid Build Coastguard Worker   ArrayType *AllocaTy = dyn_cast<ArrayType>(Alloca->getAllocatedType());
394*9880d681SAndroid Build Coastguard Worker 
395*9880d681SAndroid Build Coastguard Worker   DEBUG(dbgs() << "Alloca candidate for vectorization\n");
396*9880d681SAndroid Build Coastguard Worker 
397*9880d681SAndroid Build Coastguard Worker   // FIXME: There is no reason why we can't support larger arrays, we
398*9880d681SAndroid Build Coastguard Worker   // are just being conservative for now.
399*9880d681SAndroid Build Coastguard Worker   if (!AllocaTy ||
400*9880d681SAndroid Build Coastguard Worker       AllocaTy->getElementType()->isVectorTy() ||
401*9880d681SAndroid Build Coastguard Worker       AllocaTy->getNumElements() > 4) {
402*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "  Cannot convert type to vector\n");
403*9880d681SAndroid Build Coastguard Worker     return false;
404*9880d681SAndroid Build Coastguard Worker   }
405*9880d681SAndroid Build Coastguard Worker 
406*9880d681SAndroid Build Coastguard Worker   std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
407*9880d681SAndroid Build Coastguard Worker   std::vector<Value*> WorkList;
408*9880d681SAndroid Build Coastguard Worker   for (User *AllocaUser : Alloca->users()) {
409*9880d681SAndroid Build Coastguard Worker     GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);
410*9880d681SAndroid Build Coastguard Worker     if (!GEP) {
411*9880d681SAndroid Build Coastguard Worker       if (!canVectorizeInst(cast<Instruction>(AllocaUser), Alloca))
412*9880d681SAndroid Build Coastguard Worker         return false;
413*9880d681SAndroid Build Coastguard Worker 
414*9880d681SAndroid Build Coastguard Worker       WorkList.push_back(AllocaUser);
415*9880d681SAndroid Build Coastguard Worker       continue;
416*9880d681SAndroid Build Coastguard Worker     }
417*9880d681SAndroid Build Coastguard Worker 
418*9880d681SAndroid Build Coastguard Worker     Value *Index = GEPToVectorIndex(GEP);
419*9880d681SAndroid Build Coastguard Worker 
420*9880d681SAndroid Build Coastguard Worker     // If we can't compute a vector index from this GEP, then we can't
421*9880d681SAndroid Build Coastguard Worker     // promote this alloca to vector.
422*9880d681SAndroid Build Coastguard Worker     if (!Index) {
423*9880d681SAndroid Build Coastguard Worker       DEBUG(dbgs() << "  Cannot compute vector index for GEP " << *GEP << '\n');
424*9880d681SAndroid Build Coastguard Worker       return false;
425*9880d681SAndroid Build Coastguard Worker     }
426*9880d681SAndroid Build Coastguard Worker 
427*9880d681SAndroid Build Coastguard Worker     GEPVectorIdx[GEP] = Index;
428*9880d681SAndroid Build Coastguard Worker     for (User *GEPUser : AllocaUser->users()) {
429*9880d681SAndroid Build Coastguard Worker       if (!canVectorizeInst(cast<Instruction>(GEPUser), AllocaUser))
430*9880d681SAndroid Build Coastguard Worker         return false;
431*9880d681SAndroid Build Coastguard Worker 
432*9880d681SAndroid Build Coastguard Worker       WorkList.push_back(GEPUser);
433*9880d681SAndroid Build Coastguard Worker     }
434*9880d681SAndroid Build Coastguard Worker   }
435*9880d681SAndroid Build Coastguard Worker 
436*9880d681SAndroid Build Coastguard Worker   VectorType *VectorTy = arrayTypeToVecType(AllocaTy);
437*9880d681SAndroid Build Coastguard Worker 
438*9880d681SAndroid Build Coastguard Worker   DEBUG(dbgs() << "  Converting alloca to vector "
439*9880d681SAndroid Build Coastguard Worker         << *AllocaTy << " -> " << *VectorTy << '\n');
440*9880d681SAndroid Build Coastguard Worker 
441*9880d681SAndroid Build Coastguard Worker   for (Value *V : WorkList) {
442*9880d681SAndroid Build Coastguard Worker     Instruction *Inst = cast<Instruction>(V);
443*9880d681SAndroid Build Coastguard Worker     IRBuilder<> Builder(Inst);
444*9880d681SAndroid Build Coastguard Worker     switch (Inst->getOpcode()) {
445*9880d681SAndroid Build Coastguard Worker     case Instruction::Load: {
446*9880d681SAndroid Build Coastguard Worker       Value *Ptr = Inst->getOperand(0);
447*9880d681SAndroid Build Coastguard Worker       Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
448*9880d681SAndroid Build Coastguard Worker       Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
449*9880d681SAndroid Build Coastguard Worker       Value *VecValue = Builder.CreateLoad(BitCast);
450*9880d681SAndroid Build Coastguard Worker       Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
451*9880d681SAndroid Build Coastguard Worker       Inst->replaceAllUsesWith(ExtractElement);
452*9880d681SAndroid Build Coastguard Worker       Inst->eraseFromParent();
453*9880d681SAndroid Build Coastguard Worker       break;
454*9880d681SAndroid Build Coastguard Worker     }
455*9880d681SAndroid Build Coastguard Worker     case Instruction::Store: {
456*9880d681SAndroid Build Coastguard Worker       Value *Ptr = Inst->getOperand(1);
457*9880d681SAndroid Build Coastguard Worker       Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
458*9880d681SAndroid Build Coastguard Worker       Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
459*9880d681SAndroid Build Coastguard Worker       Value *VecValue = Builder.CreateLoad(BitCast);
460*9880d681SAndroid Build Coastguard Worker       Value *NewVecValue = Builder.CreateInsertElement(VecValue,
461*9880d681SAndroid Build Coastguard Worker                                                        Inst->getOperand(0),
462*9880d681SAndroid Build Coastguard Worker                                                        Index);
463*9880d681SAndroid Build Coastguard Worker       Builder.CreateStore(NewVecValue, BitCast);
464*9880d681SAndroid Build Coastguard Worker       Inst->eraseFromParent();
465*9880d681SAndroid Build Coastguard Worker       break;
466*9880d681SAndroid Build Coastguard Worker     }
467*9880d681SAndroid Build Coastguard Worker     case Instruction::BitCast:
468*9880d681SAndroid Build Coastguard Worker     case Instruction::AddrSpaceCast:
469*9880d681SAndroid Build Coastguard Worker       break;
470*9880d681SAndroid Build Coastguard Worker 
471*9880d681SAndroid Build Coastguard Worker     default:
472*9880d681SAndroid Build Coastguard Worker       Inst->dump();
473*9880d681SAndroid Build Coastguard Worker       llvm_unreachable("Inconsistency in instructions promotable to vector");
474*9880d681SAndroid Build Coastguard Worker     }
475*9880d681SAndroid Build Coastguard Worker   }
476*9880d681SAndroid Build Coastguard Worker   return true;
477*9880d681SAndroid Build Coastguard Worker }
478*9880d681SAndroid Build Coastguard Worker 
isCallPromotable(CallInst * CI)479*9880d681SAndroid Build Coastguard Worker static bool isCallPromotable(CallInst *CI) {
480*9880d681SAndroid Build Coastguard Worker   // TODO: We might be able to handle some cases where the callee is a
481*9880d681SAndroid Build Coastguard Worker   // constantexpr bitcast of a function.
482*9880d681SAndroid Build Coastguard Worker   if (!CI->getCalledFunction())
483*9880d681SAndroid Build Coastguard Worker     return false;
484*9880d681SAndroid Build Coastguard Worker 
485*9880d681SAndroid Build Coastguard Worker   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
486*9880d681SAndroid Build Coastguard Worker   if (!II)
487*9880d681SAndroid Build Coastguard Worker     return false;
488*9880d681SAndroid Build Coastguard Worker 
489*9880d681SAndroid Build Coastguard Worker   switch (II->getIntrinsicID()) {
490*9880d681SAndroid Build Coastguard Worker   case Intrinsic::memcpy:
491*9880d681SAndroid Build Coastguard Worker   case Intrinsic::memmove:
492*9880d681SAndroid Build Coastguard Worker   case Intrinsic::memset:
493*9880d681SAndroid Build Coastguard Worker   case Intrinsic::lifetime_start:
494*9880d681SAndroid Build Coastguard Worker   case Intrinsic::lifetime_end:
495*9880d681SAndroid Build Coastguard Worker   case Intrinsic::invariant_start:
496*9880d681SAndroid Build Coastguard Worker   case Intrinsic::invariant_end:
497*9880d681SAndroid Build Coastguard Worker   case Intrinsic::invariant_group_barrier:
498*9880d681SAndroid Build Coastguard Worker   case Intrinsic::objectsize:
499*9880d681SAndroid Build Coastguard Worker     return true;
500*9880d681SAndroid Build Coastguard Worker   default:
501*9880d681SAndroid Build Coastguard Worker     return false;
502*9880d681SAndroid Build Coastguard Worker   }
503*9880d681SAndroid Build Coastguard Worker }
504*9880d681SAndroid Build Coastguard Worker 
binaryOpIsDerivedFromSameAlloca(Value * BaseAlloca,Value * Val,Instruction * Inst,int OpIdx0,int OpIdx1) const505*9880d681SAndroid Build Coastguard Worker bool AMDGPUPromoteAlloca::binaryOpIsDerivedFromSameAlloca(Value *BaseAlloca,
506*9880d681SAndroid Build Coastguard Worker                                                           Value *Val,
507*9880d681SAndroid Build Coastguard Worker                                                           Instruction *Inst,
508*9880d681SAndroid Build Coastguard Worker                                                           int OpIdx0,
509*9880d681SAndroid Build Coastguard Worker                                                           int OpIdx1) const {
510*9880d681SAndroid Build Coastguard Worker   // Figure out which operand is the one we might not be promoting.
511*9880d681SAndroid Build Coastguard Worker   Value *OtherOp = Inst->getOperand(OpIdx0);
512*9880d681SAndroid Build Coastguard Worker   if (Val == OtherOp)
513*9880d681SAndroid Build Coastguard Worker     OtherOp = Inst->getOperand(OpIdx1);
514*9880d681SAndroid Build Coastguard Worker 
515*9880d681SAndroid Build Coastguard Worker   if (isa<ConstantPointerNull>(OtherOp))
516*9880d681SAndroid Build Coastguard Worker     return true;
517*9880d681SAndroid Build Coastguard Worker 
518*9880d681SAndroid Build Coastguard Worker   Value *OtherObj = GetUnderlyingObject(OtherOp, *DL);
519*9880d681SAndroid Build Coastguard Worker   if (!isa<AllocaInst>(OtherObj))
520*9880d681SAndroid Build Coastguard Worker     return false;
521*9880d681SAndroid Build Coastguard Worker 
522*9880d681SAndroid Build Coastguard Worker   // TODO: We should be able to replace undefs with the right pointer type.
523*9880d681SAndroid Build Coastguard Worker 
524*9880d681SAndroid Build Coastguard Worker   // TODO: If we know the other base object is another promotable
525*9880d681SAndroid Build Coastguard Worker   // alloca, not necessarily this alloca, we can do this. The
526*9880d681SAndroid Build Coastguard Worker   // important part is both must have the same address space at
527*9880d681SAndroid Build Coastguard Worker   // the end.
528*9880d681SAndroid Build Coastguard Worker   if (OtherObj != BaseAlloca) {
529*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "Found a binary instruction with another alloca object\n");
530*9880d681SAndroid Build Coastguard Worker     return false;
531*9880d681SAndroid Build Coastguard Worker   }
532*9880d681SAndroid Build Coastguard Worker 
533*9880d681SAndroid Build Coastguard Worker   return true;
534*9880d681SAndroid Build Coastguard Worker }
535*9880d681SAndroid Build Coastguard Worker 
collectUsesWithPtrTypes(Value * BaseAlloca,Value * Val,std::vector<Value * > & WorkList) const536*9880d681SAndroid Build Coastguard Worker bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
537*9880d681SAndroid Build Coastguard Worker   Value *BaseAlloca,
538*9880d681SAndroid Build Coastguard Worker   Value *Val,
539*9880d681SAndroid Build Coastguard Worker   std::vector<Value*> &WorkList) const {
540*9880d681SAndroid Build Coastguard Worker 
541*9880d681SAndroid Build Coastguard Worker   for (User *User : Val->users()) {
542*9880d681SAndroid Build Coastguard Worker     if (std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end())
543*9880d681SAndroid Build Coastguard Worker       continue;
544*9880d681SAndroid Build Coastguard Worker 
545*9880d681SAndroid Build Coastguard Worker     if (CallInst *CI = dyn_cast<CallInst>(User)) {
546*9880d681SAndroid Build Coastguard Worker       if (!isCallPromotable(CI))
547*9880d681SAndroid Build Coastguard Worker         return false;
548*9880d681SAndroid Build Coastguard Worker 
549*9880d681SAndroid Build Coastguard Worker       WorkList.push_back(User);
550*9880d681SAndroid Build Coastguard Worker       continue;
551*9880d681SAndroid Build Coastguard Worker     }
552*9880d681SAndroid Build Coastguard Worker 
553*9880d681SAndroid Build Coastguard Worker     Instruction *UseInst = cast<Instruction>(User);
554*9880d681SAndroid Build Coastguard Worker     if (UseInst->getOpcode() == Instruction::PtrToInt)
555*9880d681SAndroid Build Coastguard Worker       return false;
556*9880d681SAndroid Build Coastguard Worker 
557*9880d681SAndroid Build Coastguard Worker     if (LoadInst *LI = dyn_cast_or_null<LoadInst>(UseInst)) {
558*9880d681SAndroid Build Coastguard Worker       if (LI->isVolatile())
559*9880d681SAndroid Build Coastguard Worker         return false;
560*9880d681SAndroid Build Coastguard Worker 
561*9880d681SAndroid Build Coastguard Worker       continue;
562*9880d681SAndroid Build Coastguard Worker     }
563*9880d681SAndroid Build Coastguard Worker 
564*9880d681SAndroid Build Coastguard Worker     if (StoreInst *SI = dyn_cast<StoreInst>(UseInst)) {
565*9880d681SAndroid Build Coastguard Worker       if (SI->isVolatile())
566*9880d681SAndroid Build Coastguard Worker         return false;
567*9880d681SAndroid Build Coastguard Worker 
568*9880d681SAndroid Build Coastguard Worker       // Reject if the stored value is not the pointer operand.
569*9880d681SAndroid Build Coastguard Worker       if (SI->getPointerOperand() != Val)
570*9880d681SAndroid Build Coastguard Worker         return false;
571*9880d681SAndroid Build Coastguard Worker     } else if (AtomicRMWInst *RMW = dyn_cast_or_null<AtomicRMWInst>(UseInst)) {
572*9880d681SAndroid Build Coastguard Worker       if (RMW->isVolatile())
573*9880d681SAndroid Build Coastguard Worker         return false;
574*9880d681SAndroid Build Coastguard Worker     } else if (AtomicCmpXchgInst *CAS
575*9880d681SAndroid Build Coastguard Worker                = dyn_cast_or_null<AtomicCmpXchgInst>(UseInst)) {
576*9880d681SAndroid Build Coastguard Worker       if (CAS->isVolatile())
577*9880d681SAndroid Build Coastguard Worker         return false;
578*9880d681SAndroid Build Coastguard Worker     }
579*9880d681SAndroid Build Coastguard Worker 
580*9880d681SAndroid Build Coastguard Worker     // Only promote a select if we know that the other select operand
581*9880d681SAndroid Build Coastguard Worker     // is from another pointer that will also be promoted.
582*9880d681SAndroid Build Coastguard Worker     if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
583*9880d681SAndroid Build Coastguard Worker       if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
584*9880d681SAndroid Build Coastguard Worker         return false;
585*9880d681SAndroid Build Coastguard Worker 
586*9880d681SAndroid Build Coastguard Worker       // May need to rewrite constant operands.
587*9880d681SAndroid Build Coastguard Worker       WorkList.push_back(ICmp);
588*9880d681SAndroid Build Coastguard Worker     }
589*9880d681SAndroid Build Coastguard Worker 
590*9880d681SAndroid Build Coastguard Worker     if (!User->getType()->isPointerTy())
591*9880d681SAndroid Build Coastguard Worker       continue;
592*9880d681SAndroid Build Coastguard Worker 
593*9880d681SAndroid Build Coastguard Worker     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UseInst)) {
594*9880d681SAndroid Build Coastguard Worker       // Be conservative if an address could be computed outside the bounds of
595*9880d681SAndroid Build Coastguard Worker       // the alloca.
596*9880d681SAndroid Build Coastguard Worker       if (!GEP->isInBounds())
597*9880d681SAndroid Build Coastguard Worker         return false;
598*9880d681SAndroid Build Coastguard Worker     }
599*9880d681SAndroid Build Coastguard Worker 
600*9880d681SAndroid Build Coastguard Worker     // Only promote a select if we know that the other select operand is from
601*9880d681SAndroid Build Coastguard Worker     // another pointer that will also be promoted.
602*9880d681SAndroid Build Coastguard Worker     if (SelectInst *SI = dyn_cast<SelectInst>(UseInst)) {
603*9880d681SAndroid Build Coastguard Worker       if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, SI, 1, 2))
604*9880d681SAndroid Build Coastguard Worker         return false;
605*9880d681SAndroid Build Coastguard Worker     }
606*9880d681SAndroid Build Coastguard Worker 
607*9880d681SAndroid Build Coastguard Worker     // Repeat for phis.
608*9880d681SAndroid Build Coastguard Worker     if (PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
609*9880d681SAndroid Build Coastguard Worker       // TODO: Handle more complex cases. We should be able to replace loops
610*9880d681SAndroid Build Coastguard Worker       // over arrays.
611*9880d681SAndroid Build Coastguard Worker       switch (Phi->getNumIncomingValues()) {
612*9880d681SAndroid Build Coastguard Worker       case 1:
613*9880d681SAndroid Build Coastguard Worker         break;
614*9880d681SAndroid Build Coastguard Worker       case 2:
615*9880d681SAndroid Build Coastguard Worker         if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, Phi, 0, 1))
616*9880d681SAndroid Build Coastguard Worker           return false;
617*9880d681SAndroid Build Coastguard Worker         break;
618*9880d681SAndroid Build Coastguard Worker       default:
619*9880d681SAndroid Build Coastguard Worker         return false;
620*9880d681SAndroid Build Coastguard Worker       }
621*9880d681SAndroid Build Coastguard Worker     }
622*9880d681SAndroid Build Coastguard Worker 
623*9880d681SAndroid Build Coastguard Worker     WorkList.push_back(User);
624*9880d681SAndroid Build Coastguard Worker     if (!collectUsesWithPtrTypes(BaseAlloca, User, WorkList))
625*9880d681SAndroid Build Coastguard Worker       return false;
626*9880d681SAndroid Build Coastguard Worker   }
627*9880d681SAndroid Build Coastguard Worker 
628*9880d681SAndroid Build Coastguard Worker   return true;
629*9880d681SAndroid Build Coastguard Worker }
630*9880d681SAndroid Build Coastguard Worker 
631*9880d681SAndroid Build Coastguard Worker // FIXME: Should try to pick the most likely to be profitable allocas first.
handleAlloca(AllocaInst & I)632*9880d681SAndroid Build Coastguard Worker void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
633*9880d681SAndroid Build Coastguard Worker   // Array allocations are probably not worth handling, since an allocation of
634*9880d681SAndroid Build Coastguard Worker   // the array type is the canonical form.
635*9880d681SAndroid Build Coastguard Worker   if (!I.isStaticAlloca() || I.isArrayAllocation())
636*9880d681SAndroid Build Coastguard Worker     return;
637*9880d681SAndroid Build Coastguard Worker 
638*9880d681SAndroid Build Coastguard Worker   IRBuilder<> Builder(&I);
639*9880d681SAndroid Build Coastguard Worker 
640*9880d681SAndroid Build Coastguard Worker   // First try to replace the alloca with a vector
641*9880d681SAndroid Build Coastguard Worker   Type *AllocaTy = I.getAllocatedType();
642*9880d681SAndroid Build Coastguard Worker 
643*9880d681SAndroid Build Coastguard Worker   DEBUG(dbgs() << "Trying to promote " << I << '\n');
644*9880d681SAndroid Build Coastguard Worker 
645*9880d681SAndroid Build Coastguard Worker   if (tryPromoteAllocaToVector(&I)) {
646*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");
647*9880d681SAndroid Build Coastguard Worker     return;
648*9880d681SAndroid Build Coastguard Worker   }
649*9880d681SAndroid Build Coastguard Worker 
650*9880d681SAndroid Build Coastguard Worker   const Function &ContainingFunction = *I.getParent()->getParent();
651*9880d681SAndroid Build Coastguard Worker 
652*9880d681SAndroid Build Coastguard Worker   // FIXME: We should also try to get this value from the reqd_work_group_size
653*9880d681SAndroid Build Coastguard Worker   // function attribute if it is available.
654*9880d681SAndroid Build Coastguard Worker   unsigned WorkGroupSize = AMDGPU::getMaximumWorkGroupSize(ContainingFunction);
655*9880d681SAndroid Build Coastguard Worker 
656*9880d681SAndroid Build Coastguard Worker   const DataLayout &DL = Mod->getDataLayout();
657*9880d681SAndroid Build Coastguard Worker 
658*9880d681SAndroid Build Coastguard Worker   unsigned Align = I.getAlignment();
659*9880d681SAndroid Build Coastguard Worker   if (Align == 0)
660*9880d681SAndroid Build Coastguard Worker     Align = DL.getABITypeAlignment(I.getAllocatedType());
661*9880d681SAndroid Build Coastguard Worker 
662*9880d681SAndroid Build Coastguard Worker   // FIXME: This computed padding is likely wrong since it depends on inverse
663*9880d681SAndroid Build Coastguard Worker   // usage order.
664*9880d681SAndroid Build Coastguard Worker   //
665*9880d681SAndroid Build Coastguard Worker   // FIXME: It is also possible that if we're allowed to use all of the memory
666*9880d681SAndroid Build Coastguard Worker   // could could end up using more than the maximum due to alignment padding.
667*9880d681SAndroid Build Coastguard Worker 
668*9880d681SAndroid Build Coastguard Worker   uint32_t NewSize = alignTo(CurrentLocalMemUsage, Align);
669*9880d681SAndroid Build Coastguard Worker   uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy);
670*9880d681SAndroid Build Coastguard Worker   NewSize += AllocSize;
671*9880d681SAndroid Build Coastguard Worker 
672*9880d681SAndroid Build Coastguard Worker   if (NewSize > LocalMemLimit) {
673*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "  " << AllocSize
674*9880d681SAndroid Build Coastguard Worker           << " bytes of local memory not available to promote\n");
675*9880d681SAndroid Build Coastguard Worker     return;
676*9880d681SAndroid Build Coastguard Worker   }
677*9880d681SAndroid Build Coastguard Worker 
678*9880d681SAndroid Build Coastguard Worker   CurrentLocalMemUsage = NewSize;
679*9880d681SAndroid Build Coastguard Worker 
680*9880d681SAndroid Build Coastguard Worker   std::vector<Value*> WorkList;
681*9880d681SAndroid Build Coastguard Worker 
682*9880d681SAndroid Build Coastguard Worker   if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
683*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << " Do not know how to convert all uses\n");
684*9880d681SAndroid Build Coastguard Worker     return;
685*9880d681SAndroid Build Coastguard Worker   }
686*9880d681SAndroid Build Coastguard Worker 
687*9880d681SAndroid Build Coastguard Worker   DEBUG(dbgs() << "Promoting alloca to local memory\n");
688*9880d681SAndroid Build Coastguard Worker 
689*9880d681SAndroid Build Coastguard Worker   Function *F = I.getParent()->getParent();
690*9880d681SAndroid Build Coastguard Worker 
691*9880d681SAndroid Build Coastguard Worker   Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
692*9880d681SAndroid Build Coastguard Worker   GlobalVariable *GV = new GlobalVariable(
693*9880d681SAndroid Build Coastguard Worker       *Mod, GVTy, false, GlobalValue::InternalLinkage,
694*9880d681SAndroid Build Coastguard Worker       UndefValue::get(GVTy),
695*9880d681SAndroid Build Coastguard Worker       Twine(F->getName()) + Twine('.') + I.getName(),
696*9880d681SAndroid Build Coastguard Worker       nullptr,
697*9880d681SAndroid Build Coastguard Worker       GlobalVariable::NotThreadLocal,
698*9880d681SAndroid Build Coastguard Worker       AMDGPUAS::LOCAL_ADDRESS);
699*9880d681SAndroid Build Coastguard Worker   GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
700*9880d681SAndroid Build Coastguard Worker   GV->setAlignment(I.getAlignment());
701*9880d681SAndroid Build Coastguard Worker 
702*9880d681SAndroid Build Coastguard Worker   Value *TCntY, *TCntZ;
703*9880d681SAndroid Build Coastguard Worker 
704*9880d681SAndroid Build Coastguard Worker   std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
705*9880d681SAndroid Build Coastguard Worker   Value *TIdX = getWorkitemID(Builder, 0);
706*9880d681SAndroid Build Coastguard Worker   Value *TIdY = getWorkitemID(Builder, 1);
707*9880d681SAndroid Build Coastguard Worker   Value *TIdZ = getWorkitemID(Builder, 2);
708*9880d681SAndroid Build Coastguard Worker 
709*9880d681SAndroid Build Coastguard Worker   Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
710*9880d681SAndroid Build Coastguard Worker   Tmp0 = Builder.CreateMul(Tmp0, TIdX);
711*9880d681SAndroid Build Coastguard Worker   Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
712*9880d681SAndroid Build Coastguard Worker   Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
713*9880d681SAndroid Build Coastguard Worker   TID = Builder.CreateAdd(TID, TIdZ);
714*9880d681SAndroid Build Coastguard Worker 
715*9880d681SAndroid Build Coastguard Worker   Value *Indices[] = {
716*9880d681SAndroid Build Coastguard Worker     Constant::getNullValue(Type::getInt32Ty(Mod->getContext())),
717*9880d681SAndroid Build Coastguard Worker     TID
718*9880d681SAndroid Build Coastguard Worker   };
719*9880d681SAndroid Build Coastguard Worker 
720*9880d681SAndroid Build Coastguard Worker   Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
721*9880d681SAndroid Build Coastguard Worker   I.mutateType(Offset->getType());
722*9880d681SAndroid Build Coastguard Worker   I.replaceAllUsesWith(Offset);
723*9880d681SAndroid Build Coastguard Worker   I.eraseFromParent();
724*9880d681SAndroid Build Coastguard Worker 
725*9880d681SAndroid Build Coastguard Worker   for (Value *V : WorkList) {
726*9880d681SAndroid Build Coastguard Worker     CallInst *Call = dyn_cast<CallInst>(V);
727*9880d681SAndroid Build Coastguard Worker     if (!Call) {
728*9880d681SAndroid Build Coastguard Worker       if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
729*9880d681SAndroid Build Coastguard Worker         Value *Src0 = CI->getOperand(0);
730*9880d681SAndroid Build Coastguard Worker         Type *EltTy = Src0->getType()->getPointerElementType();
731*9880d681SAndroid Build Coastguard Worker         PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
732*9880d681SAndroid Build Coastguard Worker 
733*9880d681SAndroid Build Coastguard Worker         if (isa<ConstantPointerNull>(CI->getOperand(0)))
734*9880d681SAndroid Build Coastguard Worker           CI->setOperand(0, ConstantPointerNull::get(NewTy));
735*9880d681SAndroid Build Coastguard Worker 
736*9880d681SAndroid Build Coastguard Worker         if (isa<ConstantPointerNull>(CI->getOperand(1)))
737*9880d681SAndroid Build Coastguard Worker           CI->setOperand(1, ConstantPointerNull::get(NewTy));
738*9880d681SAndroid Build Coastguard Worker 
739*9880d681SAndroid Build Coastguard Worker         continue;
740*9880d681SAndroid Build Coastguard Worker       }
741*9880d681SAndroid Build Coastguard Worker 
742*9880d681SAndroid Build Coastguard Worker       // The operand's value should be corrected on its own.
743*9880d681SAndroid Build Coastguard Worker       if (isa<AddrSpaceCastInst>(V))
744*9880d681SAndroid Build Coastguard Worker         continue;
745*9880d681SAndroid Build Coastguard Worker 
746*9880d681SAndroid Build Coastguard Worker       Type *EltTy = V->getType()->getPointerElementType();
747*9880d681SAndroid Build Coastguard Worker       PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
748*9880d681SAndroid Build Coastguard Worker 
749*9880d681SAndroid Build Coastguard Worker       // FIXME: It doesn't really make sense to try to do this for all
750*9880d681SAndroid Build Coastguard Worker       // instructions.
751*9880d681SAndroid Build Coastguard Worker       V->mutateType(NewTy);
752*9880d681SAndroid Build Coastguard Worker 
753*9880d681SAndroid Build Coastguard Worker       // Adjust the types of any constant operands.
754*9880d681SAndroid Build Coastguard Worker       if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
755*9880d681SAndroid Build Coastguard Worker         if (isa<ConstantPointerNull>(SI->getOperand(1)))
756*9880d681SAndroid Build Coastguard Worker           SI->setOperand(1, ConstantPointerNull::get(NewTy));
757*9880d681SAndroid Build Coastguard Worker 
758*9880d681SAndroid Build Coastguard Worker         if (isa<ConstantPointerNull>(SI->getOperand(2)))
759*9880d681SAndroid Build Coastguard Worker           SI->setOperand(2, ConstantPointerNull::get(NewTy));
760*9880d681SAndroid Build Coastguard Worker       } else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
761*9880d681SAndroid Build Coastguard Worker         for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
762*9880d681SAndroid Build Coastguard Worker           if (isa<ConstantPointerNull>(Phi->getIncomingValue(I)))
763*9880d681SAndroid Build Coastguard Worker             Phi->setIncomingValue(I, ConstantPointerNull::get(NewTy));
764*9880d681SAndroid Build Coastguard Worker         }
765*9880d681SAndroid Build Coastguard Worker       }
766*9880d681SAndroid Build Coastguard Worker 
767*9880d681SAndroid Build Coastguard Worker       continue;
768*9880d681SAndroid Build Coastguard Worker     }
769*9880d681SAndroid Build Coastguard Worker 
770*9880d681SAndroid Build Coastguard Worker     IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
771*9880d681SAndroid Build Coastguard Worker     if (!Intr) {
772*9880d681SAndroid Build Coastguard Worker       // FIXME: What is this for? It doesn't make sense to promote arbitrary
773*9880d681SAndroid Build Coastguard Worker       // function calls. If the call is to a defined function that can also be
774*9880d681SAndroid Build Coastguard Worker       // promoted, we should be able to do this once that function is also
775*9880d681SAndroid Build Coastguard Worker       // rewritten.
776*9880d681SAndroid Build Coastguard Worker 
777*9880d681SAndroid Build Coastguard Worker       std::vector<Type*> ArgTypes;
778*9880d681SAndroid Build Coastguard Worker       for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
779*9880d681SAndroid Build Coastguard Worker                                 ArgIdx != ArgEnd; ++ArgIdx) {
780*9880d681SAndroid Build Coastguard Worker         ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
781*9880d681SAndroid Build Coastguard Worker       }
782*9880d681SAndroid Build Coastguard Worker       Function *F = Call->getCalledFunction();
783*9880d681SAndroid Build Coastguard Worker       FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
784*9880d681SAndroid Build Coastguard Worker                                                 F->isVarArg());
785*9880d681SAndroid Build Coastguard Worker       Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(),
786*9880d681SAndroid Build Coastguard Worker                                              NewType, F->getAttributes());
787*9880d681SAndroid Build Coastguard Worker       Function *NewF = cast<Function>(C);
788*9880d681SAndroid Build Coastguard Worker       Call->setCalledFunction(NewF);
789*9880d681SAndroid Build Coastguard Worker       continue;
790*9880d681SAndroid Build Coastguard Worker     }
791*9880d681SAndroid Build Coastguard Worker 
792*9880d681SAndroid Build Coastguard Worker     Builder.SetInsertPoint(Intr);
793*9880d681SAndroid Build Coastguard Worker     switch (Intr->getIntrinsicID()) {
794*9880d681SAndroid Build Coastguard Worker     case Intrinsic::lifetime_start:
795*9880d681SAndroid Build Coastguard Worker     case Intrinsic::lifetime_end:
796*9880d681SAndroid Build Coastguard Worker       // These intrinsics are for address space 0 only
797*9880d681SAndroid Build Coastguard Worker       Intr->eraseFromParent();
798*9880d681SAndroid Build Coastguard Worker       continue;
799*9880d681SAndroid Build Coastguard Worker     case Intrinsic::memcpy: {
800*9880d681SAndroid Build Coastguard Worker       MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
801*9880d681SAndroid Build Coastguard Worker       Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
802*9880d681SAndroid Build Coastguard Worker                            MemCpy->getLength(), MemCpy->getAlignment(),
803*9880d681SAndroid Build Coastguard Worker                            MemCpy->isVolatile());
804*9880d681SAndroid Build Coastguard Worker       Intr->eraseFromParent();
805*9880d681SAndroid Build Coastguard Worker       continue;
806*9880d681SAndroid Build Coastguard Worker     }
807*9880d681SAndroid Build Coastguard Worker     case Intrinsic::memmove: {
808*9880d681SAndroid Build Coastguard Worker       MemMoveInst *MemMove = cast<MemMoveInst>(Intr);
809*9880d681SAndroid Build Coastguard Worker       Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getRawSource(),
810*9880d681SAndroid Build Coastguard Worker                             MemMove->getLength(), MemMove->getAlignment(),
811*9880d681SAndroid Build Coastguard Worker                             MemMove->isVolatile());
812*9880d681SAndroid Build Coastguard Worker       Intr->eraseFromParent();
813*9880d681SAndroid Build Coastguard Worker       continue;
814*9880d681SAndroid Build Coastguard Worker     }
815*9880d681SAndroid Build Coastguard Worker     case Intrinsic::memset: {
816*9880d681SAndroid Build Coastguard Worker       MemSetInst *MemSet = cast<MemSetInst>(Intr);
817*9880d681SAndroid Build Coastguard Worker       Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
818*9880d681SAndroid Build Coastguard Worker                            MemSet->getLength(), MemSet->getAlignment(),
819*9880d681SAndroid Build Coastguard Worker                            MemSet->isVolatile());
820*9880d681SAndroid Build Coastguard Worker       Intr->eraseFromParent();
821*9880d681SAndroid Build Coastguard Worker       continue;
822*9880d681SAndroid Build Coastguard Worker     }
823*9880d681SAndroid Build Coastguard Worker     case Intrinsic::invariant_start:
824*9880d681SAndroid Build Coastguard Worker     case Intrinsic::invariant_end:
825*9880d681SAndroid Build Coastguard Worker     case Intrinsic::invariant_group_barrier:
826*9880d681SAndroid Build Coastguard Worker       Intr->eraseFromParent();
827*9880d681SAndroid Build Coastguard Worker       // FIXME: I think the invariant marker should still theoretically apply,
828*9880d681SAndroid Build Coastguard Worker       // but the intrinsics need to be changed to accept pointers with any
829*9880d681SAndroid Build Coastguard Worker       // address space.
830*9880d681SAndroid Build Coastguard Worker       continue;
831*9880d681SAndroid Build Coastguard Worker     case Intrinsic::objectsize: {
832*9880d681SAndroid Build Coastguard Worker       Value *Src = Intr->getOperand(0);
833*9880d681SAndroid Build Coastguard Worker       Type *SrcTy = Src->getType()->getPointerElementType();
834*9880d681SAndroid Build Coastguard Worker       Function *ObjectSize = Intrinsic::getDeclaration(Mod,
835*9880d681SAndroid Build Coastguard Worker         Intrinsic::objectsize,
836*9880d681SAndroid Build Coastguard Worker         { Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) }
837*9880d681SAndroid Build Coastguard Worker       );
838*9880d681SAndroid Build Coastguard Worker 
839*9880d681SAndroid Build Coastguard Worker       CallInst *NewCall
840*9880d681SAndroid Build Coastguard Worker         = Builder.CreateCall(ObjectSize, { Src, Intr->getOperand(1) });
841*9880d681SAndroid Build Coastguard Worker       Intr->replaceAllUsesWith(NewCall);
842*9880d681SAndroid Build Coastguard Worker       Intr->eraseFromParent();
843*9880d681SAndroid Build Coastguard Worker       continue;
844*9880d681SAndroid Build Coastguard Worker     }
845*9880d681SAndroid Build Coastguard Worker     default:
846*9880d681SAndroid Build Coastguard Worker       Intr->dump();
847*9880d681SAndroid Build Coastguard Worker       llvm_unreachable("Don't know how to promote alloca intrinsic use.");
848*9880d681SAndroid Build Coastguard Worker     }
849*9880d681SAndroid Build Coastguard Worker   }
850*9880d681SAndroid Build Coastguard Worker }
851*9880d681SAndroid Build Coastguard Worker 
createAMDGPUPromoteAlloca(const TargetMachine * TM)852*9880d681SAndroid Build Coastguard Worker FunctionPass *llvm::createAMDGPUPromoteAlloca(const TargetMachine *TM) {
853*9880d681SAndroid Build Coastguard Worker   return new AMDGPUPromoteAlloca(TM);
854*9880d681SAndroid Build Coastguard Worker }
855