//===-- AMDGPUMemoryUtils.cpp - -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMemoryUtils.h"
#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/ReplaceConstant.h"

#define DEBUG_TYPE "amdgpu-memory-utils"

using namespace llvm;

namespace llvm {

namespace AMDGPU {

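// Return the alignment to use for an LDS global: its pointer alignment as
// computed from the DataLayout, falling back to the ABI type alignment of its
// value type.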
Align getAlign(DataLayout const &DL, const GlobalVariable *GV) {
  return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
                                       GV->getValueType());
}

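// Decide whether GV needs to be moved into an LDS struct. When F is a kernel,
// GV qualifies if it is used (possibly through constant expressions) from
// within F. When F is null (module LDS lowering), GV qualifies if it is used
// from any non-kernel function.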
static bool shouldLowerLDSToStruct(const GlobalVariable &GV,
                                   const Function *F) {
  // We are not interested in kernel LDS lowering for module LDS itself.
  if (F && GV.getName() == "llvm.amdgcn.module.lds")
    return false;

  bool Ret = false;
  SmallPtrSet<const User *, 8> Visited;
  SmallVector<const User *, 16> Stack(GV.users());

  assert(!F || isKernelCC(F));

  while (!Stack.empty()) {
    const User *V = Stack.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    if (isa<GlobalValue>(V)) {
      // This use of the LDS variable is the initializer of a global variable.
      // This is ill-formed. The address of an LDS variable is kernel-dependent
      // and unknown until runtime. It can't be written to a global variable.
      continue;
    }

    if (auto *I = dyn_cast<Instruction>(V)) {
      const Function *UF = I->getFunction();
      if (UF == F) {
        // Used from this kernel, we want to put it into the structure.
        Ret = true;
      } else if (!F) {
        // For module LDS lowering, lowering is required if the user
        // instruction is from a non-kernel function.
        Ret |= !isKernelCC(UF);
      }
      continue;
    }

    // User V should be a constant; recursively visit the users of V.
    assert(isa<Constant>(V) && "Expected a constant.");
    append_range(Stack, V->users());
  }

  return Ret;
}

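// Return true for LDS (addrspace(3)) globals the lowering passes should act
// on: variables in the local address space that carry an undef initializer
// and are not marked constant.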
bool isLDSVariableToLower(const GlobalVariable &GV) {
  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
    return false;
  }
  if (!GV.hasInitializer()) {
    // addrspace(3) without an initializer implies CUDA/HIP extern __shared__.
    // The semantics for such a variable appear to be that all extern
    // __shared__ variables alias one another, in which case this transform
    // is not required.
    return false;
  }
  if (!isa<UndefValue>(GV.getInitializer())) {
    // Initializers are unimplemented for the LDS address space.
    // Leave such variables in place for consistent error reporting.
    return false;
  }
  if (GV.isConstant()) {
    // A constant undef variable can't be written to, and any load is
    // undef, so it should be eliminated by the optimizer. It could be
    // dropped by the back end if not. This pass skips over it.
    return false;
  }
  return true;
}

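// Collect all LDS globals in M that are eligible for lowering and that
// shouldLowerLDSToStruct selects for the given kernel F (or for the whole
// module when F is null).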
std::vector<GlobalVariable *> findLDSVariablesToLower(Module &M,
                                                      const Function *F) {
  std::vector<llvm::GlobalVariable *> LocalVars;
  for (auto &GV : M.globals()) {
    if (!isLDSVariableToLower(GV)) {
      continue;
    }
    if (!shouldLowerLDSToStruct(GV, F)) {
      continue;
    }
    LocalVars.push_back(&GV);
  }
  return LocalVars;
}

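// MemorySSA conservatively treats fences, barriers, and atomics as clobbers.
// Refine that answer for a given pointer: fences, AMDGPU barrier intrinsics,
// and atomics proven not to alias Ptr are not real clobbers of the load.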
bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
  Instruction *DefInst = Def->getMemoryInst();

  if (isa<FenceInst>(DefInst))
    return false;

  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::amdgcn_s_barrier:
    case Intrinsic::amdgcn_wave_barrier:
    case Intrinsic::amdgcn_sched_barrier:
    case Intrinsic::amdgcn_sched_group_barrier:
      return false;
    default:
      break;
    }
  }

  // Ignore atomics that do not alias the original load; any atomic is a
  // universal MemoryDef from MSSA's point of view, just like a fence.
  const auto checkNoAlias = [AA, Ptr](auto I) -> bool {
    return I && AA->isNoAlias(I->getPointerOperand(), Ptr);
  };

  if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
      checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
    return false;

  return true;
}

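// Walk MemorySSA upwards from the load's nearest dominating clobber and
// report whether any definition that isReallyAClobber accepts can reach the
// load within its function.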
bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
                           AAResults *AA) {
  MemorySSAWalker *Walker = MSSA->getWalker();
  SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
  SmallSet<MemoryAccess *, 8> Visited;
  MemoryLocation Loc(MemoryLocation::get(Load));

  LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');

  // Start with the nearest dominating clobbering access; it will be either
  // live on entry (nothing to do, the load is not clobbered), a MemoryDef, or
  // a MemoryPhi if several MemoryDefs can define this memory state. In that
  // case add all incoming Defs to the WorkList and keep walking up through
  // the definitions of this memory location until the root. When all the
  // defs are exhausted and we have reached the entry state, there is no
  // clobber. Along the scan, ignore barriers and fences, which MemorySSA
  // considers clobbers even though they do not really write anything into
  // memory.
  while (!WorkList.empty()) {
    MemoryAccess *MA = WorkList.pop_back_val();
    if (!Visited.insert(MA).second)
      continue;

    if (MSSA->isLiveOnEntryDef(MA))
      continue;

    if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
      LLVM_DEBUG(dbgs() << "  Def: " << *Def->getMemoryInst() << '\n');

      if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) {
        LLVM_DEBUG(dbgs() << "    -> load is clobbered\n");
        return true;
      }

      WorkList.push_back(
          Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
      continue;
    }

    const MemoryPhi *Phi = cast<MemoryPhi>(MA);
    for (const auto &Use : Phi->incoming_values())
      WorkList.push_back(cast<MemoryAccess>(&Use));
  }

  LLVM_DEBUG(dbgs() << "    -> no clobber\n");
  return false;
}

} // end namespace AMDGPU

} // end namespace llvm