1 //===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "AMDGPUMachineFunction.h"
10 #include "AMDGPU.h"
11 #include "AMDGPUPerfHintAnalysis.h"
12 #include "AMDGPUSubtarget.h"
13 #include "llvm/CodeGen/MachineModuleInfo.h"
14 #include "llvm/IR/Constants.h"
15 #include "llvm/Target/TargetMachine.h"
16
17 using namespace llvm;
18
AMDGPUMachineFunction(const Function & F,const AMDGPUSubtarget & ST)19 AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
20 const AMDGPUSubtarget &ST)
21 : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
22 IsModuleEntryFunction(
23 AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
24 NoSignedZerosFPMath(false) {
25
26 // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
27 // except reserved size is not correctly aligned.
28
29 Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
30 MemoryBound = MemBoundAttr.getValueAsBool();
31
32 Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
33 WaveLimiter = WaveLimitAttr.getValueAsBool();
34
35 // FIXME: How is this attribute supposed to interact with statically known
36 // global sizes?
37 StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
38 if (!S.empty())
39 S.consumeInteger(0, GDSSize);
40
41 // Assume the attribute allocates before any known GDS globals.
42 StaticGDSSize = GDSSize;
43
44 CallingConv::ID CC = F.getCallingConv();
45 if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
46 ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
47
48 // FIXME: Shouldn't be target specific
49 Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math");
50 NoSignedZerosFPMath =
51 NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true";
52 }
53
allocateLDSGlobal(const DataLayout & DL,const GlobalVariable & GV,Align Trailing)54 unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
55 const GlobalVariable &GV,
56 Align Trailing) {
57 auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0));
58 if (!Entry.second)
59 return Entry.first->second;
60
61 Align Alignment =
62 DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
63
64 unsigned Offset;
65 if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
66 /// TODO: We should sort these to minimize wasted space due to alignment
67 /// padding. Currently the padding is decided by the first encountered use
68 /// during lowering.
69 Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);
70
71 StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
72
73 // Align LDS size to trailing, e.g. for aligning dynamic shared memory
74 LDSSize = alignTo(StaticLDSSize, Trailing);
75 } else {
76 assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
77 "expected region address space");
78
79 Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
80 StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());
81
82 // FIXME: Apply alignment of dynamic GDS
83 GDSSize = StaticGDSSize;
84 }
85
86 Entry.first->second = Offset;
87 return Offset;
88 }
89
90 static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds";
91
isKnownAddressLDSGlobal(const GlobalVariable & GV)92 bool AMDGPUMachineFunction::isKnownAddressLDSGlobal(const GlobalVariable &GV) {
93 auto name = GV.getName();
94 return (name == ModuleLDSName) ||
95 (name.startswith("llvm.amdgcn.kernel.") && name.endswith(".lds"));
96 }
97
getKernelLDSFunctionFromGlobal(const GlobalVariable & GV)98 const Function *AMDGPUMachineFunction::getKernelLDSFunctionFromGlobal(
99 const GlobalVariable &GV) {
100 const Module &M = *GV.getParent();
101 StringRef N(GV.getName());
102 if (N.consume_front("llvm.amdgcn.kernel.") && N.consume_back(".lds")) {
103 return M.getFunction(N);
104 }
105 return nullptr;
106 }
107
108 const GlobalVariable *
getKernelLDSGlobalFromFunction(const Function & F)109 AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(const Function &F) {
110 const Module *M = F.getParent();
111 std::string KernelLDSName = "llvm.amdgcn.kernel.";
112 KernelLDSName += F.getName();
113 KernelLDSName += ".lds";
114 return M->getNamedGlobal(KernelLDSName);
115 }
116
117 // This kernel calls no functions that require the module lds struct
canElideModuleLDS(const Function & F)118 static bool canElideModuleLDS(const Function &F) {
119 return F.hasFnAttribute("amdgpu-elide-module-lds");
120 }
121
calculateKnownAddressOfLDSGlobal(const GlobalVariable & GV)122 unsigned AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(
123 const GlobalVariable &GV) {
124 // module.lds, then alignment padding, then kernel.lds, then other variables
125 // if any
126
127 assert(isKnownAddressLDSGlobal(GV));
128 unsigned Offset = 0;
129
130 if (GV.getName() == ModuleLDSName) {
131 return 0;
132 }
133
134 const Module *M = GV.getParent();
135 const DataLayout &DL = M->getDataLayout();
136
137 const GlobalVariable *GVM = M->getNamedGlobal(ModuleLDSName);
138 const Function *f = getKernelLDSFunctionFromGlobal(GV);
139
140 // Account for module.lds if allocated for this function
141 if (GVM && f && !canElideModuleLDS(*f)) {
142 // allocator aligns this to var align, but it's zero to begin with
143 Offset += DL.getTypeAllocSize(GVM->getValueType());
144 }
145
146 // No dynamic LDS alignment done by allocateModuleLDSGlobal
147 Offset = alignTo(
148 Offset, DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()));
149
150 return Offset;
151 }
152
allocateKnownAddressLDSGlobal(const Function & F)153 void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
154 const Module *M = F.getParent();
155
156 // This function is called before allocating any other LDS so that it can
157 // reliably put values at known addresses. Consequently, dynamic LDS, if
158 // present, will not yet have been allocated
159
160 assert(getDynLDSAlign() == Align() && "dynamic LDS not yet allocated");
161
162 if (isModuleEntryFunction()) {
163
164 // Pointer values start from zero, memory allocated per-kernel-launch
165 // Variables can be grouped into a module level struct and a struct per
166 // kernel function by AMDGPULowerModuleLDSPass. If that is done, they
167 // are allocated at statically computable addresses here.
168 //
169 // Address 0
170 // {
171 // llvm.amdgcn.module.lds
172 // }
173 // alignment padding
174 // {
175 // llvm.amdgcn.kernel.some-name.lds
176 // }
177 // other variables, e.g. dynamic lds, allocated after this call
178
179 const GlobalVariable *GV = M->getNamedGlobal(ModuleLDSName);
180 const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F);
181
182 if (GV && !canElideModuleLDS(F)) {
183 assert(isKnownAddressLDSGlobal(*GV));
184 unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align());
185 (void)Offset;
186 assert(Offset == calculateKnownAddressOfLDSGlobal(*GV) &&
187 "Module LDS expected to be allocated before other LDS");
188 }
189
190 if (KV) {
191 // The per-kernel offset is deterministic because it is allocated
192 // before any other non-module LDS variables.
193 assert(isKnownAddressLDSGlobal(*KV));
194 unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align());
195 (void)Offset;
196 assert(Offset == calculateKnownAddressOfLDSGlobal(*KV) &&
197 "Kernel LDS expected to be immediately after module LDS");
198 }
199 }
200 }
201
202 std::optional<uint32_t>
getLDSKernelIdMetadata(const Function & F)203 AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
204 auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
205 if (MD && MD->getNumOperands() == 1) {
206 ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0));
207 if (KnownSize) {
208 uint64_t V = KnownSize->getZExtValue();
209 if (V <= UINT32_MAX) {
210 return V;
211 }
212 }
213 }
214 return {};
215 }
216
setDynLDSAlign(const DataLayout & DL,const GlobalVariable & GV)217 void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
218 const GlobalVariable &GV) {
219 assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
220
221 Align Alignment =
222 DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
223 if (Alignment <= DynLDSAlign)
224 return;
225
226 LDSSize = alignTo(StaticLDSSize, Alignment);
227 DynLDSAlign = Alignment;
228 }
229