1*9880d681SAndroid Build Coastguard Worker //===-- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass ---------===//
2*9880d681SAndroid Build Coastguard Worker //
3*9880d681SAndroid Build Coastguard Worker // The LLVM Compiler Infrastructure
4*9880d681SAndroid Build Coastguard Worker //
5*9880d681SAndroid Build Coastguard Worker // This file is distributed under the University of Illinois Open Source
6*9880d681SAndroid Build Coastguard Worker // License. See LICENSE.TXT for details.
7*9880d681SAndroid Build Coastguard Worker //
8*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
9*9880d681SAndroid Build Coastguard Worker //
10*9880d681SAndroid Build Coastguard Worker // \file
11*9880d681SAndroid Build Coastguard Worker // This file implements a TargetTransformInfo analysis pass specific to the
12*9880d681SAndroid Build Coastguard Worker // AMDGPU target machine. It uses the target's detailed information to provide
13*9880d681SAndroid Build Coastguard Worker // more precise answers to certain TTI queries, while letting the target
14*9880d681SAndroid Build Coastguard Worker // independent and default TTI implementations handle the rest.
15*9880d681SAndroid Build Coastguard Worker //
16*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
17*9880d681SAndroid Build Coastguard Worker
18*9880d681SAndroid Build Coastguard Worker #include "AMDGPUTargetTransformInfo.h"
19*9880d681SAndroid Build Coastguard Worker #include "llvm/Analysis/LoopInfo.h"
20*9880d681SAndroid Build Coastguard Worker #include "llvm/Analysis/TargetTransformInfo.h"
21*9880d681SAndroid Build Coastguard Worker #include "llvm/Analysis/ValueTracking.h"
22*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/BasicTTIImpl.h"
23*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/Module.h"
24*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/Intrinsics.h"
25*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/Debug.h"
26*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/CostTable.h"
27*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/TargetLowering.h"
28*9880d681SAndroid Build Coastguard Worker using namespace llvm;
29*9880d681SAndroid Build Coastguard Worker
30*9880d681SAndroid Build Coastguard Worker #define DEBUG_TYPE "AMDGPUtti"
31*9880d681SAndroid Build Coastguard Worker
32*9880d681SAndroid Build Coastguard Worker
getUnrollingPreferences(Loop * L,TTI::UnrollingPreferences & UP)33*9880d681SAndroid Build Coastguard Worker void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
34*9880d681SAndroid Build Coastguard Worker TTI::UnrollingPreferences &UP) {
35*9880d681SAndroid Build Coastguard Worker UP.Threshold = 300; // Twice the default.
36*9880d681SAndroid Build Coastguard Worker UP.MaxCount = UINT_MAX;
37*9880d681SAndroid Build Coastguard Worker UP.Partial = true;
38*9880d681SAndroid Build Coastguard Worker
39*9880d681SAndroid Build Coastguard Worker // TODO: Do we want runtime unrolling?
40*9880d681SAndroid Build Coastguard Worker
41*9880d681SAndroid Build Coastguard Worker for (const BasicBlock *BB : L->getBlocks()) {
42*9880d681SAndroid Build Coastguard Worker const DataLayout &DL = BB->getModule()->getDataLayout();
43*9880d681SAndroid Build Coastguard Worker for (const Instruction &I : *BB) {
44*9880d681SAndroid Build Coastguard Worker const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
45*9880d681SAndroid Build Coastguard Worker if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
46*9880d681SAndroid Build Coastguard Worker continue;
47*9880d681SAndroid Build Coastguard Worker
48*9880d681SAndroid Build Coastguard Worker const Value *Ptr = GEP->getPointerOperand();
49*9880d681SAndroid Build Coastguard Worker const AllocaInst *Alloca =
50*9880d681SAndroid Build Coastguard Worker dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));
51*9880d681SAndroid Build Coastguard Worker if (Alloca) {
52*9880d681SAndroid Build Coastguard Worker // We want to do whatever we can to limit the number of alloca
53*9880d681SAndroid Build Coastguard Worker // instructions that make it through to the code generator. allocas
54*9880d681SAndroid Build Coastguard Worker // require us to use indirect addressing, which is slow and prone to
55*9880d681SAndroid Build Coastguard Worker // compiler bugs. If this loop does an address calculation on an
56*9880d681SAndroid Build Coastguard Worker // alloca ptr, then we want to use a higher than normal loop unroll
57*9880d681SAndroid Build Coastguard Worker // threshold. This will give SROA a better chance to eliminate these
58*9880d681SAndroid Build Coastguard Worker // allocas.
59*9880d681SAndroid Build Coastguard Worker //
60*9880d681SAndroid Build Coastguard Worker // Don't use the maximum allowed value here as it will make some
61*9880d681SAndroid Build Coastguard Worker // programs way too big.
62*9880d681SAndroid Build Coastguard Worker UP.Threshold = 800;
63*9880d681SAndroid Build Coastguard Worker }
64*9880d681SAndroid Build Coastguard Worker }
65*9880d681SAndroid Build Coastguard Worker }
66*9880d681SAndroid Build Coastguard Worker }
67*9880d681SAndroid Build Coastguard Worker
getNumberOfRegisters(bool Vec)68*9880d681SAndroid Build Coastguard Worker unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) {
69*9880d681SAndroid Build Coastguard Worker if (Vec)
70*9880d681SAndroid Build Coastguard Worker return 0;
71*9880d681SAndroid Build Coastguard Worker
72*9880d681SAndroid Build Coastguard Worker // Number of VGPRs on SI.
73*9880d681SAndroid Build Coastguard Worker if (ST->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
74*9880d681SAndroid Build Coastguard Worker return 256;
75*9880d681SAndroid Build Coastguard Worker
76*9880d681SAndroid Build Coastguard Worker return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
77*9880d681SAndroid Build Coastguard Worker }
78*9880d681SAndroid Build Coastguard Worker
getRegisterBitWidth(bool Vector)79*9880d681SAndroid Build Coastguard Worker unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) {
80*9880d681SAndroid Build Coastguard Worker return Vector ? 0 : 32;
81*9880d681SAndroid Build Coastguard Worker }
82*9880d681SAndroid Build Coastguard Worker
getLoadStoreVecRegBitWidth(unsigned AddrSpace)83*9880d681SAndroid Build Coastguard Worker unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) {
84*9880d681SAndroid Build Coastguard Worker switch (AddrSpace) {
85*9880d681SAndroid Build Coastguard Worker case AMDGPUAS::GLOBAL_ADDRESS:
86*9880d681SAndroid Build Coastguard Worker case AMDGPUAS::CONSTANT_ADDRESS:
87*9880d681SAndroid Build Coastguard Worker case AMDGPUAS::FLAT_ADDRESS:
88*9880d681SAndroid Build Coastguard Worker return 128;
89*9880d681SAndroid Build Coastguard Worker case AMDGPUAS::LOCAL_ADDRESS:
90*9880d681SAndroid Build Coastguard Worker case AMDGPUAS::REGION_ADDRESS:
91*9880d681SAndroid Build Coastguard Worker return 64;
92*9880d681SAndroid Build Coastguard Worker case AMDGPUAS::PRIVATE_ADDRESS:
93*9880d681SAndroid Build Coastguard Worker return 8 * ST->getMaxPrivateElementSize();
94*9880d681SAndroid Build Coastguard Worker default:
95*9880d681SAndroid Build Coastguard Worker if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS &&
96*9880d681SAndroid Build Coastguard Worker (AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
97*9880d681SAndroid Build Coastguard Worker AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
98*9880d681SAndroid Build Coastguard Worker (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
99*9880d681SAndroid Build Coastguard Worker AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
100*9880d681SAndroid Build Coastguard Worker return 128;
101*9880d681SAndroid Build Coastguard Worker llvm_unreachable("unhandled address space");
102*9880d681SAndroid Build Coastguard Worker }
103*9880d681SAndroid Build Coastguard Worker }
104*9880d681SAndroid Build Coastguard Worker
getMaxInterleaveFactor(unsigned VF)105*9880d681SAndroid Build Coastguard Worker unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) {
106*9880d681SAndroid Build Coastguard Worker // Semi-arbitrary large amount.
107*9880d681SAndroid Build Coastguard Worker return 64;
108*9880d681SAndroid Build Coastguard Worker }
109*9880d681SAndroid Build Coastguard Worker
getArithmeticInstrCost(unsigned Opcode,Type * Ty,TTI::OperandValueKind Opd1Info,TTI::OperandValueKind Opd2Info,TTI::OperandValueProperties Opd1PropInfo,TTI::OperandValueProperties Opd2PropInfo)110*9880d681SAndroid Build Coastguard Worker int AMDGPUTTIImpl::getArithmeticInstrCost(
111*9880d681SAndroid Build Coastguard Worker unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
112*9880d681SAndroid Build Coastguard Worker TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
113*9880d681SAndroid Build Coastguard Worker TTI::OperandValueProperties Opd2PropInfo) {
114*9880d681SAndroid Build Coastguard Worker
115*9880d681SAndroid Build Coastguard Worker EVT OrigTy = TLI->getValueType(DL, Ty);
116*9880d681SAndroid Build Coastguard Worker if (!OrigTy.isSimple()) {
117*9880d681SAndroid Build Coastguard Worker return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
118*9880d681SAndroid Build Coastguard Worker Opd1PropInfo, Opd2PropInfo);
119*9880d681SAndroid Build Coastguard Worker }
120*9880d681SAndroid Build Coastguard Worker
121*9880d681SAndroid Build Coastguard Worker // Legalize the type.
122*9880d681SAndroid Build Coastguard Worker std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
123*9880d681SAndroid Build Coastguard Worker int ISD = TLI->InstructionOpcodeToISD(Opcode);
124*9880d681SAndroid Build Coastguard Worker
125*9880d681SAndroid Build Coastguard Worker // Because we don't have any legal vector operations, but the legal types, we
126*9880d681SAndroid Build Coastguard Worker // need to account for split vectors.
127*9880d681SAndroid Build Coastguard Worker unsigned NElts = LT.second.isVector() ?
128*9880d681SAndroid Build Coastguard Worker LT.second.getVectorNumElements() : 1;
129*9880d681SAndroid Build Coastguard Worker
130*9880d681SAndroid Build Coastguard Worker MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy;
131*9880d681SAndroid Build Coastguard Worker
132*9880d681SAndroid Build Coastguard Worker switch (ISD) {
133*9880d681SAndroid Build Coastguard Worker case ISD::SHL:
134*9880d681SAndroid Build Coastguard Worker case ISD::SRL:
135*9880d681SAndroid Build Coastguard Worker case ISD::SRA: {
136*9880d681SAndroid Build Coastguard Worker if (SLT == MVT::i64)
137*9880d681SAndroid Build Coastguard Worker return get64BitInstrCost() * LT.first * NElts;
138*9880d681SAndroid Build Coastguard Worker
139*9880d681SAndroid Build Coastguard Worker // i32
140*9880d681SAndroid Build Coastguard Worker return getFullRateInstrCost() * LT.first * NElts;
141*9880d681SAndroid Build Coastguard Worker }
142*9880d681SAndroid Build Coastguard Worker case ISD::ADD:
143*9880d681SAndroid Build Coastguard Worker case ISD::SUB:
144*9880d681SAndroid Build Coastguard Worker case ISD::AND:
145*9880d681SAndroid Build Coastguard Worker case ISD::OR:
146*9880d681SAndroid Build Coastguard Worker case ISD::XOR: {
147*9880d681SAndroid Build Coastguard Worker if (SLT == MVT::i64){
148*9880d681SAndroid Build Coastguard Worker // and, or and xor are typically split into 2 VALU instructions.
149*9880d681SAndroid Build Coastguard Worker return 2 * getFullRateInstrCost() * LT.first * NElts;
150*9880d681SAndroid Build Coastguard Worker }
151*9880d681SAndroid Build Coastguard Worker
152*9880d681SAndroid Build Coastguard Worker return LT.first * NElts * getFullRateInstrCost();
153*9880d681SAndroid Build Coastguard Worker }
154*9880d681SAndroid Build Coastguard Worker case ISD::MUL: {
155*9880d681SAndroid Build Coastguard Worker const int QuarterRateCost = getQuarterRateInstrCost();
156*9880d681SAndroid Build Coastguard Worker if (SLT == MVT::i64) {
157*9880d681SAndroid Build Coastguard Worker const int FullRateCost = getFullRateInstrCost();
158*9880d681SAndroid Build Coastguard Worker return (4 * QuarterRateCost + (2 * 2) * FullRateCost) * LT.first * NElts;
159*9880d681SAndroid Build Coastguard Worker }
160*9880d681SAndroid Build Coastguard Worker
161*9880d681SAndroid Build Coastguard Worker // i32
162*9880d681SAndroid Build Coastguard Worker return QuarterRateCost * NElts * LT.first;
163*9880d681SAndroid Build Coastguard Worker }
164*9880d681SAndroid Build Coastguard Worker case ISD::FADD:
165*9880d681SAndroid Build Coastguard Worker case ISD::FSUB:
166*9880d681SAndroid Build Coastguard Worker case ISD::FMUL:
167*9880d681SAndroid Build Coastguard Worker if (SLT == MVT::f64)
168*9880d681SAndroid Build Coastguard Worker return LT.first * NElts * get64BitInstrCost();
169*9880d681SAndroid Build Coastguard Worker
170*9880d681SAndroid Build Coastguard Worker if (SLT == MVT::f32 || SLT == MVT::f16)
171*9880d681SAndroid Build Coastguard Worker return LT.first * NElts * getFullRateInstrCost();
172*9880d681SAndroid Build Coastguard Worker break;
173*9880d681SAndroid Build Coastguard Worker
174*9880d681SAndroid Build Coastguard Worker case ISD::FDIV:
175*9880d681SAndroid Build Coastguard Worker case ISD::FREM:
176*9880d681SAndroid Build Coastguard Worker // FIXME: frem should be handled separately. The fdiv in it is most of it,
177*9880d681SAndroid Build Coastguard Worker // but the current lowering is also not entirely correct.
178*9880d681SAndroid Build Coastguard Worker if (SLT == MVT::f64) {
179*9880d681SAndroid Build Coastguard Worker int Cost = 4 * get64BitInstrCost() + 7 * getQuarterRateInstrCost();
180*9880d681SAndroid Build Coastguard Worker
181*9880d681SAndroid Build Coastguard Worker // Add cost of workaround.
182*9880d681SAndroid Build Coastguard Worker if (ST->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS)
183*9880d681SAndroid Build Coastguard Worker Cost += 3 * getFullRateInstrCost();
184*9880d681SAndroid Build Coastguard Worker
185*9880d681SAndroid Build Coastguard Worker return LT.first * Cost * NElts;
186*9880d681SAndroid Build Coastguard Worker }
187*9880d681SAndroid Build Coastguard Worker
188*9880d681SAndroid Build Coastguard Worker // Assuming no fp32 denormals lowering.
189*9880d681SAndroid Build Coastguard Worker if (SLT == MVT::f32 || SLT == MVT::f16) {
190*9880d681SAndroid Build Coastguard Worker assert(!ST->hasFP32Denormals() && "will change when supported");
191*9880d681SAndroid Build Coastguard Worker int Cost = 7 * getFullRateInstrCost() + 1 * getQuarterRateInstrCost();
192*9880d681SAndroid Build Coastguard Worker return LT.first * NElts * Cost;
193*9880d681SAndroid Build Coastguard Worker }
194*9880d681SAndroid Build Coastguard Worker
195*9880d681SAndroid Build Coastguard Worker break;
196*9880d681SAndroid Build Coastguard Worker default:
197*9880d681SAndroid Build Coastguard Worker break;
198*9880d681SAndroid Build Coastguard Worker }
199*9880d681SAndroid Build Coastguard Worker
200*9880d681SAndroid Build Coastguard Worker return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
201*9880d681SAndroid Build Coastguard Worker Opd1PropInfo, Opd2PropInfo);
202*9880d681SAndroid Build Coastguard Worker }
203*9880d681SAndroid Build Coastguard Worker
getCFInstrCost(unsigned Opcode)204*9880d681SAndroid Build Coastguard Worker unsigned AMDGPUTTIImpl::getCFInstrCost(unsigned Opcode) {
205*9880d681SAndroid Build Coastguard Worker // XXX - For some reason this isn't called for switch.
206*9880d681SAndroid Build Coastguard Worker switch (Opcode) {
207*9880d681SAndroid Build Coastguard Worker case Instruction::Br:
208*9880d681SAndroid Build Coastguard Worker case Instruction::Ret:
209*9880d681SAndroid Build Coastguard Worker return 10;
210*9880d681SAndroid Build Coastguard Worker default:
211*9880d681SAndroid Build Coastguard Worker return BaseT::getCFInstrCost(Opcode);
212*9880d681SAndroid Build Coastguard Worker }
213*9880d681SAndroid Build Coastguard Worker }
214*9880d681SAndroid Build Coastguard Worker
getVectorInstrCost(unsigned Opcode,Type * ValTy,unsigned Index)215*9880d681SAndroid Build Coastguard Worker int AMDGPUTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
216*9880d681SAndroid Build Coastguard Worker unsigned Index) {
217*9880d681SAndroid Build Coastguard Worker switch (Opcode) {
218*9880d681SAndroid Build Coastguard Worker case Instruction::ExtractElement:
219*9880d681SAndroid Build Coastguard Worker case Instruction::InsertElement:
220*9880d681SAndroid Build Coastguard Worker // Extracts are just reads of a subregister, so are free. Inserts are
221*9880d681SAndroid Build Coastguard Worker // considered free because we don't want to have any cost for scalarizing
222*9880d681SAndroid Build Coastguard Worker // operations, and we don't have to copy into a different register class.
223*9880d681SAndroid Build Coastguard Worker
224*9880d681SAndroid Build Coastguard Worker // Dynamic indexing isn't free and is best avoided.
225*9880d681SAndroid Build Coastguard Worker return Index == ~0u ? 2 : 0;
226*9880d681SAndroid Build Coastguard Worker default:
227*9880d681SAndroid Build Coastguard Worker return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
228*9880d681SAndroid Build Coastguard Worker }
229*9880d681SAndroid Build Coastguard Worker }
230*9880d681SAndroid Build Coastguard Worker
isIntrinsicSourceOfDivergence(const TargetIntrinsicInfo * TII,const IntrinsicInst * I)231*9880d681SAndroid Build Coastguard Worker static bool isIntrinsicSourceOfDivergence(const TargetIntrinsicInfo *TII,
232*9880d681SAndroid Build Coastguard Worker const IntrinsicInst *I) {
233*9880d681SAndroid Build Coastguard Worker switch (I->getIntrinsicID()) {
234*9880d681SAndroid Build Coastguard Worker default:
235*9880d681SAndroid Build Coastguard Worker return false;
236*9880d681SAndroid Build Coastguard Worker case Intrinsic::not_intrinsic:
237*9880d681SAndroid Build Coastguard Worker // This means we have an intrinsic that isn't defined in
238*9880d681SAndroid Build Coastguard Worker // IntrinsicsAMDGPU.td
239*9880d681SAndroid Build Coastguard Worker break;
240*9880d681SAndroid Build Coastguard Worker
241*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_workitem_id_x:
242*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_workitem_id_y:
243*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_workitem_id_z:
244*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_interp_p1:
245*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_interp_p2:
246*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_mbcnt_hi:
247*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_mbcnt_lo:
248*9880d681SAndroid Build Coastguard Worker case Intrinsic::r600_read_tidig_x:
249*9880d681SAndroid Build Coastguard Worker case Intrinsic::r600_read_tidig_y:
250*9880d681SAndroid Build Coastguard Worker case Intrinsic::r600_read_tidig_z:
251*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_image_atomic_swap:
252*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_image_atomic_add:
253*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_image_atomic_sub:
254*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_image_atomic_smin:
255*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_image_atomic_umin:
256*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_image_atomic_smax:
257*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_image_atomic_umax:
258*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_image_atomic_and:
259*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_image_atomic_or:
260*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_image_atomic_xor:
261*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_image_atomic_inc:
262*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_image_atomic_dec:
263*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_image_atomic_cmpswap:
264*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_buffer_atomic_swap:
265*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_buffer_atomic_add:
266*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_buffer_atomic_sub:
267*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_buffer_atomic_smin:
268*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_buffer_atomic_umin:
269*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_buffer_atomic_smax:
270*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_buffer_atomic_umax:
271*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_buffer_atomic_and:
272*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_buffer_atomic_or:
273*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_buffer_atomic_xor:
274*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_buffer_atomic_cmpswap:
275*9880d681SAndroid Build Coastguard Worker case Intrinsic::amdgcn_ps_live:
276*9880d681SAndroid Build Coastguard Worker return true;
277*9880d681SAndroid Build Coastguard Worker }
278*9880d681SAndroid Build Coastguard Worker
279*9880d681SAndroid Build Coastguard Worker StringRef Name = I->getCalledFunction()->getName();
280*9880d681SAndroid Build Coastguard Worker switch (TII->lookupName((const char *)Name.bytes_begin(), Name.size())) {
281*9880d681SAndroid Build Coastguard Worker default:
282*9880d681SAndroid Build Coastguard Worker return false;
283*9880d681SAndroid Build Coastguard Worker case AMDGPUIntrinsic::SI_fs_interp:
284*9880d681SAndroid Build Coastguard Worker case AMDGPUIntrinsic::SI_fs_constant:
285*9880d681SAndroid Build Coastguard Worker return true;
286*9880d681SAndroid Build Coastguard Worker }
287*9880d681SAndroid Build Coastguard Worker }
288*9880d681SAndroid Build Coastguard Worker
isArgPassedInSGPR(const Argument * A)289*9880d681SAndroid Build Coastguard Worker static bool isArgPassedInSGPR(const Argument *A) {
290*9880d681SAndroid Build Coastguard Worker const Function *F = A->getParent();
291*9880d681SAndroid Build Coastguard Worker
292*9880d681SAndroid Build Coastguard Worker // Arguments to compute shaders are never a source of divergence.
293*9880d681SAndroid Build Coastguard Worker if (!AMDGPU::isShader(F->getCallingConv()))
294*9880d681SAndroid Build Coastguard Worker return true;
295*9880d681SAndroid Build Coastguard Worker
296*9880d681SAndroid Build Coastguard Worker // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
297*9880d681SAndroid Build Coastguard Worker if (F->getAttributes().hasAttribute(A->getArgNo() + 1, Attribute::InReg) ||
298*9880d681SAndroid Build Coastguard Worker F->getAttributes().hasAttribute(A->getArgNo() + 1, Attribute::ByVal))
299*9880d681SAndroid Build Coastguard Worker return true;
300*9880d681SAndroid Build Coastguard Worker
301*9880d681SAndroid Build Coastguard Worker // Everything else is in VGPRs.
302*9880d681SAndroid Build Coastguard Worker return false;
303*9880d681SAndroid Build Coastguard Worker }
304*9880d681SAndroid Build Coastguard Worker
305*9880d681SAndroid Build Coastguard Worker ///
306*9880d681SAndroid Build Coastguard Worker /// \returns true if the result of the value could potentially be
307*9880d681SAndroid Build Coastguard Worker /// different across workitems in a wavefront.
isSourceOfDivergence(const Value * V) const308*9880d681SAndroid Build Coastguard Worker bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const {
309*9880d681SAndroid Build Coastguard Worker
310*9880d681SAndroid Build Coastguard Worker if (const Argument *A = dyn_cast<Argument>(V))
311*9880d681SAndroid Build Coastguard Worker return !isArgPassedInSGPR(A);
312*9880d681SAndroid Build Coastguard Worker
313*9880d681SAndroid Build Coastguard Worker // Loads from the private address space are divergent, because threads
314*9880d681SAndroid Build Coastguard Worker // can execute the load instruction with the same inputs and get different
315*9880d681SAndroid Build Coastguard Worker // results.
316*9880d681SAndroid Build Coastguard Worker //
317*9880d681SAndroid Build Coastguard Worker // All other loads are not divergent, because if threads issue loads with the
318*9880d681SAndroid Build Coastguard Worker // same arguments, they will always get the same result.
319*9880d681SAndroid Build Coastguard Worker if (const LoadInst *Load = dyn_cast<LoadInst>(V))
320*9880d681SAndroid Build Coastguard Worker return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
321*9880d681SAndroid Build Coastguard Worker
322*9880d681SAndroid Build Coastguard Worker // Atomics are divergent because they are executed sequentially: when an
323*9880d681SAndroid Build Coastguard Worker // atomic operation refers to the same address in each thread, then each
324*9880d681SAndroid Build Coastguard Worker // thread after the first sees the value written by the previous thread as
325*9880d681SAndroid Build Coastguard Worker // original value.
326*9880d681SAndroid Build Coastguard Worker if (isa<AtomicRMWInst>(V) || isa<AtomicCmpXchgInst>(V))
327*9880d681SAndroid Build Coastguard Worker return true;
328*9880d681SAndroid Build Coastguard Worker
329*9880d681SAndroid Build Coastguard Worker if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
330*9880d681SAndroid Build Coastguard Worker const TargetMachine &TM = getTLI()->getTargetMachine();
331*9880d681SAndroid Build Coastguard Worker return isIntrinsicSourceOfDivergence(TM.getIntrinsicInfo(), Intrinsic);
332*9880d681SAndroid Build Coastguard Worker }
333*9880d681SAndroid Build Coastguard Worker
334*9880d681SAndroid Build Coastguard Worker // Assume all function calls are a source of divergence.
335*9880d681SAndroid Build Coastguard Worker if (isa<CallInst>(V) || isa<InvokeInst>(V))
336*9880d681SAndroid Build Coastguard Worker return true;
337*9880d681SAndroid Build Coastguard Worker
338*9880d681SAndroid Build Coastguard Worker return false;
339*9880d681SAndroid Build Coastguard Worker }
340