xref: /aosp_15_r20/external/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker //===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
2*9880d681SAndroid Build Coastguard Worker //
3*9880d681SAndroid Build Coastguard Worker //                     The LLVM Compiler Infrastructure
4*9880d681SAndroid Build Coastguard Worker //
5*9880d681SAndroid Build Coastguard Worker // This file is distributed under the University of Illinois Open Source
6*9880d681SAndroid Build Coastguard Worker // License. See LICENSE.TXT for details.
7*9880d681SAndroid Build Coastguard Worker //
8*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
9*9880d681SAndroid Build Coastguard Worker //
10*9880d681SAndroid Build Coastguard Worker // This transformation analyzes and transforms the induction variables (and
11*9880d681SAndroid Build Coastguard Worker // computations derived from them) into forms suitable for efficient execution
12*9880d681SAndroid Build Coastguard Worker // on the target.
13*9880d681SAndroid Build Coastguard Worker //
14*9880d681SAndroid Build Coastguard Worker // This pass performs a strength reduction on array references inside loops that
15*9880d681SAndroid Build Coastguard Worker // have as one or more of their components the loop induction variable, it
16*9880d681SAndroid Build Coastguard Worker // rewrites expressions to take advantage of scaled-index addressing modes
17*9880d681SAndroid Build Coastguard Worker // available on the target, and it performs a variety of other optimizations
18*9880d681SAndroid Build Coastguard Worker // related to loop induction variables.
19*9880d681SAndroid Build Coastguard Worker //
20*9880d681SAndroid Build Coastguard Worker // Terminology note: this code has a lot of handling for "post-increment" or
21*9880d681SAndroid Build Coastguard Worker // "post-inc" users. This is not talking about post-increment addressing modes;
22*9880d681SAndroid Build Coastguard Worker // it is instead talking about code like this:
23*9880d681SAndroid Build Coastguard Worker //
24*9880d681SAndroid Build Coastguard Worker //   %i = phi [ 0, %entry ], [ %i.next, %latch ]
25*9880d681SAndroid Build Coastguard Worker //   ...
26*9880d681SAndroid Build Coastguard Worker //   %i.next = add %i, 1
27*9880d681SAndroid Build Coastguard Worker //   %c = icmp eq %i.next, %n
28*9880d681SAndroid Build Coastguard Worker //
29*9880d681SAndroid Build Coastguard Worker // The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
30*9880d681SAndroid Build Coastguard Worker // it's useful to think about these as the same register, with some uses using
31*9880d681SAndroid Build Coastguard Worker // the value of the register before the add and some using it after. In this
32*9880d681SAndroid Build Coastguard Worker // example, the icmp is a post-increment user, since it uses %i.next, which is
33*9880d681SAndroid Build Coastguard Worker // the value of the induction variable after the increment. The other common
34*9880d681SAndroid Build Coastguard Worker // case of post-increment users is users outside the loop.
35*9880d681SAndroid Build Coastguard Worker //
36*9880d681SAndroid Build Coastguard Worker // TODO: More sophistication in the way Formulae are generated and filtered.
37*9880d681SAndroid Build Coastguard Worker //
38*9880d681SAndroid Build Coastguard Worker // TODO: Handle multiple loops at a time.
39*9880d681SAndroid Build Coastguard Worker //
40*9880d681SAndroid Build Coastguard Worker // TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
41*9880d681SAndroid Build Coastguard Worker //       of a GlobalValue?
42*9880d681SAndroid Build Coastguard Worker //
43*9880d681SAndroid Build Coastguard Worker // TODO: When truncation is free, truncate ICmp users' operands to make it a
44*9880d681SAndroid Build Coastguard Worker //       smaller encoding (on x86 at least).
45*9880d681SAndroid Build Coastguard Worker //
46*9880d681SAndroid Build Coastguard Worker // TODO: When a negated register is used by an add (such as in a list of
47*9880d681SAndroid Build Coastguard Worker //       multiple base registers, or as the increment expression in an addrec),
48*9880d681SAndroid Build Coastguard Worker //       we may not actually need both reg and (-1 * reg) in registers; the
49*9880d681SAndroid Build Coastguard Worker //       negation can be implemented by using a sub instead of an add. The
50*9880d681SAndroid Build Coastguard Worker //       lack of support for taking this into consideration when making
51*9880d681SAndroid Build Coastguard Worker //       register pressure decisions is partly worked around by the "Special"
52*9880d681SAndroid Build Coastguard Worker //       use kind.
53*9880d681SAndroid Build Coastguard Worker //
54*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
55*9880d681SAndroid Build Coastguard Worker 
56*9880d681SAndroid Build Coastguard Worker #include "llvm/Transforms/Scalar.h"
57*9880d681SAndroid Build Coastguard Worker #include "llvm/ADT/DenseSet.h"
58*9880d681SAndroid Build Coastguard Worker #include "llvm/ADT/Hashing.h"
59*9880d681SAndroid Build Coastguard Worker #include "llvm/ADT/STLExtras.h"
60*9880d681SAndroid Build Coastguard Worker #include "llvm/ADT/SetVector.h"
61*9880d681SAndroid Build Coastguard Worker #include "llvm/ADT/SmallBitVector.h"
62*9880d681SAndroid Build Coastguard Worker #include "llvm/Analysis/IVUsers.h"
63*9880d681SAndroid Build Coastguard Worker #include "llvm/Analysis/LoopPass.h"
64*9880d681SAndroid Build Coastguard Worker #include "llvm/Analysis/ScalarEvolutionExpander.h"
65*9880d681SAndroid Build Coastguard Worker #include "llvm/Analysis/TargetTransformInfo.h"
66*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/Constants.h"
67*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/DerivedTypes.h"
68*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/Dominators.h"
69*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/Instructions.h"
70*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/IntrinsicInst.h"
71*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/Module.h"
72*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/ValueHandle.h"
73*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/CommandLine.h"
74*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/Debug.h"
75*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/raw_ostream.h"
76*9880d681SAndroid Build Coastguard Worker #include "llvm/Transforms/Utils/BasicBlockUtils.h"
77*9880d681SAndroid Build Coastguard Worker #include "llvm/Transforms/Utils/Local.h"
78*9880d681SAndroid Build Coastguard Worker #include <algorithm>
79*9880d681SAndroid Build Coastguard Worker using namespace llvm;
80*9880d681SAndroid Build Coastguard Worker 
81*9880d681SAndroid Build Coastguard Worker #define DEBUG_TYPE "loop-reduce"
82*9880d681SAndroid Build Coastguard Worker 
83*9880d681SAndroid Build Coastguard Worker /// MaxIVUsers is an arbitrary threshold that provides an early opportunitiy for
84*9880d681SAndroid Build Coastguard Worker /// bail out. This threshold is far beyond the number of users that LSR can
85*9880d681SAndroid Build Coastguard Worker /// conceivably solve, so it should not affect generated code, but catches the
86*9880d681SAndroid Build Coastguard Worker /// worst cases before LSR burns too much compile time and stack space.
87*9880d681SAndroid Build Coastguard Worker static const unsigned MaxIVUsers = 200;
88*9880d681SAndroid Build Coastguard Worker 
89*9880d681SAndroid Build Coastguard Worker // Temporary flag to cleanup congruent phis after LSR phi expansion.
90*9880d681SAndroid Build Coastguard Worker // It's currently disabled until we can determine whether it's truly useful or
91*9880d681SAndroid Build Coastguard Worker // not. The flag should be removed after the v3.0 release.
92*9880d681SAndroid Build Coastguard Worker // This is now needed for ivchains.
93*9880d681SAndroid Build Coastguard Worker static cl::opt<bool> EnablePhiElim(
94*9880d681SAndroid Build Coastguard Worker   "enable-lsr-phielim", cl::Hidden, cl::init(true),
95*9880d681SAndroid Build Coastguard Worker   cl::desc("Enable LSR phi elimination"));
96*9880d681SAndroid Build Coastguard Worker 
97*9880d681SAndroid Build Coastguard Worker #ifndef NDEBUG
98*9880d681SAndroid Build Coastguard Worker // Stress test IV chain generation.
99*9880d681SAndroid Build Coastguard Worker static cl::opt<bool> StressIVChain(
100*9880d681SAndroid Build Coastguard Worker   "stress-ivchain", cl::Hidden, cl::init(false),
101*9880d681SAndroid Build Coastguard Worker   cl::desc("Stress test LSR IV chains"));
102*9880d681SAndroid Build Coastguard Worker #else
103*9880d681SAndroid Build Coastguard Worker static bool StressIVChain = false;
104*9880d681SAndroid Build Coastguard Worker #endif
105*9880d681SAndroid Build Coastguard Worker 
106*9880d681SAndroid Build Coastguard Worker namespace {
107*9880d681SAndroid Build Coastguard Worker 
108*9880d681SAndroid Build Coastguard Worker struct MemAccessTy {
109*9880d681SAndroid Build Coastguard Worker   /// Used in situations where the accessed memory type is unknown.
110*9880d681SAndroid Build Coastguard Worker   static const unsigned UnknownAddressSpace = ~0u;
111*9880d681SAndroid Build Coastguard Worker 
112*9880d681SAndroid Build Coastguard Worker   Type *MemTy;
113*9880d681SAndroid Build Coastguard Worker   unsigned AddrSpace;
114*9880d681SAndroid Build Coastguard Worker 
MemAccessTy__anon6e4801cc0111::MemAccessTy115*9880d681SAndroid Build Coastguard Worker   MemAccessTy() : MemTy(nullptr), AddrSpace(UnknownAddressSpace) {}
116*9880d681SAndroid Build Coastguard Worker 
MemAccessTy__anon6e4801cc0111::MemAccessTy117*9880d681SAndroid Build Coastguard Worker   MemAccessTy(Type *Ty, unsigned AS) :
118*9880d681SAndroid Build Coastguard Worker     MemTy(Ty), AddrSpace(AS) {}
119*9880d681SAndroid Build Coastguard Worker 
operator ==__anon6e4801cc0111::MemAccessTy120*9880d681SAndroid Build Coastguard Worker   bool operator==(MemAccessTy Other) const {
121*9880d681SAndroid Build Coastguard Worker     return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;
122*9880d681SAndroid Build Coastguard Worker   }
123*9880d681SAndroid Build Coastguard Worker 
operator !=__anon6e4801cc0111::MemAccessTy124*9880d681SAndroid Build Coastguard Worker   bool operator!=(MemAccessTy Other) const { return !(*this == Other); }
125*9880d681SAndroid Build Coastguard Worker 
getUnknown__anon6e4801cc0111::MemAccessTy126*9880d681SAndroid Build Coastguard Worker   static MemAccessTy getUnknown(LLVMContext &Ctx) {
127*9880d681SAndroid Build Coastguard Worker     return MemAccessTy(Type::getVoidTy(Ctx), UnknownAddressSpace);
128*9880d681SAndroid Build Coastguard Worker   }
129*9880d681SAndroid Build Coastguard Worker };
130*9880d681SAndroid Build Coastguard Worker 
131*9880d681SAndroid Build Coastguard Worker /// This class holds data which is used to order reuse candidates.
132*9880d681SAndroid Build Coastguard Worker class RegSortData {
133*9880d681SAndroid Build Coastguard Worker public:
134*9880d681SAndroid Build Coastguard Worker   /// This represents the set of LSRUse indices which reference
135*9880d681SAndroid Build Coastguard Worker   /// a particular register.
136*9880d681SAndroid Build Coastguard Worker   SmallBitVector UsedByIndices;
137*9880d681SAndroid Build Coastguard Worker 
138*9880d681SAndroid Build Coastguard Worker   void print(raw_ostream &OS) const;
139*9880d681SAndroid Build Coastguard Worker   void dump() const;
140*9880d681SAndroid Build Coastguard Worker };
141*9880d681SAndroid Build Coastguard Worker 
142*9880d681SAndroid Build Coastguard Worker }
143*9880d681SAndroid Build Coastguard Worker 
print(raw_ostream & OS) const144*9880d681SAndroid Build Coastguard Worker void RegSortData::print(raw_ostream &OS) const {
145*9880d681SAndroid Build Coastguard Worker   OS << "[NumUses=" << UsedByIndices.count() << ']';
146*9880d681SAndroid Build Coastguard Worker }
147*9880d681SAndroid Build Coastguard Worker 
148*9880d681SAndroid Build Coastguard Worker LLVM_DUMP_METHOD
dump() const149*9880d681SAndroid Build Coastguard Worker void RegSortData::dump() const {
150*9880d681SAndroid Build Coastguard Worker   print(errs()); errs() << '\n';
151*9880d681SAndroid Build Coastguard Worker }
152*9880d681SAndroid Build Coastguard Worker 
153*9880d681SAndroid Build Coastguard Worker namespace {
154*9880d681SAndroid Build Coastguard Worker 
155*9880d681SAndroid Build Coastguard Worker /// Map register candidates to information about how they are used.
156*9880d681SAndroid Build Coastguard Worker class RegUseTracker {
157*9880d681SAndroid Build Coastguard Worker   typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
158*9880d681SAndroid Build Coastguard Worker 
159*9880d681SAndroid Build Coastguard Worker   RegUsesTy RegUsesMap;
160*9880d681SAndroid Build Coastguard Worker   SmallVector<const SCEV *, 16> RegSequence;
161*9880d681SAndroid Build Coastguard Worker 
162*9880d681SAndroid Build Coastguard Worker public:
163*9880d681SAndroid Build Coastguard Worker   void countRegister(const SCEV *Reg, size_t LUIdx);
164*9880d681SAndroid Build Coastguard Worker   void dropRegister(const SCEV *Reg, size_t LUIdx);
165*9880d681SAndroid Build Coastguard Worker   void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);
166*9880d681SAndroid Build Coastguard Worker 
167*9880d681SAndroid Build Coastguard Worker   bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
168*9880d681SAndroid Build Coastguard Worker 
169*9880d681SAndroid Build Coastguard Worker   const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;
170*9880d681SAndroid Build Coastguard Worker 
171*9880d681SAndroid Build Coastguard Worker   void clear();
172*9880d681SAndroid Build Coastguard Worker 
173*9880d681SAndroid Build Coastguard Worker   typedef SmallVectorImpl<const SCEV *>::iterator iterator;
174*9880d681SAndroid Build Coastguard Worker   typedef SmallVectorImpl<const SCEV *>::const_iterator const_iterator;
begin()175*9880d681SAndroid Build Coastguard Worker   iterator begin() { return RegSequence.begin(); }
end()176*9880d681SAndroid Build Coastguard Worker   iterator end()   { return RegSequence.end(); }
begin() const177*9880d681SAndroid Build Coastguard Worker   const_iterator begin() const { return RegSequence.begin(); }
end() const178*9880d681SAndroid Build Coastguard Worker   const_iterator end() const   { return RegSequence.end(); }
179*9880d681SAndroid Build Coastguard Worker };
180*9880d681SAndroid Build Coastguard Worker 
181*9880d681SAndroid Build Coastguard Worker }
182*9880d681SAndroid Build Coastguard Worker 
183*9880d681SAndroid Build Coastguard Worker void
countRegister(const SCEV * Reg,size_t LUIdx)184*9880d681SAndroid Build Coastguard Worker RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
185*9880d681SAndroid Build Coastguard Worker   std::pair<RegUsesTy::iterator, bool> Pair =
186*9880d681SAndroid Build Coastguard Worker     RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
187*9880d681SAndroid Build Coastguard Worker   RegSortData &RSD = Pair.first->second;
188*9880d681SAndroid Build Coastguard Worker   if (Pair.second)
189*9880d681SAndroid Build Coastguard Worker     RegSequence.push_back(Reg);
190*9880d681SAndroid Build Coastguard Worker   RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
191*9880d681SAndroid Build Coastguard Worker   RSD.UsedByIndices.set(LUIdx);
192*9880d681SAndroid Build Coastguard Worker }
193*9880d681SAndroid Build Coastguard Worker 
194*9880d681SAndroid Build Coastguard Worker void
dropRegister(const SCEV * Reg,size_t LUIdx)195*9880d681SAndroid Build Coastguard Worker RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
196*9880d681SAndroid Build Coastguard Worker   RegUsesTy::iterator It = RegUsesMap.find(Reg);
197*9880d681SAndroid Build Coastguard Worker   assert(It != RegUsesMap.end());
198*9880d681SAndroid Build Coastguard Worker   RegSortData &RSD = It->second;
199*9880d681SAndroid Build Coastguard Worker   assert(RSD.UsedByIndices.size() > LUIdx);
200*9880d681SAndroid Build Coastguard Worker   RSD.UsedByIndices.reset(LUIdx);
201*9880d681SAndroid Build Coastguard Worker }
202*9880d681SAndroid Build Coastguard Worker 
203*9880d681SAndroid Build Coastguard Worker void
swapAndDropUse(size_t LUIdx,size_t LastLUIdx)204*9880d681SAndroid Build Coastguard Worker RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
205*9880d681SAndroid Build Coastguard Worker   assert(LUIdx <= LastLUIdx);
206*9880d681SAndroid Build Coastguard Worker 
207*9880d681SAndroid Build Coastguard Worker   // Update RegUses. The data structure is not optimized for this purpose;
208*9880d681SAndroid Build Coastguard Worker   // we must iterate through it and update each of the bit vectors.
209*9880d681SAndroid Build Coastguard Worker   for (auto &Pair : RegUsesMap) {
210*9880d681SAndroid Build Coastguard Worker     SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
211*9880d681SAndroid Build Coastguard Worker     if (LUIdx < UsedByIndices.size())
212*9880d681SAndroid Build Coastguard Worker       UsedByIndices[LUIdx] =
213*9880d681SAndroid Build Coastguard Worker         LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0;
214*9880d681SAndroid Build Coastguard Worker     UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
215*9880d681SAndroid Build Coastguard Worker   }
216*9880d681SAndroid Build Coastguard Worker }
217*9880d681SAndroid Build Coastguard Worker 
218*9880d681SAndroid Build Coastguard Worker bool
isRegUsedByUsesOtherThan(const SCEV * Reg,size_t LUIdx) const219*9880d681SAndroid Build Coastguard Worker RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
220*9880d681SAndroid Build Coastguard Worker   RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
221*9880d681SAndroid Build Coastguard Worker   if (I == RegUsesMap.end())
222*9880d681SAndroid Build Coastguard Worker     return false;
223*9880d681SAndroid Build Coastguard Worker   const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
224*9880d681SAndroid Build Coastguard Worker   int i = UsedByIndices.find_first();
225*9880d681SAndroid Build Coastguard Worker   if (i == -1) return false;
226*9880d681SAndroid Build Coastguard Worker   if ((size_t)i != LUIdx) return true;
227*9880d681SAndroid Build Coastguard Worker   return UsedByIndices.find_next(i) != -1;
228*9880d681SAndroid Build Coastguard Worker }
229*9880d681SAndroid Build Coastguard Worker 
getUsedByIndices(const SCEV * Reg) const230*9880d681SAndroid Build Coastguard Worker const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
231*9880d681SAndroid Build Coastguard Worker   RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
232*9880d681SAndroid Build Coastguard Worker   assert(I != RegUsesMap.end() && "Unknown register!");
233*9880d681SAndroid Build Coastguard Worker   return I->second.UsedByIndices;
234*9880d681SAndroid Build Coastguard Worker }
235*9880d681SAndroid Build Coastguard Worker 
clear()236*9880d681SAndroid Build Coastguard Worker void RegUseTracker::clear() {
237*9880d681SAndroid Build Coastguard Worker   RegUsesMap.clear();
238*9880d681SAndroid Build Coastguard Worker   RegSequence.clear();
239*9880d681SAndroid Build Coastguard Worker }
240*9880d681SAndroid Build Coastguard Worker 
241*9880d681SAndroid Build Coastguard Worker namespace {
242*9880d681SAndroid Build Coastguard Worker 
243*9880d681SAndroid Build Coastguard Worker /// This class holds information that describes a formula for computing
244*9880d681SAndroid Build Coastguard Worker /// satisfying a use. It may include broken-out immediates and scaled registers.
245*9880d681SAndroid Build Coastguard Worker struct Formula {
246*9880d681SAndroid Build Coastguard Worker   /// Global base address used for complex addressing.
247*9880d681SAndroid Build Coastguard Worker   GlobalValue *BaseGV;
248*9880d681SAndroid Build Coastguard Worker 
249*9880d681SAndroid Build Coastguard Worker   /// Base offset for complex addressing.
250*9880d681SAndroid Build Coastguard Worker   int64_t BaseOffset;
251*9880d681SAndroid Build Coastguard Worker 
252*9880d681SAndroid Build Coastguard Worker   /// Whether any complex addressing has a base register.
253*9880d681SAndroid Build Coastguard Worker   bool HasBaseReg;
254*9880d681SAndroid Build Coastguard Worker 
255*9880d681SAndroid Build Coastguard Worker   /// The scale of any complex addressing.
256*9880d681SAndroid Build Coastguard Worker   int64_t Scale;
257*9880d681SAndroid Build Coastguard Worker 
258*9880d681SAndroid Build Coastguard Worker   /// The list of "base" registers for this use. When this is non-empty. The
259*9880d681SAndroid Build Coastguard Worker   /// canonical representation of a formula is
260*9880d681SAndroid Build Coastguard Worker   /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
261*9880d681SAndroid Build Coastguard Worker   /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
262*9880d681SAndroid Build Coastguard Worker   /// #1 enforces that the scaled register is always used when at least two
263*9880d681SAndroid Build Coastguard Worker   /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2.
264*9880d681SAndroid Build Coastguard Worker   /// #2 enforces that 1 * reg is reg.
265*9880d681SAndroid Build Coastguard Worker   /// This invariant can be temporarly broken while building a formula.
266*9880d681SAndroid Build Coastguard Worker   /// However, every formula inserted into the LSRInstance must be in canonical
267*9880d681SAndroid Build Coastguard Worker   /// form.
268*9880d681SAndroid Build Coastguard Worker   SmallVector<const SCEV *, 4> BaseRegs;
269*9880d681SAndroid Build Coastguard Worker 
270*9880d681SAndroid Build Coastguard Worker   /// The 'scaled' register for this use. This should be non-null when Scale is
271*9880d681SAndroid Build Coastguard Worker   /// not zero.
272*9880d681SAndroid Build Coastguard Worker   const SCEV *ScaledReg;
273*9880d681SAndroid Build Coastguard Worker 
274*9880d681SAndroid Build Coastguard Worker   /// An additional constant offset which added near the use. This requires a
275*9880d681SAndroid Build Coastguard Worker   /// temporary register, but the offset itself can live in an add immediate
276*9880d681SAndroid Build Coastguard Worker   /// field rather than a register.
277*9880d681SAndroid Build Coastguard Worker   int64_t UnfoldedOffset;
278*9880d681SAndroid Build Coastguard Worker 
Formula__anon6e4801cc0311::Formula279*9880d681SAndroid Build Coastguard Worker   Formula()
280*9880d681SAndroid Build Coastguard Worker       : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0),
281*9880d681SAndroid Build Coastguard Worker         ScaledReg(nullptr), UnfoldedOffset(0) {}
282*9880d681SAndroid Build Coastguard Worker 
283*9880d681SAndroid Build Coastguard Worker   void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
284*9880d681SAndroid Build Coastguard Worker 
285*9880d681SAndroid Build Coastguard Worker   bool isCanonical() const;
286*9880d681SAndroid Build Coastguard Worker 
287*9880d681SAndroid Build Coastguard Worker   void canonicalize();
288*9880d681SAndroid Build Coastguard Worker 
289*9880d681SAndroid Build Coastguard Worker   bool unscale();
290*9880d681SAndroid Build Coastguard Worker 
291*9880d681SAndroid Build Coastguard Worker   size_t getNumRegs() const;
292*9880d681SAndroid Build Coastguard Worker   Type *getType() const;
293*9880d681SAndroid Build Coastguard Worker 
294*9880d681SAndroid Build Coastguard Worker   void deleteBaseReg(const SCEV *&S);
295*9880d681SAndroid Build Coastguard Worker 
296*9880d681SAndroid Build Coastguard Worker   bool referencesReg(const SCEV *S) const;
297*9880d681SAndroid Build Coastguard Worker   bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
298*9880d681SAndroid Build Coastguard Worker                                   const RegUseTracker &RegUses) const;
299*9880d681SAndroid Build Coastguard Worker 
300*9880d681SAndroid Build Coastguard Worker   void print(raw_ostream &OS) const;
301*9880d681SAndroid Build Coastguard Worker   void dump() const;
302*9880d681SAndroid Build Coastguard Worker };
303*9880d681SAndroid Build Coastguard Worker 
304*9880d681SAndroid Build Coastguard Worker }
305*9880d681SAndroid Build Coastguard Worker 
306*9880d681SAndroid Build Coastguard Worker /// Recursion helper for initialMatch.
DoInitialMatch(const SCEV * S,Loop * L,SmallVectorImpl<const SCEV * > & Good,SmallVectorImpl<const SCEV * > & Bad,ScalarEvolution & SE)307*9880d681SAndroid Build Coastguard Worker static void DoInitialMatch(const SCEV *S, Loop *L,
308*9880d681SAndroid Build Coastguard Worker                            SmallVectorImpl<const SCEV *> &Good,
309*9880d681SAndroid Build Coastguard Worker                            SmallVectorImpl<const SCEV *> &Bad,
310*9880d681SAndroid Build Coastguard Worker                            ScalarEvolution &SE) {
311*9880d681SAndroid Build Coastguard Worker   // Collect expressions which properly dominate the loop header.
312*9880d681SAndroid Build Coastguard Worker   if (SE.properlyDominates(S, L->getHeader())) {
313*9880d681SAndroid Build Coastguard Worker     Good.push_back(S);
314*9880d681SAndroid Build Coastguard Worker     return;
315*9880d681SAndroid Build Coastguard Worker   }
316*9880d681SAndroid Build Coastguard Worker 
317*9880d681SAndroid Build Coastguard Worker   // Look at add operands.
318*9880d681SAndroid Build Coastguard Worker   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
319*9880d681SAndroid Build Coastguard Worker     for (const SCEV *S : Add->operands())
320*9880d681SAndroid Build Coastguard Worker       DoInitialMatch(S, L, Good, Bad, SE);
321*9880d681SAndroid Build Coastguard Worker     return;
322*9880d681SAndroid Build Coastguard Worker   }
323*9880d681SAndroid Build Coastguard Worker 
324*9880d681SAndroid Build Coastguard Worker   // Look at addrec operands.
325*9880d681SAndroid Build Coastguard Worker   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
326*9880d681SAndroid Build Coastguard Worker     if (!AR->getStart()->isZero()) {
327*9880d681SAndroid Build Coastguard Worker       DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
328*9880d681SAndroid Build Coastguard Worker       DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
329*9880d681SAndroid Build Coastguard Worker                                       AR->getStepRecurrence(SE),
330*9880d681SAndroid Build Coastguard Worker                                       // FIXME: AR->getNoWrapFlags()
331*9880d681SAndroid Build Coastguard Worker                                       AR->getLoop(), SCEV::FlagAnyWrap),
332*9880d681SAndroid Build Coastguard Worker                      L, Good, Bad, SE);
333*9880d681SAndroid Build Coastguard Worker       return;
334*9880d681SAndroid Build Coastguard Worker     }
335*9880d681SAndroid Build Coastguard Worker 
336*9880d681SAndroid Build Coastguard Worker   // Handle a multiplication by -1 (negation) if it didn't fold.
337*9880d681SAndroid Build Coastguard Worker   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
338*9880d681SAndroid Build Coastguard Worker     if (Mul->getOperand(0)->isAllOnesValue()) {
339*9880d681SAndroid Build Coastguard Worker       SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
340*9880d681SAndroid Build Coastguard Worker       const SCEV *NewMul = SE.getMulExpr(Ops);
341*9880d681SAndroid Build Coastguard Worker 
342*9880d681SAndroid Build Coastguard Worker       SmallVector<const SCEV *, 4> MyGood;
343*9880d681SAndroid Build Coastguard Worker       SmallVector<const SCEV *, 4> MyBad;
344*9880d681SAndroid Build Coastguard Worker       DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
345*9880d681SAndroid Build Coastguard Worker       const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
346*9880d681SAndroid Build Coastguard Worker         SE.getEffectiveSCEVType(NewMul->getType())));
347*9880d681SAndroid Build Coastguard Worker       for (const SCEV *S : MyGood)
348*9880d681SAndroid Build Coastguard Worker         Good.push_back(SE.getMulExpr(NegOne, S));
349*9880d681SAndroid Build Coastguard Worker       for (const SCEV *S : MyBad)
350*9880d681SAndroid Build Coastguard Worker         Bad.push_back(SE.getMulExpr(NegOne, S));
351*9880d681SAndroid Build Coastguard Worker       return;
352*9880d681SAndroid Build Coastguard Worker     }
353*9880d681SAndroid Build Coastguard Worker 
354*9880d681SAndroid Build Coastguard Worker   // Ok, we can't do anything interesting. Just stuff the whole thing into a
355*9880d681SAndroid Build Coastguard Worker   // register and hope for the best.
356*9880d681SAndroid Build Coastguard Worker   Bad.push_back(S);
357*9880d681SAndroid Build Coastguard Worker }
358*9880d681SAndroid Build Coastguard Worker 
359*9880d681SAndroid Build Coastguard Worker /// Incorporate loop-variant parts of S into this Formula, attempting to keep
360*9880d681SAndroid Build Coastguard Worker /// all loop-invariant and loop-computable values in a single base register.
initialMatch(const SCEV * S,Loop * L,ScalarEvolution & SE)361*9880d681SAndroid Build Coastguard Worker void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
362*9880d681SAndroid Build Coastguard Worker   SmallVector<const SCEV *, 4> Good;
363*9880d681SAndroid Build Coastguard Worker   SmallVector<const SCEV *, 4> Bad;
364*9880d681SAndroid Build Coastguard Worker   DoInitialMatch(S, L, Good, Bad, SE);
365*9880d681SAndroid Build Coastguard Worker   if (!Good.empty()) {
366*9880d681SAndroid Build Coastguard Worker     const SCEV *Sum = SE.getAddExpr(Good);
367*9880d681SAndroid Build Coastguard Worker     if (!Sum->isZero())
368*9880d681SAndroid Build Coastguard Worker       BaseRegs.push_back(Sum);
369*9880d681SAndroid Build Coastguard Worker     HasBaseReg = true;
370*9880d681SAndroid Build Coastguard Worker   }
371*9880d681SAndroid Build Coastguard Worker   if (!Bad.empty()) {
372*9880d681SAndroid Build Coastguard Worker     const SCEV *Sum = SE.getAddExpr(Bad);
373*9880d681SAndroid Build Coastguard Worker     if (!Sum->isZero())
374*9880d681SAndroid Build Coastguard Worker       BaseRegs.push_back(Sum);
375*9880d681SAndroid Build Coastguard Worker     HasBaseReg = true;
376*9880d681SAndroid Build Coastguard Worker   }
377*9880d681SAndroid Build Coastguard Worker   canonicalize();
378*9880d681SAndroid Build Coastguard Worker }
379*9880d681SAndroid Build Coastguard Worker 
380*9880d681SAndroid Build Coastguard Worker /// \brief Check whether or not this formula statisfies the canonical
381*9880d681SAndroid Build Coastguard Worker /// representation.
382*9880d681SAndroid Build Coastguard Worker /// \see Formula::BaseRegs.
isCanonical() const383*9880d681SAndroid Build Coastguard Worker bool Formula::isCanonical() const {
384*9880d681SAndroid Build Coastguard Worker   if (ScaledReg)
385*9880d681SAndroid Build Coastguard Worker     return Scale != 1 || !BaseRegs.empty();
386*9880d681SAndroid Build Coastguard Worker   return BaseRegs.size() <= 1;
387*9880d681SAndroid Build Coastguard Worker }
388*9880d681SAndroid Build Coastguard Worker 
389*9880d681SAndroid Build Coastguard Worker /// \brief Helper method to morph a formula into its canonical representation.
390*9880d681SAndroid Build Coastguard Worker /// \see Formula::BaseRegs.
391*9880d681SAndroid Build Coastguard Worker /// Every formula having more than one base register, must use the ScaledReg
392*9880d681SAndroid Build Coastguard Worker /// field. Otherwise, we would have to do special cases everywhere in LSR
393*9880d681SAndroid Build Coastguard Worker /// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
394*9880d681SAndroid Build Coastguard Worker /// On the other hand, 1*reg should be canonicalized into reg.
canonicalize()395*9880d681SAndroid Build Coastguard Worker void Formula::canonicalize() {
396*9880d681SAndroid Build Coastguard Worker   if (isCanonical())
397*9880d681SAndroid Build Coastguard Worker     return;
398*9880d681SAndroid Build Coastguard Worker   // So far we did not need this case. This is easy to implement but it is
399*9880d681SAndroid Build Coastguard Worker   // useless to maintain dead code. Beside it could hurt compile time.
400*9880d681SAndroid Build Coastguard Worker   assert(!BaseRegs.empty() && "1*reg => reg, should not be needed.");
401*9880d681SAndroid Build Coastguard Worker   // Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg.
402*9880d681SAndroid Build Coastguard Worker   ScaledReg = BaseRegs.back();
403*9880d681SAndroid Build Coastguard Worker   BaseRegs.pop_back();
404*9880d681SAndroid Build Coastguard Worker   Scale = 1;
405*9880d681SAndroid Build Coastguard Worker   size_t BaseRegsSize = BaseRegs.size();
406*9880d681SAndroid Build Coastguard Worker   size_t Try = 0;
407*9880d681SAndroid Build Coastguard Worker   // If ScaledReg is an invariant, try to find a variant expression.
408*9880d681SAndroid Build Coastguard Worker   while (Try < BaseRegsSize && !isa<SCEVAddRecExpr>(ScaledReg))
409*9880d681SAndroid Build Coastguard Worker     std::swap(ScaledReg, BaseRegs[Try++]);
410*9880d681SAndroid Build Coastguard Worker }
411*9880d681SAndroid Build Coastguard Worker 
412*9880d681SAndroid Build Coastguard Worker /// \brief Get rid of the scale in the formula.
413*9880d681SAndroid Build Coastguard Worker /// In other words, this method morphes reg1 + 1*reg2 into reg1 + reg2.
414*9880d681SAndroid Build Coastguard Worker /// \return true if it was possible to get rid of the scale, false otherwise.
415*9880d681SAndroid Build Coastguard Worker /// \note After this operation the formula may not be in the canonical form.
unscale()416*9880d681SAndroid Build Coastguard Worker bool Formula::unscale() {
417*9880d681SAndroid Build Coastguard Worker   if (Scale != 1)
418*9880d681SAndroid Build Coastguard Worker     return false;
419*9880d681SAndroid Build Coastguard Worker   Scale = 0;
420*9880d681SAndroid Build Coastguard Worker   BaseRegs.push_back(ScaledReg);
421*9880d681SAndroid Build Coastguard Worker   ScaledReg = nullptr;
422*9880d681SAndroid Build Coastguard Worker   return true;
423*9880d681SAndroid Build Coastguard Worker }
424*9880d681SAndroid Build Coastguard Worker 
425*9880d681SAndroid Build Coastguard Worker /// Return the total number of register operands used by this formula. This does
426*9880d681SAndroid Build Coastguard Worker /// not include register uses implied by non-constant addrec strides.
getNumRegs() const427*9880d681SAndroid Build Coastguard Worker size_t Formula::getNumRegs() const {
428*9880d681SAndroid Build Coastguard Worker   return !!ScaledReg + BaseRegs.size();
429*9880d681SAndroid Build Coastguard Worker }
430*9880d681SAndroid Build Coastguard Worker 
431*9880d681SAndroid Build Coastguard Worker /// Return the type of this formula, if it has one, or null otherwise. This type
432*9880d681SAndroid Build Coastguard Worker /// is meaningless except for the bit size.
getType() const433*9880d681SAndroid Build Coastguard Worker Type *Formula::getType() const {
434*9880d681SAndroid Build Coastguard Worker   return !BaseRegs.empty() ? BaseRegs.front()->getType() :
435*9880d681SAndroid Build Coastguard Worker          ScaledReg ? ScaledReg->getType() :
436*9880d681SAndroid Build Coastguard Worker          BaseGV ? BaseGV->getType() :
437*9880d681SAndroid Build Coastguard Worker          nullptr;
438*9880d681SAndroid Build Coastguard Worker }
439*9880d681SAndroid Build Coastguard Worker 
440*9880d681SAndroid Build Coastguard Worker /// Delete the given base reg from the BaseRegs list.
deleteBaseReg(const SCEV * & S)441*9880d681SAndroid Build Coastguard Worker void Formula::deleteBaseReg(const SCEV *&S) {
442*9880d681SAndroid Build Coastguard Worker   if (&S != &BaseRegs.back())
443*9880d681SAndroid Build Coastguard Worker     std::swap(S, BaseRegs.back());
444*9880d681SAndroid Build Coastguard Worker   BaseRegs.pop_back();
445*9880d681SAndroid Build Coastguard Worker }
446*9880d681SAndroid Build Coastguard Worker 
447*9880d681SAndroid Build Coastguard Worker /// Test if this formula references the given register.
referencesReg(const SCEV * S) const448*9880d681SAndroid Build Coastguard Worker bool Formula::referencesReg(const SCEV *S) const {
449*9880d681SAndroid Build Coastguard Worker   return S == ScaledReg ||
450*9880d681SAndroid Build Coastguard Worker          std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
451*9880d681SAndroid Build Coastguard Worker }
452*9880d681SAndroid Build Coastguard Worker 
453*9880d681SAndroid Build Coastguard Worker /// Test whether this formula uses registers which are used by uses other than
454*9880d681SAndroid Build Coastguard Worker /// the use with the given index.
hasRegsUsedByUsesOtherThan(size_t LUIdx,const RegUseTracker & RegUses) const455*9880d681SAndroid Build Coastguard Worker bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
456*9880d681SAndroid Build Coastguard Worker                                          const RegUseTracker &RegUses) const {
457*9880d681SAndroid Build Coastguard Worker   if (ScaledReg)
458*9880d681SAndroid Build Coastguard Worker     if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
459*9880d681SAndroid Build Coastguard Worker       return true;
460*9880d681SAndroid Build Coastguard Worker   for (const SCEV *BaseReg : BaseRegs)
461*9880d681SAndroid Build Coastguard Worker     if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
462*9880d681SAndroid Build Coastguard Worker       return true;
463*9880d681SAndroid Build Coastguard Worker   return false;
464*9880d681SAndroid Build Coastguard Worker }
465*9880d681SAndroid Build Coastguard Worker 
print(raw_ostream & OS) const466*9880d681SAndroid Build Coastguard Worker void Formula::print(raw_ostream &OS) const {
467*9880d681SAndroid Build Coastguard Worker   bool First = true;
468*9880d681SAndroid Build Coastguard Worker   if (BaseGV) {
469*9880d681SAndroid Build Coastguard Worker     if (!First) OS << " + "; else First = false;
470*9880d681SAndroid Build Coastguard Worker     BaseGV->printAsOperand(OS, /*PrintType=*/false);
471*9880d681SAndroid Build Coastguard Worker   }
472*9880d681SAndroid Build Coastguard Worker   if (BaseOffset != 0) {
473*9880d681SAndroid Build Coastguard Worker     if (!First) OS << " + "; else First = false;
474*9880d681SAndroid Build Coastguard Worker     OS << BaseOffset;
475*9880d681SAndroid Build Coastguard Worker   }
476*9880d681SAndroid Build Coastguard Worker   for (const SCEV *BaseReg : BaseRegs) {
477*9880d681SAndroid Build Coastguard Worker     if (!First) OS << " + "; else First = false;
478*9880d681SAndroid Build Coastguard Worker     OS << "reg(" << *BaseReg << ')';
479*9880d681SAndroid Build Coastguard Worker   }
480*9880d681SAndroid Build Coastguard Worker   if (HasBaseReg && BaseRegs.empty()) {
481*9880d681SAndroid Build Coastguard Worker     if (!First) OS << " + "; else First = false;
482*9880d681SAndroid Build Coastguard Worker     OS << "**error: HasBaseReg**";
483*9880d681SAndroid Build Coastguard Worker   } else if (!HasBaseReg && !BaseRegs.empty()) {
484*9880d681SAndroid Build Coastguard Worker     if (!First) OS << " + "; else First = false;
485*9880d681SAndroid Build Coastguard Worker     OS << "**error: !HasBaseReg**";
486*9880d681SAndroid Build Coastguard Worker   }
487*9880d681SAndroid Build Coastguard Worker   if (Scale != 0) {
488*9880d681SAndroid Build Coastguard Worker     if (!First) OS << " + "; else First = false;
489*9880d681SAndroid Build Coastguard Worker     OS << Scale << "*reg(";
490*9880d681SAndroid Build Coastguard Worker     if (ScaledReg)
491*9880d681SAndroid Build Coastguard Worker       OS << *ScaledReg;
492*9880d681SAndroid Build Coastguard Worker     else
493*9880d681SAndroid Build Coastguard Worker       OS << "<unknown>";
494*9880d681SAndroid Build Coastguard Worker     OS << ')';
495*9880d681SAndroid Build Coastguard Worker   }
496*9880d681SAndroid Build Coastguard Worker   if (UnfoldedOffset != 0) {
497*9880d681SAndroid Build Coastguard Worker     if (!First) OS << " + ";
498*9880d681SAndroid Build Coastguard Worker     OS << "imm(" << UnfoldedOffset << ')';
499*9880d681SAndroid Build Coastguard Worker   }
500*9880d681SAndroid Build Coastguard Worker }
501*9880d681SAndroid Build Coastguard Worker 
502*9880d681SAndroid Build Coastguard Worker LLVM_DUMP_METHOD
dump() const503*9880d681SAndroid Build Coastguard Worker void Formula::dump() const {
504*9880d681SAndroid Build Coastguard Worker   print(errs()); errs() << '\n';
505*9880d681SAndroid Build Coastguard Worker }
506*9880d681SAndroid Build Coastguard Worker 
507*9880d681SAndroid Build Coastguard Worker /// Return true if the given addrec can be sign-extended without changing its
508*9880d681SAndroid Build Coastguard Worker /// value.
isAddRecSExtable(const SCEVAddRecExpr * AR,ScalarEvolution & SE)509*9880d681SAndroid Build Coastguard Worker static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
510*9880d681SAndroid Build Coastguard Worker   Type *WideTy =
511*9880d681SAndroid Build Coastguard Worker     IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
512*9880d681SAndroid Build Coastguard Worker   return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
513*9880d681SAndroid Build Coastguard Worker }
514*9880d681SAndroid Build Coastguard Worker 
515*9880d681SAndroid Build Coastguard Worker /// Return true if the given add can be sign-extended without changing its
516*9880d681SAndroid Build Coastguard Worker /// value.
isAddSExtable(const SCEVAddExpr * A,ScalarEvolution & SE)517*9880d681SAndroid Build Coastguard Worker static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
518*9880d681SAndroid Build Coastguard Worker   Type *WideTy =
519*9880d681SAndroid Build Coastguard Worker     IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
520*9880d681SAndroid Build Coastguard Worker   return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
521*9880d681SAndroid Build Coastguard Worker }
522*9880d681SAndroid Build Coastguard Worker 
523*9880d681SAndroid Build Coastguard Worker /// Return true if the given mul can be sign-extended without changing its
524*9880d681SAndroid Build Coastguard Worker /// value.
isMulSExtable(const SCEVMulExpr * M,ScalarEvolution & SE)525*9880d681SAndroid Build Coastguard Worker static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
526*9880d681SAndroid Build Coastguard Worker   Type *WideTy =
527*9880d681SAndroid Build Coastguard Worker     IntegerType::get(SE.getContext(),
528*9880d681SAndroid Build Coastguard Worker                      SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
529*9880d681SAndroid Build Coastguard Worker   return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
530*9880d681SAndroid Build Coastguard Worker }
531*9880d681SAndroid Build Coastguard Worker 
532*9880d681SAndroid Build Coastguard Worker /// Return an expression for LHS /s RHS, if it can be determined and if the
533*9880d681SAndroid Build Coastguard Worker /// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits
534*9880d681SAndroid Build Coastguard Worker /// is true, expressions like (X * Y) /s Y are simplified to Y, ignoring that
535*9880d681SAndroid Build Coastguard Worker /// the multiplication may overflow, which is useful when the result will be
536*9880d681SAndroid Build Coastguard Worker /// used in a context where the most significant bits are ignored.
getExactSDiv(const SCEV * LHS,const SCEV * RHS,ScalarEvolution & SE,bool IgnoreSignificantBits=false)537*9880d681SAndroid Build Coastguard Worker static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
538*9880d681SAndroid Build Coastguard Worker                                 ScalarEvolution &SE,
539*9880d681SAndroid Build Coastguard Worker                                 bool IgnoreSignificantBits = false) {
540*9880d681SAndroid Build Coastguard Worker   // Handle the trivial case, which works for any SCEV type.
541*9880d681SAndroid Build Coastguard Worker   if (LHS == RHS)
542*9880d681SAndroid Build Coastguard Worker     return SE.getConstant(LHS->getType(), 1);
543*9880d681SAndroid Build Coastguard Worker 
544*9880d681SAndroid Build Coastguard Worker   // Handle a few RHS special cases.
545*9880d681SAndroid Build Coastguard Worker   const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
546*9880d681SAndroid Build Coastguard Worker   if (RC) {
547*9880d681SAndroid Build Coastguard Worker     const APInt &RA = RC->getAPInt();
548*9880d681SAndroid Build Coastguard Worker     // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
549*9880d681SAndroid Build Coastguard Worker     // some folding.
550*9880d681SAndroid Build Coastguard Worker     if (RA.isAllOnesValue())
551*9880d681SAndroid Build Coastguard Worker       return SE.getMulExpr(LHS, RC);
552*9880d681SAndroid Build Coastguard Worker     // Handle x /s 1 as x.
553*9880d681SAndroid Build Coastguard Worker     if (RA == 1)
554*9880d681SAndroid Build Coastguard Worker       return LHS;
555*9880d681SAndroid Build Coastguard Worker   }
556*9880d681SAndroid Build Coastguard Worker 
557*9880d681SAndroid Build Coastguard Worker   // Check for a division of a constant by a constant.
558*9880d681SAndroid Build Coastguard Worker   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
559*9880d681SAndroid Build Coastguard Worker     if (!RC)
560*9880d681SAndroid Build Coastguard Worker       return nullptr;
561*9880d681SAndroid Build Coastguard Worker     const APInt &LA = C->getAPInt();
562*9880d681SAndroid Build Coastguard Worker     const APInt &RA = RC->getAPInt();
563*9880d681SAndroid Build Coastguard Worker     if (LA.srem(RA) != 0)
564*9880d681SAndroid Build Coastguard Worker       return nullptr;
565*9880d681SAndroid Build Coastguard Worker     return SE.getConstant(LA.sdiv(RA));
566*9880d681SAndroid Build Coastguard Worker   }
567*9880d681SAndroid Build Coastguard Worker 
568*9880d681SAndroid Build Coastguard Worker   // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
569*9880d681SAndroid Build Coastguard Worker   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
570*9880d681SAndroid Build Coastguard Worker     if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
571*9880d681SAndroid Build Coastguard Worker       const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
572*9880d681SAndroid Build Coastguard Worker                                       IgnoreSignificantBits);
573*9880d681SAndroid Build Coastguard Worker       if (!Step) return nullptr;
574*9880d681SAndroid Build Coastguard Worker       const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
575*9880d681SAndroid Build Coastguard Worker                                        IgnoreSignificantBits);
576*9880d681SAndroid Build Coastguard Worker       if (!Start) return nullptr;
577*9880d681SAndroid Build Coastguard Worker       // FlagNW is independent of the start value, step direction, and is
578*9880d681SAndroid Build Coastguard Worker       // preserved with smaller magnitude steps.
579*9880d681SAndroid Build Coastguard Worker       // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
580*9880d681SAndroid Build Coastguard Worker       return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
581*9880d681SAndroid Build Coastguard Worker     }
582*9880d681SAndroid Build Coastguard Worker     return nullptr;
583*9880d681SAndroid Build Coastguard Worker   }
584*9880d681SAndroid Build Coastguard Worker 
585*9880d681SAndroid Build Coastguard Worker   // Distribute the sdiv over add operands, if the add doesn't overflow.
586*9880d681SAndroid Build Coastguard Worker   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
587*9880d681SAndroid Build Coastguard Worker     if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
588*9880d681SAndroid Build Coastguard Worker       SmallVector<const SCEV *, 8> Ops;
589*9880d681SAndroid Build Coastguard Worker       for (const SCEV *S : Add->operands()) {
590*9880d681SAndroid Build Coastguard Worker         const SCEV *Op = getExactSDiv(S, RHS, SE, IgnoreSignificantBits);
591*9880d681SAndroid Build Coastguard Worker         if (!Op) return nullptr;
592*9880d681SAndroid Build Coastguard Worker         Ops.push_back(Op);
593*9880d681SAndroid Build Coastguard Worker       }
594*9880d681SAndroid Build Coastguard Worker       return SE.getAddExpr(Ops);
595*9880d681SAndroid Build Coastguard Worker     }
596*9880d681SAndroid Build Coastguard Worker     return nullptr;
597*9880d681SAndroid Build Coastguard Worker   }
598*9880d681SAndroid Build Coastguard Worker 
599*9880d681SAndroid Build Coastguard Worker   // Check for a multiply operand that we can pull RHS out of.
600*9880d681SAndroid Build Coastguard Worker   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
601*9880d681SAndroid Build Coastguard Worker     if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
602*9880d681SAndroid Build Coastguard Worker       SmallVector<const SCEV *, 4> Ops;
603*9880d681SAndroid Build Coastguard Worker       bool Found = false;
604*9880d681SAndroid Build Coastguard Worker       for (const SCEV *S : Mul->operands()) {
605*9880d681SAndroid Build Coastguard Worker         if (!Found)
606*9880d681SAndroid Build Coastguard Worker           if (const SCEV *Q = getExactSDiv(S, RHS, SE,
607*9880d681SAndroid Build Coastguard Worker                                            IgnoreSignificantBits)) {
608*9880d681SAndroid Build Coastguard Worker             S = Q;
609*9880d681SAndroid Build Coastguard Worker             Found = true;
610*9880d681SAndroid Build Coastguard Worker           }
611*9880d681SAndroid Build Coastguard Worker         Ops.push_back(S);
612*9880d681SAndroid Build Coastguard Worker       }
613*9880d681SAndroid Build Coastguard Worker       return Found ? SE.getMulExpr(Ops) : nullptr;
614*9880d681SAndroid Build Coastguard Worker     }
615*9880d681SAndroid Build Coastguard Worker     return nullptr;
616*9880d681SAndroid Build Coastguard Worker   }
617*9880d681SAndroid Build Coastguard Worker 
618*9880d681SAndroid Build Coastguard Worker   // Otherwise we don't know.
619*9880d681SAndroid Build Coastguard Worker   return nullptr;
620*9880d681SAndroid Build Coastguard Worker }
621*9880d681SAndroid Build Coastguard Worker 
622*9880d681SAndroid Build Coastguard Worker /// If S involves the addition of a constant integer value, return that integer
623*9880d681SAndroid Build Coastguard Worker /// value, and mutate S to point to a new SCEV with that value excluded.
ExtractImmediate(const SCEV * & S,ScalarEvolution & SE)624*9880d681SAndroid Build Coastguard Worker static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
625*9880d681SAndroid Build Coastguard Worker   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
626*9880d681SAndroid Build Coastguard Worker     if (C->getAPInt().getMinSignedBits() <= 64) {
627*9880d681SAndroid Build Coastguard Worker       S = SE.getConstant(C->getType(), 0);
628*9880d681SAndroid Build Coastguard Worker       return C->getValue()->getSExtValue();
629*9880d681SAndroid Build Coastguard Worker     }
630*9880d681SAndroid Build Coastguard Worker   } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
631*9880d681SAndroid Build Coastguard Worker     SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
632*9880d681SAndroid Build Coastguard Worker     int64_t Result = ExtractImmediate(NewOps.front(), SE);
633*9880d681SAndroid Build Coastguard Worker     if (Result != 0)
634*9880d681SAndroid Build Coastguard Worker       S = SE.getAddExpr(NewOps);
635*9880d681SAndroid Build Coastguard Worker     return Result;
636*9880d681SAndroid Build Coastguard Worker   } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
637*9880d681SAndroid Build Coastguard Worker     SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
638*9880d681SAndroid Build Coastguard Worker     int64_t Result = ExtractImmediate(NewOps.front(), SE);
639*9880d681SAndroid Build Coastguard Worker     if (Result != 0)
640*9880d681SAndroid Build Coastguard Worker       S = SE.getAddRecExpr(NewOps, AR->getLoop(),
641*9880d681SAndroid Build Coastguard Worker                            // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
642*9880d681SAndroid Build Coastguard Worker                            SCEV::FlagAnyWrap);
643*9880d681SAndroid Build Coastguard Worker     return Result;
644*9880d681SAndroid Build Coastguard Worker   }
645*9880d681SAndroid Build Coastguard Worker   return 0;
646*9880d681SAndroid Build Coastguard Worker }
647*9880d681SAndroid Build Coastguard Worker 
648*9880d681SAndroid Build Coastguard Worker /// If S involves the addition of a GlobalValue address, return that symbol, and
649*9880d681SAndroid Build Coastguard Worker /// mutate S to point to a new SCEV with that value excluded.
ExtractSymbol(const SCEV * & S,ScalarEvolution & SE)650*9880d681SAndroid Build Coastguard Worker static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
651*9880d681SAndroid Build Coastguard Worker   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
652*9880d681SAndroid Build Coastguard Worker     if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
653*9880d681SAndroid Build Coastguard Worker       S = SE.getConstant(GV->getType(), 0);
654*9880d681SAndroid Build Coastguard Worker       return GV;
655*9880d681SAndroid Build Coastguard Worker     }
656*9880d681SAndroid Build Coastguard Worker   } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
657*9880d681SAndroid Build Coastguard Worker     SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
658*9880d681SAndroid Build Coastguard Worker     GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
659*9880d681SAndroid Build Coastguard Worker     if (Result)
660*9880d681SAndroid Build Coastguard Worker       S = SE.getAddExpr(NewOps);
661*9880d681SAndroid Build Coastguard Worker     return Result;
662*9880d681SAndroid Build Coastguard Worker   } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
663*9880d681SAndroid Build Coastguard Worker     SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
664*9880d681SAndroid Build Coastguard Worker     GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
665*9880d681SAndroid Build Coastguard Worker     if (Result)
666*9880d681SAndroid Build Coastguard Worker       S = SE.getAddRecExpr(NewOps, AR->getLoop(),
667*9880d681SAndroid Build Coastguard Worker                            // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
668*9880d681SAndroid Build Coastguard Worker                            SCEV::FlagAnyWrap);
669*9880d681SAndroid Build Coastguard Worker     return Result;
670*9880d681SAndroid Build Coastguard Worker   }
671*9880d681SAndroid Build Coastguard Worker   return nullptr;
672*9880d681SAndroid Build Coastguard Worker }
673*9880d681SAndroid Build Coastguard Worker 
674*9880d681SAndroid Build Coastguard Worker /// Returns true if the specified instruction is using the specified value as an
675*9880d681SAndroid Build Coastguard Worker /// address.
isAddressUse(Instruction * Inst,Value * OperandVal)676*9880d681SAndroid Build Coastguard Worker static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
677*9880d681SAndroid Build Coastguard Worker   bool isAddress = isa<LoadInst>(Inst);
678*9880d681SAndroid Build Coastguard Worker   if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
679*9880d681SAndroid Build Coastguard Worker     if (SI->getOperand(1) == OperandVal)
680*9880d681SAndroid Build Coastguard Worker       isAddress = true;
681*9880d681SAndroid Build Coastguard Worker   } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
682*9880d681SAndroid Build Coastguard Worker     // Addressing modes can also be folded into prefetches and a variety
683*9880d681SAndroid Build Coastguard Worker     // of intrinsics.
684*9880d681SAndroid Build Coastguard Worker     switch (II->getIntrinsicID()) {
685*9880d681SAndroid Build Coastguard Worker       default: break;
686*9880d681SAndroid Build Coastguard Worker       case Intrinsic::prefetch:
687*9880d681SAndroid Build Coastguard Worker         if (II->getArgOperand(0) == OperandVal)
688*9880d681SAndroid Build Coastguard Worker           isAddress = true;
689*9880d681SAndroid Build Coastguard Worker         break;
690*9880d681SAndroid Build Coastguard Worker     }
691*9880d681SAndroid Build Coastguard Worker   }
692*9880d681SAndroid Build Coastguard Worker   return isAddress;
693*9880d681SAndroid Build Coastguard Worker }
694*9880d681SAndroid Build Coastguard Worker 
695*9880d681SAndroid Build Coastguard Worker /// Return the type of the memory being accessed.
getAccessType(const Instruction * Inst)696*9880d681SAndroid Build Coastguard Worker static MemAccessTy getAccessType(const Instruction *Inst) {
697*9880d681SAndroid Build Coastguard Worker   MemAccessTy AccessTy(Inst->getType(), MemAccessTy::UnknownAddressSpace);
698*9880d681SAndroid Build Coastguard Worker   if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
699*9880d681SAndroid Build Coastguard Worker     AccessTy.MemTy = SI->getOperand(0)->getType();
700*9880d681SAndroid Build Coastguard Worker     AccessTy.AddrSpace = SI->getPointerAddressSpace();
701*9880d681SAndroid Build Coastguard Worker   } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
702*9880d681SAndroid Build Coastguard Worker     AccessTy.AddrSpace = LI->getPointerAddressSpace();
703*9880d681SAndroid Build Coastguard Worker   }
704*9880d681SAndroid Build Coastguard Worker 
705*9880d681SAndroid Build Coastguard Worker   // All pointers have the same requirements, so canonicalize them to an
706*9880d681SAndroid Build Coastguard Worker   // arbitrary pointer type to minimize variation.
707*9880d681SAndroid Build Coastguard Worker   if (PointerType *PTy = dyn_cast<PointerType>(AccessTy.MemTy))
708*9880d681SAndroid Build Coastguard Worker     AccessTy.MemTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
709*9880d681SAndroid Build Coastguard Worker                                       PTy->getAddressSpace());
710*9880d681SAndroid Build Coastguard Worker 
711*9880d681SAndroid Build Coastguard Worker   return AccessTy;
712*9880d681SAndroid Build Coastguard Worker }
713*9880d681SAndroid Build Coastguard Worker 
714*9880d681SAndroid Build Coastguard Worker /// Return true if this AddRec is already a phi in its loop.
isExistingPhi(const SCEVAddRecExpr * AR,ScalarEvolution & SE)715*9880d681SAndroid Build Coastguard Worker static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
716*9880d681SAndroid Build Coastguard Worker   for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
717*9880d681SAndroid Build Coastguard Worker        PHINode *PN = dyn_cast<PHINode>(I); ++I) {
718*9880d681SAndroid Build Coastguard Worker     if (SE.isSCEVable(PN->getType()) &&
719*9880d681SAndroid Build Coastguard Worker         (SE.getEffectiveSCEVType(PN->getType()) ==
720*9880d681SAndroid Build Coastguard Worker          SE.getEffectiveSCEVType(AR->getType())) &&
721*9880d681SAndroid Build Coastguard Worker         SE.getSCEV(PN) == AR)
722*9880d681SAndroid Build Coastguard Worker       return true;
723*9880d681SAndroid Build Coastguard Worker   }
724*9880d681SAndroid Build Coastguard Worker   return false;
725*9880d681SAndroid Build Coastguard Worker }
726*9880d681SAndroid Build Coastguard Worker 
727*9880d681SAndroid Build Coastguard Worker /// Check if expanding this expression is likely to incur significant cost. This
728*9880d681SAndroid Build Coastguard Worker /// is tricky because SCEV doesn't track which expressions are actually computed
729*9880d681SAndroid Build Coastguard Worker /// by the current IR.
730*9880d681SAndroid Build Coastguard Worker ///
731*9880d681SAndroid Build Coastguard Worker /// We currently allow expansion of IV increments that involve adds,
732*9880d681SAndroid Build Coastguard Worker /// multiplication by constants, and AddRecs from existing phis.
733*9880d681SAndroid Build Coastguard Worker ///
734*9880d681SAndroid Build Coastguard Worker /// TODO: Allow UDivExpr if we can find an existing IV increment that is an
735*9880d681SAndroid Build Coastguard Worker /// obvious multiple of the UDivExpr.
isHighCostExpansion(const SCEV * S,SmallPtrSetImpl<const SCEV * > & Processed,ScalarEvolution & SE)736*9880d681SAndroid Build Coastguard Worker static bool isHighCostExpansion(const SCEV *S,
737*9880d681SAndroid Build Coastguard Worker                                 SmallPtrSetImpl<const SCEV*> &Processed,
738*9880d681SAndroid Build Coastguard Worker                                 ScalarEvolution &SE) {
739*9880d681SAndroid Build Coastguard Worker   // Zero/One operand expressions
740*9880d681SAndroid Build Coastguard Worker   switch (S->getSCEVType()) {
741*9880d681SAndroid Build Coastguard Worker   case scUnknown:
742*9880d681SAndroid Build Coastguard Worker   case scConstant:
743*9880d681SAndroid Build Coastguard Worker     return false;
744*9880d681SAndroid Build Coastguard Worker   case scTruncate:
745*9880d681SAndroid Build Coastguard Worker     return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(),
746*9880d681SAndroid Build Coastguard Worker                                Processed, SE);
747*9880d681SAndroid Build Coastguard Worker   case scZeroExtend:
748*9880d681SAndroid Build Coastguard Worker     return isHighCostExpansion(cast<SCEVZeroExtendExpr>(S)->getOperand(),
749*9880d681SAndroid Build Coastguard Worker                                Processed, SE);
750*9880d681SAndroid Build Coastguard Worker   case scSignExtend:
751*9880d681SAndroid Build Coastguard Worker     return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(),
752*9880d681SAndroid Build Coastguard Worker                                Processed, SE);
753*9880d681SAndroid Build Coastguard Worker   }
754*9880d681SAndroid Build Coastguard Worker 
755*9880d681SAndroid Build Coastguard Worker   if (!Processed.insert(S).second)
756*9880d681SAndroid Build Coastguard Worker     return false;
757*9880d681SAndroid Build Coastguard Worker 
758*9880d681SAndroid Build Coastguard Worker   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
759*9880d681SAndroid Build Coastguard Worker     for (const SCEV *S : Add->operands()) {
760*9880d681SAndroid Build Coastguard Worker       if (isHighCostExpansion(S, Processed, SE))
761*9880d681SAndroid Build Coastguard Worker         return true;
762*9880d681SAndroid Build Coastguard Worker     }
763*9880d681SAndroid Build Coastguard Worker     return false;
764*9880d681SAndroid Build Coastguard Worker   }
765*9880d681SAndroid Build Coastguard Worker 
766*9880d681SAndroid Build Coastguard Worker   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
767*9880d681SAndroid Build Coastguard Worker     if (Mul->getNumOperands() == 2) {
768*9880d681SAndroid Build Coastguard Worker       // Multiplication by a constant is ok
769*9880d681SAndroid Build Coastguard Worker       if (isa<SCEVConstant>(Mul->getOperand(0)))
770*9880d681SAndroid Build Coastguard Worker         return isHighCostExpansion(Mul->getOperand(1), Processed, SE);
771*9880d681SAndroid Build Coastguard Worker 
772*9880d681SAndroid Build Coastguard Worker       // If we have the value of one operand, check if an existing
773*9880d681SAndroid Build Coastguard Worker       // multiplication already generates this expression.
774*9880d681SAndroid Build Coastguard Worker       if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
775*9880d681SAndroid Build Coastguard Worker         Value *UVal = U->getValue();
776*9880d681SAndroid Build Coastguard Worker         for (User *UR : UVal->users()) {
777*9880d681SAndroid Build Coastguard Worker           // If U is a constant, it may be used by a ConstantExpr.
778*9880d681SAndroid Build Coastguard Worker           Instruction *UI = dyn_cast<Instruction>(UR);
779*9880d681SAndroid Build Coastguard Worker           if (UI && UI->getOpcode() == Instruction::Mul &&
780*9880d681SAndroid Build Coastguard Worker               SE.isSCEVable(UI->getType())) {
781*9880d681SAndroid Build Coastguard Worker             return SE.getSCEV(UI) == Mul;
782*9880d681SAndroid Build Coastguard Worker           }
783*9880d681SAndroid Build Coastguard Worker         }
784*9880d681SAndroid Build Coastguard Worker       }
785*9880d681SAndroid Build Coastguard Worker     }
786*9880d681SAndroid Build Coastguard Worker   }
787*9880d681SAndroid Build Coastguard Worker 
788*9880d681SAndroid Build Coastguard Worker   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
789*9880d681SAndroid Build Coastguard Worker     if (isExistingPhi(AR, SE))
790*9880d681SAndroid Build Coastguard Worker       return false;
791*9880d681SAndroid Build Coastguard Worker   }
792*9880d681SAndroid Build Coastguard Worker 
  // For now, consider any other type of expression (div/mul/min/max) high cost.
794*9880d681SAndroid Build Coastguard Worker   return true;
795*9880d681SAndroid Build Coastguard Worker }
796*9880d681SAndroid Build Coastguard Worker 
797*9880d681SAndroid Build Coastguard Worker /// If any of the instructions is the specified set are trivially dead, delete
798*9880d681SAndroid Build Coastguard Worker /// them and see if this makes any of their operands subsequently dead.
799*9880d681SAndroid Build Coastguard Worker static bool
DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> & DeadInsts)800*9880d681SAndroid Build Coastguard Worker DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
801*9880d681SAndroid Build Coastguard Worker   bool Changed = false;
802*9880d681SAndroid Build Coastguard Worker 
803*9880d681SAndroid Build Coastguard Worker   while (!DeadInsts.empty()) {
804*9880d681SAndroid Build Coastguard Worker     Value *V = DeadInsts.pop_back_val();
805*9880d681SAndroid Build Coastguard Worker     Instruction *I = dyn_cast_or_null<Instruction>(V);
806*9880d681SAndroid Build Coastguard Worker 
807*9880d681SAndroid Build Coastguard Worker     if (!I || !isInstructionTriviallyDead(I))
808*9880d681SAndroid Build Coastguard Worker       continue;
809*9880d681SAndroid Build Coastguard Worker 
810*9880d681SAndroid Build Coastguard Worker     for (Use &O : I->operands())
811*9880d681SAndroid Build Coastguard Worker       if (Instruction *U = dyn_cast<Instruction>(O)) {
812*9880d681SAndroid Build Coastguard Worker         O = nullptr;
813*9880d681SAndroid Build Coastguard Worker         if (U->use_empty())
814*9880d681SAndroid Build Coastguard Worker           DeadInsts.emplace_back(U);
815*9880d681SAndroid Build Coastguard Worker       }
816*9880d681SAndroid Build Coastguard Worker 
817*9880d681SAndroid Build Coastguard Worker     I->eraseFromParent();
818*9880d681SAndroid Build Coastguard Worker     Changed = true;
819*9880d681SAndroid Build Coastguard Worker   }
820*9880d681SAndroid Build Coastguard Worker 
821*9880d681SAndroid Build Coastguard Worker   return Changed;
822*9880d681SAndroid Build Coastguard Worker }
823*9880d681SAndroid Build Coastguard Worker 
824*9880d681SAndroid Build Coastguard Worker namespace {
825*9880d681SAndroid Build Coastguard Worker class LSRUse;
826*9880d681SAndroid Build Coastguard Worker }
827*9880d681SAndroid Build Coastguard Worker 
828*9880d681SAndroid Build Coastguard Worker /// \brief Check if the addressing mode defined by \p F is completely
829*9880d681SAndroid Build Coastguard Worker /// folded in \p LU at isel time.
830*9880d681SAndroid Build Coastguard Worker /// This includes address-mode folding and special icmp tricks.
831*9880d681SAndroid Build Coastguard Worker /// This function returns true if \p LU can accommodate what \p F
832*9880d681SAndroid Build Coastguard Worker /// defines and up to 1 base + 1 scaled + offset.
833*9880d681SAndroid Build Coastguard Worker /// In other words, if \p F has several base registers, this function may
834*9880d681SAndroid Build Coastguard Worker /// still return true. Therefore, users still need to account for
835*9880d681SAndroid Build Coastguard Worker /// additional base registers and/or unfolded offsets to derive an
836*9880d681SAndroid Build Coastguard Worker /// accurate cost model.
837*9880d681SAndroid Build Coastguard Worker static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
838*9880d681SAndroid Build Coastguard Worker                                  const LSRUse &LU, const Formula &F);
839*9880d681SAndroid Build Coastguard Worker // Get the cost of the scaling factor used in F for LU.
840*9880d681SAndroid Build Coastguard Worker static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
841*9880d681SAndroid Build Coastguard Worker                                      const LSRUse &LU, const Formula &F);
842*9880d681SAndroid Build Coastguard Worker 
843*9880d681SAndroid Build Coastguard Worker namespace {
844*9880d681SAndroid Build Coastguard Worker 
845*9880d681SAndroid Build Coastguard Worker /// This class is used to measure and compare candidate formulae.
846*9880d681SAndroid Build Coastguard Worker class Cost {
847*9880d681SAndroid Build Coastguard Worker   /// TODO: Some of these could be merged. Also, a lexical ordering
848*9880d681SAndroid Build Coastguard Worker   /// isn't always optimal.
849*9880d681SAndroid Build Coastguard Worker   unsigned NumRegs;
850*9880d681SAndroid Build Coastguard Worker   unsigned AddRecCost;
851*9880d681SAndroid Build Coastguard Worker   unsigned NumIVMuls;
852*9880d681SAndroid Build Coastguard Worker   unsigned NumBaseAdds;
853*9880d681SAndroid Build Coastguard Worker   unsigned ImmCost;
854*9880d681SAndroid Build Coastguard Worker   unsigned SetupCost;
855*9880d681SAndroid Build Coastguard Worker   unsigned ScaleCost;
856*9880d681SAndroid Build Coastguard Worker 
857*9880d681SAndroid Build Coastguard Worker public:
Cost()858*9880d681SAndroid Build Coastguard Worker   Cost()
859*9880d681SAndroid Build Coastguard Worker     : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
860*9880d681SAndroid Build Coastguard Worker       SetupCost(0), ScaleCost(0) {}
861*9880d681SAndroid Build Coastguard Worker 
862*9880d681SAndroid Build Coastguard Worker   bool operator<(const Cost &Other) const;
863*9880d681SAndroid Build Coastguard Worker 
864*9880d681SAndroid Build Coastguard Worker   void Lose();
865*9880d681SAndroid Build Coastguard Worker 
866*9880d681SAndroid Build Coastguard Worker #ifndef NDEBUG
867*9880d681SAndroid Build Coastguard Worker   // Once any of the metrics loses, they must all remain losers.
isValid()868*9880d681SAndroid Build Coastguard Worker   bool isValid() {
869*9880d681SAndroid Build Coastguard Worker     return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
870*9880d681SAndroid Build Coastguard Worker              | ImmCost | SetupCost | ScaleCost) != ~0u)
871*9880d681SAndroid Build Coastguard Worker       || ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
872*9880d681SAndroid Build Coastguard Worker            & ImmCost & SetupCost & ScaleCost) == ~0u);
873*9880d681SAndroid Build Coastguard Worker   }
874*9880d681SAndroid Build Coastguard Worker #endif
875*9880d681SAndroid Build Coastguard Worker 
isLoser()876*9880d681SAndroid Build Coastguard Worker   bool isLoser() {
877*9880d681SAndroid Build Coastguard Worker     assert(isValid() && "invalid cost");
878*9880d681SAndroid Build Coastguard Worker     return NumRegs == ~0u;
879*9880d681SAndroid Build Coastguard Worker   }
880*9880d681SAndroid Build Coastguard Worker 
881*9880d681SAndroid Build Coastguard Worker   void RateFormula(const TargetTransformInfo &TTI,
882*9880d681SAndroid Build Coastguard Worker                    const Formula &F,
883*9880d681SAndroid Build Coastguard Worker                    SmallPtrSetImpl<const SCEV *> &Regs,
884*9880d681SAndroid Build Coastguard Worker                    const DenseSet<const SCEV *> &VisitedRegs,
885*9880d681SAndroid Build Coastguard Worker                    const Loop *L,
886*9880d681SAndroid Build Coastguard Worker                    const SmallVectorImpl<int64_t> &Offsets,
887*9880d681SAndroid Build Coastguard Worker                    ScalarEvolution &SE, DominatorTree &DT,
888*9880d681SAndroid Build Coastguard Worker                    const LSRUse &LU,
889*9880d681SAndroid Build Coastguard Worker                    SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);
890*9880d681SAndroid Build Coastguard Worker 
891*9880d681SAndroid Build Coastguard Worker   void print(raw_ostream &OS) const;
892*9880d681SAndroid Build Coastguard Worker   void dump() const;
893*9880d681SAndroid Build Coastguard Worker 
894*9880d681SAndroid Build Coastguard Worker private:
895*9880d681SAndroid Build Coastguard Worker   void RateRegister(const SCEV *Reg,
896*9880d681SAndroid Build Coastguard Worker                     SmallPtrSetImpl<const SCEV *> &Regs,
897*9880d681SAndroid Build Coastguard Worker                     const Loop *L,
898*9880d681SAndroid Build Coastguard Worker                     ScalarEvolution &SE, DominatorTree &DT);
899*9880d681SAndroid Build Coastguard Worker   void RatePrimaryRegister(const SCEV *Reg,
900*9880d681SAndroid Build Coastguard Worker                            SmallPtrSetImpl<const SCEV *> &Regs,
901*9880d681SAndroid Build Coastguard Worker                            const Loop *L,
902*9880d681SAndroid Build Coastguard Worker                            ScalarEvolution &SE, DominatorTree &DT,
903*9880d681SAndroid Build Coastguard Worker                            SmallPtrSetImpl<const SCEV *> *LoserRegs);
904*9880d681SAndroid Build Coastguard Worker };
905*9880d681SAndroid Build Coastguard Worker 
906*9880d681SAndroid Build Coastguard Worker }
907*9880d681SAndroid Build Coastguard Worker 
908*9880d681SAndroid Build Coastguard Worker /// Tally up interesting quantities from the given register.
RateRegister(const SCEV * Reg,SmallPtrSetImpl<const SCEV * > & Regs,const Loop * L,ScalarEvolution & SE,DominatorTree & DT)909*9880d681SAndroid Build Coastguard Worker void Cost::RateRegister(const SCEV *Reg,
910*9880d681SAndroid Build Coastguard Worker                         SmallPtrSetImpl<const SCEV *> &Regs,
911*9880d681SAndroid Build Coastguard Worker                         const Loop *L,
912*9880d681SAndroid Build Coastguard Worker                         ScalarEvolution &SE, DominatorTree &DT) {
913*9880d681SAndroid Build Coastguard Worker   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
914*9880d681SAndroid Build Coastguard Worker     // If this is an addrec for another loop, don't second-guess its addrec phi
915*9880d681SAndroid Build Coastguard Worker     // nodes. LSR isn't currently smart enough to reason about more than one
916*9880d681SAndroid Build Coastguard Worker     // loop at a time. LSR has already run on inner loops, will not run on outer
917*9880d681SAndroid Build Coastguard Worker     // loops, and cannot be expected to change sibling loops.
918*9880d681SAndroid Build Coastguard Worker     if (AR->getLoop() != L) {
919*9880d681SAndroid Build Coastguard Worker       // If the AddRec exists, consider it's register free and leave it alone.
920*9880d681SAndroid Build Coastguard Worker       if (isExistingPhi(AR, SE))
921*9880d681SAndroid Build Coastguard Worker         return;
922*9880d681SAndroid Build Coastguard Worker 
923*9880d681SAndroid Build Coastguard Worker       // Otherwise, do not consider this formula at all.
924*9880d681SAndroid Build Coastguard Worker       Lose();
925*9880d681SAndroid Build Coastguard Worker       return;
926*9880d681SAndroid Build Coastguard Worker     }
927*9880d681SAndroid Build Coastguard Worker     AddRecCost += 1; /// TODO: This should be a function of the stride.
928*9880d681SAndroid Build Coastguard Worker 
929*9880d681SAndroid Build Coastguard Worker     // Add the step value register, if it needs one.
930*9880d681SAndroid Build Coastguard Worker     // TODO: The non-affine case isn't precisely modeled here.
931*9880d681SAndroid Build Coastguard Worker     if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
932*9880d681SAndroid Build Coastguard Worker       if (!Regs.count(AR->getOperand(1))) {
933*9880d681SAndroid Build Coastguard Worker         RateRegister(AR->getOperand(1), Regs, L, SE, DT);
934*9880d681SAndroid Build Coastguard Worker         if (isLoser())
935*9880d681SAndroid Build Coastguard Worker           return;
936*9880d681SAndroid Build Coastguard Worker       }
937*9880d681SAndroid Build Coastguard Worker     }
938*9880d681SAndroid Build Coastguard Worker   }
939*9880d681SAndroid Build Coastguard Worker   ++NumRegs;
940*9880d681SAndroid Build Coastguard Worker 
941*9880d681SAndroid Build Coastguard Worker   // Rough heuristic; favor registers which don't require extra setup
942*9880d681SAndroid Build Coastguard Worker   // instructions in the preheader.
943*9880d681SAndroid Build Coastguard Worker   if (!isa<SCEVUnknown>(Reg) &&
944*9880d681SAndroid Build Coastguard Worker       !isa<SCEVConstant>(Reg) &&
945*9880d681SAndroid Build Coastguard Worker       !(isa<SCEVAddRecExpr>(Reg) &&
946*9880d681SAndroid Build Coastguard Worker         (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
947*9880d681SAndroid Build Coastguard Worker          isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
948*9880d681SAndroid Build Coastguard Worker     ++SetupCost;
949*9880d681SAndroid Build Coastguard Worker 
950*9880d681SAndroid Build Coastguard Worker   NumIVMuls += isa<SCEVMulExpr>(Reg) &&
951*9880d681SAndroid Build Coastguard Worker                SE.hasComputableLoopEvolution(Reg, L);
952*9880d681SAndroid Build Coastguard Worker }
953*9880d681SAndroid Build Coastguard Worker 
954*9880d681SAndroid Build Coastguard Worker /// Record this register in the set. If we haven't seen it before, rate
955*9880d681SAndroid Build Coastguard Worker /// it. Optional LoserRegs provides a way to declare any formula that refers to
956*9880d681SAndroid Build Coastguard Worker /// one of those regs an instant loser.
RatePrimaryRegister(const SCEV * Reg,SmallPtrSetImpl<const SCEV * > & Regs,const Loop * L,ScalarEvolution & SE,DominatorTree & DT,SmallPtrSetImpl<const SCEV * > * LoserRegs)957*9880d681SAndroid Build Coastguard Worker void Cost::RatePrimaryRegister(const SCEV *Reg,
958*9880d681SAndroid Build Coastguard Worker                                SmallPtrSetImpl<const SCEV *> &Regs,
959*9880d681SAndroid Build Coastguard Worker                                const Loop *L,
960*9880d681SAndroid Build Coastguard Worker                                ScalarEvolution &SE, DominatorTree &DT,
961*9880d681SAndroid Build Coastguard Worker                                SmallPtrSetImpl<const SCEV *> *LoserRegs) {
962*9880d681SAndroid Build Coastguard Worker   if (LoserRegs && LoserRegs->count(Reg)) {
963*9880d681SAndroid Build Coastguard Worker     Lose();
964*9880d681SAndroid Build Coastguard Worker     return;
965*9880d681SAndroid Build Coastguard Worker   }
966*9880d681SAndroid Build Coastguard Worker   if (Regs.insert(Reg).second) {
967*9880d681SAndroid Build Coastguard Worker     RateRegister(Reg, Regs, L, SE, DT);
968*9880d681SAndroid Build Coastguard Worker     if (LoserRegs && isLoser())
969*9880d681SAndroid Build Coastguard Worker       LoserRegs->insert(Reg);
970*9880d681SAndroid Build Coastguard Worker   }
971*9880d681SAndroid Build Coastguard Worker }
972*9880d681SAndroid Build Coastguard Worker 
RateFormula(const TargetTransformInfo & TTI,const Formula & F,SmallPtrSetImpl<const SCEV * > & Regs,const DenseSet<const SCEV * > & VisitedRegs,const Loop * L,const SmallVectorImpl<int64_t> & Offsets,ScalarEvolution & SE,DominatorTree & DT,const LSRUse & LU,SmallPtrSetImpl<const SCEV * > * LoserRegs)973*9880d681SAndroid Build Coastguard Worker void Cost::RateFormula(const TargetTransformInfo &TTI,
974*9880d681SAndroid Build Coastguard Worker                        const Formula &F,
975*9880d681SAndroid Build Coastguard Worker                        SmallPtrSetImpl<const SCEV *> &Regs,
976*9880d681SAndroid Build Coastguard Worker                        const DenseSet<const SCEV *> &VisitedRegs,
977*9880d681SAndroid Build Coastguard Worker                        const Loop *L,
978*9880d681SAndroid Build Coastguard Worker                        const SmallVectorImpl<int64_t> &Offsets,
979*9880d681SAndroid Build Coastguard Worker                        ScalarEvolution &SE, DominatorTree &DT,
980*9880d681SAndroid Build Coastguard Worker                        const LSRUse &LU,
981*9880d681SAndroid Build Coastguard Worker                        SmallPtrSetImpl<const SCEV *> *LoserRegs) {
982*9880d681SAndroid Build Coastguard Worker   assert(F.isCanonical() && "Cost is accurate only for canonical formula");
983*9880d681SAndroid Build Coastguard Worker   // Tally up the registers.
984*9880d681SAndroid Build Coastguard Worker   if (const SCEV *ScaledReg = F.ScaledReg) {
985*9880d681SAndroid Build Coastguard Worker     if (VisitedRegs.count(ScaledReg)) {
986*9880d681SAndroid Build Coastguard Worker       Lose();
987*9880d681SAndroid Build Coastguard Worker       return;
988*9880d681SAndroid Build Coastguard Worker     }
989*9880d681SAndroid Build Coastguard Worker     RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
990*9880d681SAndroid Build Coastguard Worker     if (isLoser())
991*9880d681SAndroid Build Coastguard Worker       return;
992*9880d681SAndroid Build Coastguard Worker   }
993*9880d681SAndroid Build Coastguard Worker   for (const SCEV *BaseReg : F.BaseRegs) {
994*9880d681SAndroid Build Coastguard Worker     if (VisitedRegs.count(BaseReg)) {
995*9880d681SAndroid Build Coastguard Worker       Lose();
996*9880d681SAndroid Build Coastguard Worker       return;
997*9880d681SAndroid Build Coastguard Worker     }
998*9880d681SAndroid Build Coastguard Worker     RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
999*9880d681SAndroid Build Coastguard Worker     if (isLoser())
1000*9880d681SAndroid Build Coastguard Worker       return;
1001*9880d681SAndroid Build Coastguard Worker   }
1002*9880d681SAndroid Build Coastguard Worker 
1003*9880d681SAndroid Build Coastguard Worker   // Determine how many (unfolded) adds we'll need inside the loop.
1004*9880d681SAndroid Build Coastguard Worker   size_t NumBaseParts = F.getNumRegs();
1005*9880d681SAndroid Build Coastguard Worker   if (NumBaseParts > 1)
1006*9880d681SAndroid Build Coastguard Worker     // Do not count the base and a possible second register if the target
1007*9880d681SAndroid Build Coastguard Worker     // allows to fold 2 registers.
1008*9880d681SAndroid Build Coastguard Worker     NumBaseAdds +=
1009*9880d681SAndroid Build Coastguard Worker         NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F)));
1010*9880d681SAndroid Build Coastguard Worker   NumBaseAdds += (F.UnfoldedOffset != 0);
1011*9880d681SAndroid Build Coastguard Worker 
1012*9880d681SAndroid Build Coastguard Worker   // Accumulate non-free scaling amounts.
1013*9880d681SAndroid Build Coastguard Worker   ScaleCost += getScalingFactorCost(TTI, LU, F);
1014*9880d681SAndroid Build Coastguard Worker 
1015*9880d681SAndroid Build Coastguard Worker   // Tally up the non-zero immediates.
1016*9880d681SAndroid Build Coastguard Worker   for (int64_t O : Offsets) {
1017*9880d681SAndroid Build Coastguard Worker     int64_t Offset = (uint64_t)O + F.BaseOffset;
1018*9880d681SAndroid Build Coastguard Worker     if (F.BaseGV)
1019*9880d681SAndroid Build Coastguard Worker       ImmCost += 64; // Handle symbolic values conservatively.
1020*9880d681SAndroid Build Coastguard Worker                      // TODO: This should probably be the pointer size.
1021*9880d681SAndroid Build Coastguard Worker     else if (Offset != 0)
1022*9880d681SAndroid Build Coastguard Worker       ImmCost += APInt(64, Offset, true).getMinSignedBits();
1023*9880d681SAndroid Build Coastguard Worker   }
1024*9880d681SAndroid Build Coastguard Worker   assert(isValid() && "invalid cost");
1025*9880d681SAndroid Build Coastguard Worker }
1026*9880d681SAndroid Build Coastguard Worker 
1027*9880d681SAndroid Build Coastguard Worker /// Set this cost to a losing value.
Lose()1028*9880d681SAndroid Build Coastguard Worker void Cost::Lose() {
1029*9880d681SAndroid Build Coastguard Worker   NumRegs = ~0u;
1030*9880d681SAndroid Build Coastguard Worker   AddRecCost = ~0u;
1031*9880d681SAndroid Build Coastguard Worker   NumIVMuls = ~0u;
1032*9880d681SAndroid Build Coastguard Worker   NumBaseAdds = ~0u;
1033*9880d681SAndroid Build Coastguard Worker   ImmCost = ~0u;
1034*9880d681SAndroid Build Coastguard Worker   SetupCost = ~0u;
1035*9880d681SAndroid Build Coastguard Worker   ScaleCost = ~0u;
1036*9880d681SAndroid Build Coastguard Worker }
1037*9880d681SAndroid Build Coastguard Worker 
1038*9880d681SAndroid Build Coastguard Worker /// Choose the lower cost.
operator <(const Cost & Other) const1039*9880d681SAndroid Build Coastguard Worker bool Cost::operator<(const Cost &Other) const {
1040*9880d681SAndroid Build Coastguard Worker   return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost,
1041*9880d681SAndroid Build Coastguard Worker                   ImmCost, SetupCost) <
1042*9880d681SAndroid Build Coastguard Worker          std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls,
1043*9880d681SAndroid Build Coastguard Worker                   Other.NumBaseAdds, Other.ScaleCost, Other.ImmCost,
1044*9880d681SAndroid Build Coastguard Worker                   Other.SetupCost);
1045*9880d681SAndroid Build Coastguard Worker }
1046*9880d681SAndroid Build Coastguard Worker 
print(raw_ostream & OS) const1047*9880d681SAndroid Build Coastguard Worker void Cost::print(raw_ostream &OS) const {
1048*9880d681SAndroid Build Coastguard Worker   OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
1049*9880d681SAndroid Build Coastguard Worker   if (AddRecCost != 0)
1050*9880d681SAndroid Build Coastguard Worker     OS << ", with addrec cost " << AddRecCost;
1051*9880d681SAndroid Build Coastguard Worker   if (NumIVMuls != 0)
1052*9880d681SAndroid Build Coastguard Worker     OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
1053*9880d681SAndroid Build Coastguard Worker   if (NumBaseAdds != 0)
1054*9880d681SAndroid Build Coastguard Worker     OS << ", plus " << NumBaseAdds << " base add"
1055*9880d681SAndroid Build Coastguard Worker        << (NumBaseAdds == 1 ? "" : "s");
1056*9880d681SAndroid Build Coastguard Worker   if (ScaleCost != 0)
1057*9880d681SAndroid Build Coastguard Worker     OS << ", plus " << ScaleCost << " scale cost";
1058*9880d681SAndroid Build Coastguard Worker   if (ImmCost != 0)
1059*9880d681SAndroid Build Coastguard Worker     OS << ", plus " << ImmCost << " imm cost";
1060*9880d681SAndroid Build Coastguard Worker   if (SetupCost != 0)
1061*9880d681SAndroid Build Coastguard Worker     OS << ", plus " << SetupCost << " setup cost";
1062*9880d681SAndroid Build Coastguard Worker }
1063*9880d681SAndroid Build Coastguard Worker 
1064*9880d681SAndroid Build Coastguard Worker LLVM_DUMP_METHOD
dump() const1065*9880d681SAndroid Build Coastguard Worker void Cost::dump() const {
1066*9880d681SAndroid Build Coastguard Worker   print(errs()); errs() << '\n';
1067*9880d681SAndroid Build Coastguard Worker }
1068*9880d681SAndroid Build Coastguard Worker 
1069*9880d681SAndroid Build Coastguard Worker namespace {
1070*9880d681SAndroid Build Coastguard Worker 
1071*9880d681SAndroid Build Coastguard Worker /// An operand value in an instruction which is to be replaced with some
1072*9880d681SAndroid Build Coastguard Worker /// equivalent, possibly strength-reduced, replacement.
1073*9880d681SAndroid Build Coastguard Worker struct LSRFixup {
1074*9880d681SAndroid Build Coastguard Worker   /// The instruction which will be updated.
1075*9880d681SAndroid Build Coastguard Worker   Instruction *UserInst;
1076*9880d681SAndroid Build Coastguard Worker 
1077*9880d681SAndroid Build Coastguard Worker   /// The operand of the instruction which will be replaced. The operand may be
1078*9880d681SAndroid Build Coastguard Worker   /// used more than once; every instance will be replaced.
1079*9880d681SAndroid Build Coastguard Worker   Value *OperandValToReplace;
1080*9880d681SAndroid Build Coastguard Worker 
1081*9880d681SAndroid Build Coastguard Worker   /// If this user is to use the post-incremented value of an induction
1082*9880d681SAndroid Build Coastguard Worker   /// variable, this variable is non-null and holds the loop associated with the
1083*9880d681SAndroid Build Coastguard Worker   /// induction variable.
1084*9880d681SAndroid Build Coastguard Worker   PostIncLoopSet PostIncLoops;
1085*9880d681SAndroid Build Coastguard Worker 
1086*9880d681SAndroid Build Coastguard Worker   /// The index of the LSRUse describing the expression which this fixup needs,
1087*9880d681SAndroid Build Coastguard Worker   /// minus an offset (below).
1088*9880d681SAndroid Build Coastguard Worker   size_t LUIdx;
1089*9880d681SAndroid Build Coastguard Worker 
1090*9880d681SAndroid Build Coastguard Worker   /// A constant offset to be added to the LSRUse expression.  This allows
1091*9880d681SAndroid Build Coastguard Worker   /// multiple fixups to share the same LSRUse with different offsets, for
1092*9880d681SAndroid Build Coastguard Worker   /// example in an unrolled loop.
1093*9880d681SAndroid Build Coastguard Worker   int64_t Offset;
1094*9880d681SAndroid Build Coastguard Worker 
1095*9880d681SAndroid Build Coastguard Worker   bool isUseFullyOutsideLoop(const Loop *L) const;
1096*9880d681SAndroid Build Coastguard Worker 
1097*9880d681SAndroid Build Coastguard Worker   LSRFixup();
1098*9880d681SAndroid Build Coastguard Worker 
1099*9880d681SAndroid Build Coastguard Worker   void print(raw_ostream &OS) const;
1100*9880d681SAndroid Build Coastguard Worker   void dump() const;
1101*9880d681SAndroid Build Coastguard Worker };
1102*9880d681SAndroid Build Coastguard Worker 
1103*9880d681SAndroid Build Coastguard Worker }
1104*9880d681SAndroid Build Coastguard Worker 
LSRFixup()1105*9880d681SAndroid Build Coastguard Worker LSRFixup::LSRFixup()
1106*9880d681SAndroid Build Coastguard Worker   : UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)),
1107*9880d681SAndroid Build Coastguard Worker     Offset(0) {}
1108*9880d681SAndroid Build Coastguard Worker 
1109*9880d681SAndroid Build Coastguard Worker /// Test whether this fixup always uses its value outside of the given loop.
isUseFullyOutsideLoop(const Loop * L) const1110*9880d681SAndroid Build Coastguard Worker bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
1111*9880d681SAndroid Build Coastguard Worker   // PHI nodes use their value in their incoming blocks.
1112*9880d681SAndroid Build Coastguard Worker   if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
1113*9880d681SAndroid Build Coastguard Worker     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1114*9880d681SAndroid Build Coastguard Worker       if (PN->getIncomingValue(i) == OperandValToReplace &&
1115*9880d681SAndroid Build Coastguard Worker           L->contains(PN->getIncomingBlock(i)))
1116*9880d681SAndroid Build Coastguard Worker         return false;
1117*9880d681SAndroid Build Coastguard Worker     return true;
1118*9880d681SAndroid Build Coastguard Worker   }
1119*9880d681SAndroid Build Coastguard Worker 
1120*9880d681SAndroid Build Coastguard Worker   return !L->contains(UserInst);
1121*9880d681SAndroid Build Coastguard Worker }
1122*9880d681SAndroid Build Coastguard Worker 
print(raw_ostream & OS) const1123*9880d681SAndroid Build Coastguard Worker void LSRFixup::print(raw_ostream &OS) const {
1124*9880d681SAndroid Build Coastguard Worker   OS << "UserInst=";
1125*9880d681SAndroid Build Coastguard Worker   // Store is common and interesting enough to be worth special-casing.
1126*9880d681SAndroid Build Coastguard Worker   if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
1127*9880d681SAndroid Build Coastguard Worker     OS << "store ";
1128*9880d681SAndroid Build Coastguard Worker     Store->getOperand(0)->printAsOperand(OS, /*PrintType=*/false);
1129*9880d681SAndroid Build Coastguard Worker   } else if (UserInst->getType()->isVoidTy())
1130*9880d681SAndroid Build Coastguard Worker     OS << UserInst->getOpcodeName();
1131*9880d681SAndroid Build Coastguard Worker   else
1132*9880d681SAndroid Build Coastguard Worker     UserInst->printAsOperand(OS, /*PrintType=*/false);
1133*9880d681SAndroid Build Coastguard Worker 
1134*9880d681SAndroid Build Coastguard Worker   OS << ", OperandValToReplace=";
1135*9880d681SAndroid Build Coastguard Worker   OperandValToReplace->printAsOperand(OS, /*PrintType=*/false);
1136*9880d681SAndroid Build Coastguard Worker 
1137*9880d681SAndroid Build Coastguard Worker   for (const Loop *PIL : PostIncLoops) {
1138*9880d681SAndroid Build Coastguard Worker     OS << ", PostIncLoop=";
1139*9880d681SAndroid Build Coastguard Worker     PIL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
1140*9880d681SAndroid Build Coastguard Worker   }
1141*9880d681SAndroid Build Coastguard Worker 
1142*9880d681SAndroid Build Coastguard Worker   if (LUIdx != ~size_t(0))
1143*9880d681SAndroid Build Coastguard Worker     OS << ", LUIdx=" << LUIdx;
1144*9880d681SAndroid Build Coastguard Worker 
1145*9880d681SAndroid Build Coastguard Worker   if (Offset != 0)
1146*9880d681SAndroid Build Coastguard Worker     OS << ", Offset=" << Offset;
1147*9880d681SAndroid Build Coastguard Worker }
1148*9880d681SAndroid Build Coastguard Worker 
1149*9880d681SAndroid Build Coastguard Worker LLVM_DUMP_METHOD
dump() const1150*9880d681SAndroid Build Coastguard Worker void LSRFixup::dump() const {
1151*9880d681SAndroid Build Coastguard Worker   print(errs()); errs() << '\n';
1152*9880d681SAndroid Build Coastguard Worker }
1153*9880d681SAndroid Build Coastguard Worker 
1154*9880d681SAndroid Build Coastguard Worker namespace {
1155*9880d681SAndroid Build Coastguard Worker 
1156*9880d681SAndroid Build Coastguard Worker /// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted
1157*9880d681SAndroid Build Coastguard Worker /// SmallVectors of const SCEV*.
1158*9880d681SAndroid Build Coastguard Worker struct UniquifierDenseMapInfo {
getEmptyKey__anon6e4801cc0711::UniquifierDenseMapInfo1159*9880d681SAndroid Build Coastguard Worker   static SmallVector<const SCEV *, 4> getEmptyKey() {
1160*9880d681SAndroid Build Coastguard Worker     SmallVector<const SCEV *, 4>  V;
1161*9880d681SAndroid Build Coastguard Worker     V.push_back(reinterpret_cast<const SCEV *>(-1));
1162*9880d681SAndroid Build Coastguard Worker     return V;
1163*9880d681SAndroid Build Coastguard Worker   }
1164*9880d681SAndroid Build Coastguard Worker 
getTombstoneKey__anon6e4801cc0711::UniquifierDenseMapInfo1165*9880d681SAndroid Build Coastguard Worker   static SmallVector<const SCEV *, 4> getTombstoneKey() {
1166*9880d681SAndroid Build Coastguard Worker     SmallVector<const SCEV *, 4> V;
1167*9880d681SAndroid Build Coastguard Worker     V.push_back(reinterpret_cast<const SCEV *>(-2));
1168*9880d681SAndroid Build Coastguard Worker     return V;
1169*9880d681SAndroid Build Coastguard Worker   }
1170*9880d681SAndroid Build Coastguard Worker 
getHashValue__anon6e4801cc0711::UniquifierDenseMapInfo1171*9880d681SAndroid Build Coastguard Worker   static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
1172*9880d681SAndroid Build Coastguard Worker     return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
1173*9880d681SAndroid Build Coastguard Worker   }
1174*9880d681SAndroid Build Coastguard Worker 
isEqual__anon6e4801cc0711::UniquifierDenseMapInfo1175*9880d681SAndroid Build Coastguard Worker   static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
1176*9880d681SAndroid Build Coastguard Worker                       const SmallVector<const SCEV *, 4> &RHS) {
1177*9880d681SAndroid Build Coastguard Worker     return LHS == RHS;
1178*9880d681SAndroid Build Coastguard Worker   }
1179*9880d681SAndroid Build Coastguard Worker };
1180*9880d681SAndroid Build Coastguard Worker 
1181*9880d681SAndroid Build Coastguard Worker /// This class holds the state that LSR keeps for each use in IVUsers, as well
1182*9880d681SAndroid Build Coastguard Worker /// as uses invented by LSR itself. It includes information about what kinds of
1183*9880d681SAndroid Build Coastguard Worker /// things can be folded into the user, information about the user itself, and
1184*9880d681SAndroid Build Coastguard Worker /// information about how the use may be satisfied.  TODO: Represent multiple
1185*9880d681SAndroid Build Coastguard Worker /// users of the same expression in common?
1186*9880d681SAndroid Build Coastguard Worker class LSRUse {
1187*9880d681SAndroid Build Coastguard Worker   DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
1188*9880d681SAndroid Build Coastguard Worker 
1189*9880d681SAndroid Build Coastguard Worker public:
1190*9880d681SAndroid Build Coastguard Worker   /// An enum for a kind of use, indicating what types of scaled and immediate
1191*9880d681SAndroid Build Coastguard Worker   /// operands it might support.
1192*9880d681SAndroid Build Coastguard Worker   enum KindType {
1193*9880d681SAndroid Build Coastguard Worker     Basic,   ///< A normal use, with no folding.
1194*9880d681SAndroid Build Coastguard Worker     Special, ///< A special case of basic, allowing -1 scales.
1195*9880d681SAndroid Build Coastguard Worker     Address, ///< An address use; folding according to TargetLowering
1196*9880d681SAndroid Build Coastguard Worker     ICmpZero ///< An equality icmp with both operands folded into one.
1197*9880d681SAndroid Build Coastguard Worker     // TODO: Add a generic icmp too?
1198*9880d681SAndroid Build Coastguard Worker   };
1199*9880d681SAndroid Build Coastguard Worker 
1200*9880d681SAndroid Build Coastguard Worker   typedef PointerIntPair<const SCEV *, 2, KindType> SCEVUseKindPair;
1201*9880d681SAndroid Build Coastguard Worker 
1202*9880d681SAndroid Build Coastguard Worker   KindType Kind;
1203*9880d681SAndroid Build Coastguard Worker   MemAccessTy AccessTy;
1204*9880d681SAndroid Build Coastguard Worker 
1205*9880d681SAndroid Build Coastguard Worker   SmallVector<int64_t, 8> Offsets;
1206*9880d681SAndroid Build Coastguard Worker   int64_t MinOffset;
1207*9880d681SAndroid Build Coastguard Worker   int64_t MaxOffset;
1208*9880d681SAndroid Build Coastguard Worker 
1209*9880d681SAndroid Build Coastguard Worker   /// This records whether all of the fixups using this LSRUse are outside of
1210*9880d681SAndroid Build Coastguard Worker   /// the loop, in which case some special-case heuristics may be used.
1211*9880d681SAndroid Build Coastguard Worker   bool AllFixupsOutsideLoop;
1212*9880d681SAndroid Build Coastguard Worker 
1213*9880d681SAndroid Build Coastguard Worker   /// RigidFormula is set to true to guarantee that this use will be associated
1214*9880d681SAndroid Build Coastguard Worker   /// with a single formula--the one that initially matched. Some SCEV
1215*9880d681SAndroid Build Coastguard Worker   /// expressions cannot be expanded. This allows LSR to consider the registers
1216*9880d681SAndroid Build Coastguard Worker   /// used by those expressions without the need to expand them later after
1217*9880d681SAndroid Build Coastguard Worker   /// changing the formula.
1218*9880d681SAndroid Build Coastguard Worker   bool RigidFormula;
1219*9880d681SAndroid Build Coastguard Worker 
1220*9880d681SAndroid Build Coastguard Worker   /// This records the widest use type for any fixup using this
1221*9880d681SAndroid Build Coastguard Worker   /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max
1222*9880d681SAndroid Build Coastguard Worker   /// fixup widths to be equivalent, because the narrower one may be relying on
1223*9880d681SAndroid Build Coastguard Worker   /// the implicit truncation to truncate away bogus bits.
1224*9880d681SAndroid Build Coastguard Worker   Type *WidestFixupType;
1225*9880d681SAndroid Build Coastguard Worker 
1226*9880d681SAndroid Build Coastguard Worker   /// A list of ways to build a value that can satisfy this user.  After the
1227*9880d681SAndroid Build Coastguard Worker   /// list is populated, one of these is selected heuristically and used to
1228*9880d681SAndroid Build Coastguard Worker   /// formulate a replacement for OperandValToReplace in UserInst.
1229*9880d681SAndroid Build Coastguard Worker   SmallVector<Formula, 12> Formulae;
1230*9880d681SAndroid Build Coastguard Worker 
1231*9880d681SAndroid Build Coastguard Worker   /// The set of register candidates used by all formulae in this LSRUse.
1232*9880d681SAndroid Build Coastguard Worker   SmallPtrSet<const SCEV *, 4> Regs;
1233*9880d681SAndroid Build Coastguard Worker 
LSRUse(KindType K,MemAccessTy AT)1234*9880d681SAndroid Build Coastguard Worker   LSRUse(KindType K, MemAccessTy AT)
1235*9880d681SAndroid Build Coastguard Worker       : Kind(K), AccessTy(AT), MinOffset(INT64_MAX), MaxOffset(INT64_MIN),
1236*9880d681SAndroid Build Coastguard Worker         AllFixupsOutsideLoop(true), RigidFormula(false),
1237*9880d681SAndroid Build Coastguard Worker         WidestFixupType(nullptr) {}
1238*9880d681SAndroid Build Coastguard Worker 
1239*9880d681SAndroid Build Coastguard Worker   bool HasFormulaWithSameRegs(const Formula &F) const;
1240*9880d681SAndroid Build Coastguard Worker   bool InsertFormula(const Formula &F);
1241*9880d681SAndroid Build Coastguard Worker   void DeleteFormula(Formula &F);
1242*9880d681SAndroid Build Coastguard Worker   void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
1243*9880d681SAndroid Build Coastguard Worker 
1244*9880d681SAndroid Build Coastguard Worker   void print(raw_ostream &OS) const;
1245*9880d681SAndroid Build Coastguard Worker   void dump() const;
1246*9880d681SAndroid Build Coastguard Worker };
1247*9880d681SAndroid Build Coastguard Worker 
1248*9880d681SAndroid Build Coastguard Worker }
1249*9880d681SAndroid Build Coastguard Worker 
1250*9880d681SAndroid Build Coastguard Worker /// Test whether this use as a formula which has the same registers as the given
1251*9880d681SAndroid Build Coastguard Worker /// formula.
HasFormulaWithSameRegs(const Formula & F) const1252*9880d681SAndroid Build Coastguard Worker bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
1253*9880d681SAndroid Build Coastguard Worker   SmallVector<const SCEV *, 4> Key = F.BaseRegs;
1254*9880d681SAndroid Build Coastguard Worker   if (F.ScaledReg) Key.push_back(F.ScaledReg);
1255*9880d681SAndroid Build Coastguard Worker   // Unstable sort by host order ok, because this is only used for uniquifying.
1256*9880d681SAndroid Build Coastguard Worker   std::sort(Key.begin(), Key.end());
1257*9880d681SAndroid Build Coastguard Worker   return Uniquifier.count(Key);
1258*9880d681SAndroid Build Coastguard Worker }
1259*9880d681SAndroid Build Coastguard Worker 
1260*9880d681SAndroid Build Coastguard Worker /// If the given formula has not yet been inserted, add it to the list, and
1261*9880d681SAndroid Build Coastguard Worker /// return true. Return false otherwise.  The formula must be in canonical form.
InsertFormula(const Formula & F)1262*9880d681SAndroid Build Coastguard Worker bool LSRUse::InsertFormula(const Formula &F) {
1263*9880d681SAndroid Build Coastguard Worker   assert(F.isCanonical() && "Invalid canonical representation");
1264*9880d681SAndroid Build Coastguard Worker 
1265*9880d681SAndroid Build Coastguard Worker   if (!Formulae.empty() && RigidFormula)
1266*9880d681SAndroid Build Coastguard Worker     return false;
1267*9880d681SAndroid Build Coastguard Worker 
1268*9880d681SAndroid Build Coastguard Worker   SmallVector<const SCEV *, 4> Key = F.BaseRegs;
1269*9880d681SAndroid Build Coastguard Worker   if (F.ScaledReg) Key.push_back(F.ScaledReg);
1270*9880d681SAndroid Build Coastguard Worker   // Unstable sort by host order ok, because this is only used for uniquifying.
1271*9880d681SAndroid Build Coastguard Worker   std::sort(Key.begin(), Key.end());
1272*9880d681SAndroid Build Coastguard Worker 
1273*9880d681SAndroid Build Coastguard Worker   if (!Uniquifier.insert(Key).second)
1274*9880d681SAndroid Build Coastguard Worker     return false;
1275*9880d681SAndroid Build Coastguard Worker 
1276*9880d681SAndroid Build Coastguard Worker   // Using a register to hold the value of 0 is not profitable.
1277*9880d681SAndroid Build Coastguard Worker   assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
1278*9880d681SAndroid Build Coastguard Worker          "Zero allocated in a scaled register!");
1279*9880d681SAndroid Build Coastguard Worker #ifndef NDEBUG
1280*9880d681SAndroid Build Coastguard Worker   for (const SCEV *BaseReg : F.BaseRegs)
1281*9880d681SAndroid Build Coastguard Worker     assert(!BaseReg->isZero() && "Zero allocated in a base register!");
1282*9880d681SAndroid Build Coastguard Worker #endif
1283*9880d681SAndroid Build Coastguard Worker 
1284*9880d681SAndroid Build Coastguard Worker   // Add the formula to the list.
1285*9880d681SAndroid Build Coastguard Worker   Formulae.push_back(F);
1286*9880d681SAndroid Build Coastguard Worker 
1287*9880d681SAndroid Build Coastguard Worker   // Record registers now being used by this use.
1288*9880d681SAndroid Build Coastguard Worker   Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
1289*9880d681SAndroid Build Coastguard Worker   if (F.ScaledReg)
1290*9880d681SAndroid Build Coastguard Worker     Regs.insert(F.ScaledReg);
1291*9880d681SAndroid Build Coastguard Worker 
1292*9880d681SAndroid Build Coastguard Worker   return true;
1293*9880d681SAndroid Build Coastguard Worker }
1294*9880d681SAndroid Build Coastguard Worker 
1295*9880d681SAndroid Build Coastguard Worker /// Remove the given formula from this use's list.
DeleteFormula(Formula & F)1296*9880d681SAndroid Build Coastguard Worker void LSRUse::DeleteFormula(Formula &F) {
1297*9880d681SAndroid Build Coastguard Worker   if (&F != &Formulae.back())
1298*9880d681SAndroid Build Coastguard Worker     std::swap(F, Formulae.back());
1299*9880d681SAndroid Build Coastguard Worker   Formulae.pop_back();
1300*9880d681SAndroid Build Coastguard Worker }
1301*9880d681SAndroid Build Coastguard Worker 
1302*9880d681SAndroid Build Coastguard Worker /// Recompute the Regs field, and update RegUses.
RecomputeRegs(size_t LUIdx,RegUseTracker & RegUses)1303*9880d681SAndroid Build Coastguard Worker void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
1304*9880d681SAndroid Build Coastguard Worker   // Now that we've filtered out some formulae, recompute the Regs set.
1305*9880d681SAndroid Build Coastguard Worker   SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
1306*9880d681SAndroid Build Coastguard Worker   Regs.clear();
1307*9880d681SAndroid Build Coastguard Worker   for (const Formula &F : Formulae) {
1308*9880d681SAndroid Build Coastguard Worker     if (F.ScaledReg) Regs.insert(F.ScaledReg);
1309*9880d681SAndroid Build Coastguard Worker     Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
1310*9880d681SAndroid Build Coastguard Worker   }
1311*9880d681SAndroid Build Coastguard Worker 
1312*9880d681SAndroid Build Coastguard Worker   // Update the RegTracker.
1313*9880d681SAndroid Build Coastguard Worker   for (const SCEV *S : OldRegs)
1314*9880d681SAndroid Build Coastguard Worker     if (!Regs.count(S))
1315*9880d681SAndroid Build Coastguard Worker       RegUses.dropRegister(S, LUIdx);
1316*9880d681SAndroid Build Coastguard Worker }
1317*9880d681SAndroid Build Coastguard Worker 
print(raw_ostream & OS) const1318*9880d681SAndroid Build Coastguard Worker void LSRUse::print(raw_ostream &OS) const {
1319*9880d681SAndroid Build Coastguard Worker   OS << "LSR Use: Kind=";
1320*9880d681SAndroid Build Coastguard Worker   switch (Kind) {
1321*9880d681SAndroid Build Coastguard Worker   case Basic:    OS << "Basic"; break;
1322*9880d681SAndroid Build Coastguard Worker   case Special:  OS << "Special"; break;
1323*9880d681SAndroid Build Coastguard Worker   case ICmpZero: OS << "ICmpZero"; break;
1324*9880d681SAndroid Build Coastguard Worker   case Address:
1325*9880d681SAndroid Build Coastguard Worker     OS << "Address of ";
1326*9880d681SAndroid Build Coastguard Worker     if (AccessTy.MemTy->isPointerTy())
1327*9880d681SAndroid Build Coastguard Worker       OS << "pointer"; // the full pointer type could be really verbose
1328*9880d681SAndroid Build Coastguard Worker     else {
1329*9880d681SAndroid Build Coastguard Worker       OS << *AccessTy.MemTy;
1330*9880d681SAndroid Build Coastguard Worker     }
1331*9880d681SAndroid Build Coastguard Worker 
1332*9880d681SAndroid Build Coastguard Worker     OS << " in addrspace(" << AccessTy.AddrSpace << ')';
1333*9880d681SAndroid Build Coastguard Worker   }
1334*9880d681SAndroid Build Coastguard Worker 
1335*9880d681SAndroid Build Coastguard Worker   OS << ", Offsets={";
1336*9880d681SAndroid Build Coastguard Worker   bool NeedComma = false;
1337*9880d681SAndroid Build Coastguard Worker   for (int64_t O : Offsets) {
1338*9880d681SAndroid Build Coastguard Worker     if (NeedComma) OS << ',';
1339*9880d681SAndroid Build Coastguard Worker     OS << O;
1340*9880d681SAndroid Build Coastguard Worker     NeedComma = true;
1341*9880d681SAndroid Build Coastguard Worker   }
1342*9880d681SAndroid Build Coastguard Worker   OS << '}';
1343*9880d681SAndroid Build Coastguard Worker 
1344*9880d681SAndroid Build Coastguard Worker   if (AllFixupsOutsideLoop)
1345*9880d681SAndroid Build Coastguard Worker     OS << ", all-fixups-outside-loop";
1346*9880d681SAndroid Build Coastguard Worker 
1347*9880d681SAndroid Build Coastguard Worker   if (WidestFixupType)
1348*9880d681SAndroid Build Coastguard Worker     OS << ", widest fixup type: " << *WidestFixupType;
1349*9880d681SAndroid Build Coastguard Worker }
1350*9880d681SAndroid Build Coastguard Worker 
1351*9880d681SAndroid Build Coastguard Worker LLVM_DUMP_METHOD
dump() const1352*9880d681SAndroid Build Coastguard Worker void LSRUse::dump() const {
1353*9880d681SAndroid Build Coastguard Worker   print(errs()); errs() << '\n';
1354*9880d681SAndroid Build Coastguard Worker }
1355*9880d681SAndroid Build Coastguard Worker 
isAMCompletelyFolded(const TargetTransformInfo & TTI,LSRUse::KindType Kind,MemAccessTy AccessTy,GlobalValue * BaseGV,int64_t BaseOffset,bool HasBaseReg,int64_t Scale)1356*9880d681SAndroid Build Coastguard Worker static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1357*9880d681SAndroid Build Coastguard Worker                                  LSRUse::KindType Kind, MemAccessTy AccessTy,
1358*9880d681SAndroid Build Coastguard Worker                                  GlobalValue *BaseGV, int64_t BaseOffset,
1359*9880d681SAndroid Build Coastguard Worker                                  bool HasBaseReg, int64_t Scale) {
1360*9880d681SAndroid Build Coastguard Worker   switch (Kind) {
1361*9880d681SAndroid Build Coastguard Worker   case LSRUse::Address:
1362*9880d681SAndroid Build Coastguard Worker     return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, BaseOffset,
1363*9880d681SAndroid Build Coastguard Worker                                      HasBaseReg, Scale, AccessTy.AddrSpace);
1364*9880d681SAndroid Build Coastguard Worker 
1365*9880d681SAndroid Build Coastguard Worker   case LSRUse::ICmpZero:
1366*9880d681SAndroid Build Coastguard Worker     // There's not even a target hook for querying whether it would be legal to
1367*9880d681SAndroid Build Coastguard Worker     // fold a GV into an ICmp.
1368*9880d681SAndroid Build Coastguard Worker     if (BaseGV)
1369*9880d681SAndroid Build Coastguard Worker       return false;
1370*9880d681SAndroid Build Coastguard Worker 
1371*9880d681SAndroid Build Coastguard Worker     // ICmp only has two operands; don't allow more than two non-trivial parts.
1372*9880d681SAndroid Build Coastguard Worker     if (Scale != 0 && HasBaseReg && BaseOffset != 0)
1373*9880d681SAndroid Build Coastguard Worker       return false;
1374*9880d681SAndroid Build Coastguard Worker 
1375*9880d681SAndroid Build Coastguard Worker     // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
1376*9880d681SAndroid Build Coastguard Worker     // putting the scaled register in the other operand of the icmp.
1377*9880d681SAndroid Build Coastguard Worker     if (Scale != 0 && Scale != -1)
1378*9880d681SAndroid Build Coastguard Worker       return false;
1379*9880d681SAndroid Build Coastguard Worker 
1380*9880d681SAndroid Build Coastguard Worker     // If we have low-level target information, ask the target if it can fold an
1381*9880d681SAndroid Build Coastguard Worker     // integer immediate on an icmp.
1382*9880d681SAndroid Build Coastguard Worker     if (BaseOffset != 0) {
1383*9880d681SAndroid Build Coastguard Worker       // We have one of:
1384*9880d681SAndroid Build Coastguard Worker       // ICmpZero     BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
1385*9880d681SAndroid Build Coastguard Worker       // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
1386*9880d681SAndroid Build Coastguard Worker       // Offs is the ICmp immediate.
1387*9880d681SAndroid Build Coastguard Worker       if (Scale == 0)
1388*9880d681SAndroid Build Coastguard Worker         // The cast does the right thing with INT64_MIN.
1389*9880d681SAndroid Build Coastguard Worker         BaseOffset = -(uint64_t)BaseOffset;
1390*9880d681SAndroid Build Coastguard Worker       return TTI.isLegalICmpImmediate(BaseOffset);
1391*9880d681SAndroid Build Coastguard Worker     }
1392*9880d681SAndroid Build Coastguard Worker 
1393*9880d681SAndroid Build Coastguard Worker     // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
1394*9880d681SAndroid Build Coastguard Worker     return true;
1395*9880d681SAndroid Build Coastguard Worker 
1396*9880d681SAndroid Build Coastguard Worker   case LSRUse::Basic:
1397*9880d681SAndroid Build Coastguard Worker     // Only handle single-register values.
1398*9880d681SAndroid Build Coastguard Worker     return !BaseGV && Scale == 0 && BaseOffset == 0;
1399*9880d681SAndroid Build Coastguard Worker 
1400*9880d681SAndroid Build Coastguard Worker   case LSRUse::Special:
1401*9880d681SAndroid Build Coastguard Worker     // Special case Basic to handle -1 scales.
1402*9880d681SAndroid Build Coastguard Worker     return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
1403*9880d681SAndroid Build Coastguard Worker   }
1404*9880d681SAndroid Build Coastguard Worker 
1405*9880d681SAndroid Build Coastguard Worker   llvm_unreachable("Invalid LSRUse Kind!");
1406*9880d681SAndroid Build Coastguard Worker }
1407*9880d681SAndroid Build Coastguard Worker 
isAMCompletelyFolded(const TargetTransformInfo & TTI,int64_t MinOffset,int64_t MaxOffset,LSRUse::KindType Kind,MemAccessTy AccessTy,GlobalValue * BaseGV,int64_t BaseOffset,bool HasBaseReg,int64_t Scale)1408*9880d681SAndroid Build Coastguard Worker static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1409*9880d681SAndroid Build Coastguard Worker                                  int64_t MinOffset, int64_t MaxOffset,
1410*9880d681SAndroid Build Coastguard Worker                                  LSRUse::KindType Kind, MemAccessTy AccessTy,
1411*9880d681SAndroid Build Coastguard Worker                                  GlobalValue *BaseGV, int64_t BaseOffset,
1412*9880d681SAndroid Build Coastguard Worker                                  bool HasBaseReg, int64_t Scale) {
1413*9880d681SAndroid Build Coastguard Worker   // Check for overflow.
1414*9880d681SAndroid Build Coastguard Worker   if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
1415*9880d681SAndroid Build Coastguard Worker       (MinOffset > 0))
1416*9880d681SAndroid Build Coastguard Worker     return false;
1417*9880d681SAndroid Build Coastguard Worker   MinOffset = (uint64_t)BaseOffset + MinOffset;
1418*9880d681SAndroid Build Coastguard Worker   if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
1419*9880d681SAndroid Build Coastguard Worker       (MaxOffset > 0))
1420*9880d681SAndroid Build Coastguard Worker     return false;
1421*9880d681SAndroid Build Coastguard Worker   MaxOffset = (uint64_t)BaseOffset + MaxOffset;
1422*9880d681SAndroid Build Coastguard Worker 
1423*9880d681SAndroid Build Coastguard Worker   return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset,
1424*9880d681SAndroid Build Coastguard Worker                               HasBaseReg, Scale) &&
1425*9880d681SAndroid Build Coastguard Worker          isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset,
1426*9880d681SAndroid Build Coastguard Worker                               HasBaseReg, Scale);
1427*9880d681SAndroid Build Coastguard Worker }
1428*9880d681SAndroid Build Coastguard Worker 
isAMCompletelyFolded(const TargetTransformInfo & TTI,int64_t MinOffset,int64_t MaxOffset,LSRUse::KindType Kind,MemAccessTy AccessTy,const Formula & F)1429*9880d681SAndroid Build Coastguard Worker static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1430*9880d681SAndroid Build Coastguard Worker                                  int64_t MinOffset, int64_t MaxOffset,
1431*9880d681SAndroid Build Coastguard Worker                                  LSRUse::KindType Kind, MemAccessTy AccessTy,
1432*9880d681SAndroid Build Coastguard Worker                                  const Formula &F) {
1433*9880d681SAndroid Build Coastguard Worker   // For the purpose of isAMCompletelyFolded either having a canonical formula
1434*9880d681SAndroid Build Coastguard Worker   // or a scale not equal to zero is correct.
1435*9880d681SAndroid Build Coastguard Worker   // Problems may arise from non canonical formulae having a scale == 0.
1436*9880d681SAndroid Build Coastguard Worker   // Strictly speaking it would best to just rely on canonical formulae.
1437*9880d681SAndroid Build Coastguard Worker   // However, when we generate the scaled formulae, we first check that the
1438*9880d681SAndroid Build Coastguard Worker   // scaling factor is profitable before computing the actual ScaledReg for
1439*9880d681SAndroid Build Coastguard Worker   // compile time sake.
1440*9880d681SAndroid Build Coastguard Worker   assert((F.isCanonical() || F.Scale != 0));
1441*9880d681SAndroid Build Coastguard Worker   return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
1442*9880d681SAndroid Build Coastguard Worker                               F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
1443*9880d681SAndroid Build Coastguard Worker }
1444*9880d681SAndroid Build Coastguard Worker 
1445*9880d681SAndroid Build Coastguard Worker /// Test whether we know how to expand the current formula.
isLegalUse(const TargetTransformInfo & TTI,int64_t MinOffset,int64_t MaxOffset,LSRUse::KindType Kind,MemAccessTy AccessTy,GlobalValue * BaseGV,int64_t BaseOffset,bool HasBaseReg,int64_t Scale)1446*9880d681SAndroid Build Coastguard Worker static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
1447*9880d681SAndroid Build Coastguard Worker                        int64_t MaxOffset, LSRUse::KindType Kind,
1448*9880d681SAndroid Build Coastguard Worker                        MemAccessTy AccessTy, GlobalValue *BaseGV,
1449*9880d681SAndroid Build Coastguard Worker                        int64_t BaseOffset, bool HasBaseReg, int64_t Scale) {
1450*9880d681SAndroid Build Coastguard Worker   // We know how to expand completely foldable formulae.
1451*9880d681SAndroid Build Coastguard Worker   return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
1452*9880d681SAndroid Build Coastguard Worker                               BaseOffset, HasBaseReg, Scale) ||
1453*9880d681SAndroid Build Coastguard Worker          // Or formulae that use a base register produced by a sum of base
1454*9880d681SAndroid Build Coastguard Worker          // registers.
1455*9880d681SAndroid Build Coastguard Worker          (Scale == 1 &&
1456*9880d681SAndroid Build Coastguard Worker           isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
1457*9880d681SAndroid Build Coastguard Worker                                BaseGV, BaseOffset, true, 0));
1458*9880d681SAndroid Build Coastguard Worker }
1459*9880d681SAndroid Build Coastguard Worker 
isLegalUse(const TargetTransformInfo & TTI,int64_t MinOffset,int64_t MaxOffset,LSRUse::KindType Kind,MemAccessTy AccessTy,const Formula & F)1460*9880d681SAndroid Build Coastguard Worker static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
1461*9880d681SAndroid Build Coastguard Worker                        int64_t MaxOffset, LSRUse::KindType Kind,
1462*9880d681SAndroid Build Coastguard Worker                        MemAccessTy AccessTy, const Formula &F) {
1463*9880d681SAndroid Build Coastguard Worker   return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
1464*9880d681SAndroid Build Coastguard Worker                     F.BaseOffset, F.HasBaseReg, F.Scale);
1465*9880d681SAndroid Build Coastguard Worker }
1466*9880d681SAndroid Build Coastguard Worker 
isAMCompletelyFolded(const TargetTransformInfo & TTI,const LSRUse & LU,const Formula & F)1467*9880d681SAndroid Build Coastguard Worker static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1468*9880d681SAndroid Build Coastguard Worker                                  const LSRUse &LU, const Formula &F) {
1469*9880d681SAndroid Build Coastguard Worker   return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
1470*9880d681SAndroid Build Coastguard Worker                               LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
1471*9880d681SAndroid Build Coastguard Worker                               F.Scale);
1472*9880d681SAndroid Build Coastguard Worker }
1473*9880d681SAndroid Build Coastguard Worker 
getScalingFactorCost(const TargetTransformInfo & TTI,const LSRUse & LU,const Formula & F)1474*9880d681SAndroid Build Coastguard Worker static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
1475*9880d681SAndroid Build Coastguard Worker                                      const LSRUse &LU, const Formula &F) {
1476*9880d681SAndroid Build Coastguard Worker   if (!F.Scale)
1477*9880d681SAndroid Build Coastguard Worker     return 0;
1478*9880d681SAndroid Build Coastguard Worker 
1479*9880d681SAndroid Build Coastguard Worker   // If the use is not completely folded in that instruction, we will have to
1480*9880d681SAndroid Build Coastguard Worker   // pay an extra cost only for scale != 1.
1481*9880d681SAndroid Build Coastguard Worker   if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
1482*9880d681SAndroid Build Coastguard Worker                             LU.AccessTy, F))
1483*9880d681SAndroid Build Coastguard Worker     return F.Scale != 1;
1484*9880d681SAndroid Build Coastguard Worker 
1485*9880d681SAndroid Build Coastguard Worker   switch (LU.Kind) {
1486*9880d681SAndroid Build Coastguard Worker   case LSRUse::Address: {
1487*9880d681SAndroid Build Coastguard Worker     // Check the scaling factor cost with both the min and max offsets.
1488*9880d681SAndroid Build Coastguard Worker     int ScaleCostMinOffset = TTI.getScalingFactorCost(
1489*9880d681SAndroid Build Coastguard Worker         LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MinOffset, F.HasBaseReg,
1490*9880d681SAndroid Build Coastguard Worker         F.Scale, LU.AccessTy.AddrSpace);
1491*9880d681SAndroid Build Coastguard Worker     int ScaleCostMaxOffset = TTI.getScalingFactorCost(
1492*9880d681SAndroid Build Coastguard Worker         LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MaxOffset, F.HasBaseReg,
1493*9880d681SAndroid Build Coastguard Worker         F.Scale, LU.AccessTy.AddrSpace);
1494*9880d681SAndroid Build Coastguard Worker 
1495*9880d681SAndroid Build Coastguard Worker     assert(ScaleCostMinOffset >= 0 && ScaleCostMaxOffset >= 0 &&
1496*9880d681SAndroid Build Coastguard Worker            "Legal addressing mode has an illegal cost!");
1497*9880d681SAndroid Build Coastguard Worker     return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
1498*9880d681SAndroid Build Coastguard Worker   }
1499*9880d681SAndroid Build Coastguard Worker   case LSRUse::ICmpZero:
1500*9880d681SAndroid Build Coastguard Worker   case LSRUse::Basic:
1501*9880d681SAndroid Build Coastguard Worker   case LSRUse::Special:
1502*9880d681SAndroid Build Coastguard Worker     // The use is completely folded, i.e., everything is folded into the
1503*9880d681SAndroid Build Coastguard Worker     // instruction.
1504*9880d681SAndroid Build Coastguard Worker     return 0;
1505*9880d681SAndroid Build Coastguard Worker   }
1506*9880d681SAndroid Build Coastguard Worker 
1507*9880d681SAndroid Build Coastguard Worker   llvm_unreachable("Invalid LSRUse Kind!");
1508*9880d681SAndroid Build Coastguard Worker }
1509*9880d681SAndroid Build Coastguard Worker 
isAlwaysFoldable(const TargetTransformInfo & TTI,LSRUse::KindType Kind,MemAccessTy AccessTy,GlobalValue * BaseGV,int64_t BaseOffset,bool HasBaseReg)1510*9880d681SAndroid Build Coastguard Worker static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
1511*9880d681SAndroid Build Coastguard Worker                              LSRUse::KindType Kind, MemAccessTy AccessTy,
1512*9880d681SAndroid Build Coastguard Worker                              GlobalValue *BaseGV, int64_t BaseOffset,
1513*9880d681SAndroid Build Coastguard Worker                              bool HasBaseReg) {
1514*9880d681SAndroid Build Coastguard Worker   // Fast-path: zero is always foldable.
1515*9880d681SAndroid Build Coastguard Worker   if (BaseOffset == 0 && !BaseGV) return true;
1516*9880d681SAndroid Build Coastguard Worker 
1517*9880d681SAndroid Build Coastguard Worker   // Conservatively, create an address with an immediate and a
1518*9880d681SAndroid Build Coastguard Worker   // base and a scale.
1519*9880d681SAndroid Build Coastguard Worker   int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
1520*9880d681SAndroid Build Coastguard Worker 
1521*9880d681SAndroid Build Coastguard Worker   // Canonicalize a scale of 1 to a base register if the formula doesn't
1522*9880d681SAndroid Build Coastguard Worker   // already have a base register.
1523*9880d681SAndroid Build Coastguard Worker   if (!HasBaseReg && Scale == 1) {
1524*9880d681SAndroid Build Coastguard Worker     Scale = 0;
1525*9880d681SAndroid Build Coastguard Worker     HasBaseReg = true;
1526*9880d681SAndroid Build Coastguard Worker   }
1527*9880d681SAndroid Build Coastguard Worker 
1528*9880d681SAndroid Build Coastguard Worker   return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
1529*9880d681SAndroid Build Coastguard Worker                               HasBaseReg, Scale);
1530*9880d681SAndroid Build Coastguard Worker }
1531*9880d681SAndroid Build Coastguard Worker 
isAlwaysFoldable(const TargetTransformInfo & TTI,ScalarEvolution & SE,int64_t MinOffset,int64_t MaxOffset,LSRUse::KindType Kind,MemAccessTy AccessTy,const SCEV * S,bool HasBaseReg)1532*9880d681SAndroid Build Coastguard Worker static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
1533*9880d681SAndroid Build Coastguard Worker                              ScalarEvolution &SE, int64_t MinOffset,
1534*9880d681SAndroid Build Coastguard Worker                              int64_t MaxOffset, LSRUse::KindType Kind,
1535*9880d681SAndroid Build Coastguard Worker                              MemAccessTy AccessTy, const SCEV *S,
1536*9880d681SAndroid Build Coastguard Worker                              bool HasBaseReg) {
1537*9880d681SAndroid Build Coastguard Worker   // Fast-path: zero is always foldable.
1538*9880d681SAndroid Build Coastguard Worker   if (S->isZero()) return true;
1539*9880d681SAndroid Build Coastguard Worker 
1540*9880d681SAndroid Build Coastguard Worker   // Conservatively, create an address with an immediate and a
1541*9880d681SAndroid Build Coastguard Worker   // base and a scale.
1542*9880d681SAndroid Build Coastguard Worker   int64_t BaseOffset = ExtractImmediate(S, SE);
1543*9880d681SAndroid Build Coastguard Worker   GlobalValue *BaseGV = ExtractSymbol(S, SE);
1544*9880d681SAndroid Build Coastguard Worker 
1545*9880d681SAndroid Build Coastguard Worker   // If there's anything else involved, it's not foldable.
1546*9880d681SAndroid Build Coastguard Worker   if (!S->isZero()) return false;
1547*9880d681SAndroid Build Coastguard Worker 
1548*9880d681SAndroid Build Coastguard Worker   // Fast-path: zero is always foldable.
1549*9880d681SAndroid Build Coastguard Worker   if (BaseOffset == 0 && !BaseGV) return true;
1550*9880d681SAndroid Build Coastguard Worker 
1551*9880d681SAndroid Build Coastguard Worker   // Conservatively, create an address with an immediate and a
1552*9880d681SAndroid Build Coastguard Worker   // base and a scale.
1553*9880d681SAndroid Build Coastguard Worker   int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
1554*9880d681SAndroid Build Coastguard Worker 
1555*9880d681SAndroid Build Coastguard Worker   return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
1556*9880d681SAndroid Build Coastguard Worker                               BaseOffset, HasBaseReg, Scale);
1557*9880d681SAndroid Build Coastguard Worker }
1558*9880d681SAndroid Build Coastguard Worker 
1559*9880d681SAndroid Build Coastguard Worker namespace {
1560*9880d681SAndroid Build Coastguard Worker 
1561*9880d681SAndroid Build Coastguard Worker /// An individual increment in a Chain of IV increments.  Relate an IV user to
1562*9880d681SAndroid Build Coastguard Worker /// an expression that computes the IV it uses from the IV used by the previous
1563*9880d681SAndroid Build Coastguard Worker /// link in the Chain.
1564*9880d681SAndroid Build Coastguard Worker ///
1565*9880d681SAndroid Build Coastguard Worker /// For the head of a chain, IncExpr holds the absolute SCEV expression for the
1566*9880d681SAndroid Build Coastguard Worker /// original IVOperand. The head of the chain's IVOperand is only valid during
1567*9880d681SAndroid Build Coastguard Worker /// chain collection, before LSR replaces IV users. During chain generation,
1568*9880d681SAndroid Build Coastguard Worker /// IncExpr can be used to find the new IVOperand that computes the same
1569*9880d681SAndroid Build Coastguard Worker /// expression.
1570*9880d681SAndroid Build Coastguard Worker struct IVInc {
1571*9880d681SAndroid Build Coastguard Worker   Instruction *UserInst;
1572*9880d681SAndroid Build Coastguard Worker   Value* IVOperand;
1573*9880d681SAndroid Build Coastguard Worker   const SCEV *IncExpr;
1574*9880d681SAndroid Build Coastguard Worker 
IVInc__anon6e4801cc0811::IVInc1575*9880d681SAndroid Build Coastguard Worker   IVInc(Instruction *U, Value *O, const SCEV *E):
1576*9880d681SAndroid Build Coastguard Worker     UserInst(U), IVOperand(O), IncExpr(E) {}
1577*9880d681SAndroid Build Coastguard Worker };
1578*9880d681SAndroid Build Coastguard Worker 
1579*9880d681SAndroid Build Coastguard Worker // The list of IV increments in program order.  We typically add the head of a
1580*9880d681SAndroid Build Coastguard Worker // chain without finding subsequent links.
1581*9880d681SAndroid Build Coastguard Worker struct IVChain {
1582*9880d681SAndroid Build Coastguard Worker   SmallVector<IVInc,1> Incs;
1583*9880d681SAndroid Build Coastguard Worker   const SCEV *ExprBase;
1584*9880d681SAndroid Build Coastguard Worker 
IVChain__anon6e4801cc0811::IVChain1585*9880d681SAndroid Build Coastguard Worker   IVChain() : ExprBase(nullptr) {}
1586*9880d681SAndroid Build Coastguard Worker 
IVChain__anon6e4801cc0811::IVChain1587*9880d681SAndroid Build Coastguard Worker   IVChain(const IVInc &Head, const SCEV *Base)
1588*9880d681SAndroid Build Coastguard Worker     : Incs(1, Head), ExprBase(Base) {}
1589*9880d681SAndroid Build Coastguard Worker 
1590*9880d681SAndroid Build Coastguard Worker   typedef SmallVectorImpl<IVInc>::const_iterator const_iterator;
1591*9880d681SAndroid Build Coastguard Worker 
1592*9880d681SAndroid Build Coastguard Worker   // Return the first increment in the chain.
begin__anon6e4801cc0811::IVChain1593*9880d681SAndroid Build Coastguard Worker   const_iterator begin() const {
1594*9880d681SAndroid Build Coastguard Worker     assert(!Incs.empty());
1595*9880d681SAndroid Build Coastguard Worker     return std::next(Incs.begin());
1596*9880d681SAndroid Build Coastguard Worker   }
end__anon6e4801cc0811::IVChain1597*9880d681SAndroid Build Coastguard Worker   const_iterator end() const {
1598*9880d681SAndroid Build Coastguard Worker     return Incs.end();
1599*9880d681SAndroid Build Coastguard Worker   }
1600*9880d681SAndroid Build Coastguard Worker 
1601*9880d681SAndroid Build Coastguard Worker   // Returns true if this chain contains any increments.
hasIncs__anon6e4801cc0811::IVChain1602*9880d681SAndroid Build Coastguard Worker   bool hasIncs() const { return Incs.size() >= 2; }
1603*9880d681SAndroid Build Coastguard Worker 
1604*9880d681SAndroid Build Coastguard Worker   // Add an IVInc to the end of this chain.
add__anon6e4801cc0811::IVChain1605*9880d681SAndroid Build Coastguard Worker   void add(const IVInc &X) { Incs.push_back(X); }
1606*9880d681SAndroid Build Coastguard Worker 
1607*9880d681SAndroid Build Coastguard Worker   // Returns the last UserInst in the chain.
tailUserInst__anon6e4801cc0811::IVChain1608*9880d681SAndroid Build Coastguard Worker   Instruction *tailUserInst() const { return Incs.back().UserInst; }
1609*9880d681SAndroid Build Coastguard Worker 
1610*9880d681SAndroid Build Coastguard Worker   // Returns true if IncExpr can be profitably added to this chain.
1611*9880d681SAndroid Build Coastguard Worker   bool isProfitableIncrement(const SCEV *OperExpr,
1612*9880d681SAndroid Build Coastguard Worker                              const SCEV *IncExpr,
1613*9880d681SAndroid Build Coastguard Worker                              ScalarEvolution&);
1614*9880d681SAndroid Build Coastguard Worker };
1615*9880d681SAndroid Build Coastguard Worker 
1616*9880d681SAndroid Build Coastguard Worker /// Helper for CollectChains to track multiple IV increment uses.  Distinguish
1617*9880d681SAndroid Build Coastguard Worker /// between FarUsers that definitely cross IV increments and NearUsers that may
1618*9880d681SAndroid Build Coastguard Worker /// be used between IV increments.
1619*9880d681SAndroid Build Coastguard Worker struct ChainUsers {
1620*9880d681SAndroid Build Coastguard Worker   SmallPtrSet<Instruction*, 4> FarUsers;
1621*9880d681SAndroid Build Coastguard Worker   SmallPtrSet<Instruction*, 4> NearUsers;
1622*9880d681SAndroid Build Coastguard Worker };
1623*9880d681SAndroid Build Coastguard Worker 
1624*9880d681SAndroid Build Coastguard Worker /// This class holds state for the main loop strength reduction logic.
1625*9880d681SAndroid Build Coastguard Worker class LSRInstance {
1626*9880d681SAndroid Build Coastguard Worker   IVUsers &IU;
1627*9880d681SAndroid Build Coastguard Worker   ScalarEvolution &SE;
1628*9880d681SAndroid Build Coastguard Worker   DominatorTree &DT;
1629*9880d681SAndroid Build Coastguard Worker   LoopInfo &LI;
1630*9880d681SAndroid Build Coastguard Worker   const TargetTransformInfo &TTI;
1631*9880d681SAndroid Build Coastguard Worker   Loop *const L;
1632*9880d681SAndroid Build Coastguard Worker   bool Changed;
1633*9880d681SAndroid Build Coastguard Worker 
1634*9880d681SAndroid Build Coastguard Worker   /// This is the insert position that the current loop's induction variable
1635*9880d681SAndroid Build Coastguard Worker   /// increment should be placed. In simple loops, this is the latch block's
1636*9880d681SAndroid Build Coastguard Worker   /// terminator. But in more complicated cases, this is a position which will
1637*9880d681SAndroid Build Coastguard Worker   /// dominate all the in-loop post-increment users.
1638*9880d681SAndroid Build Coastguard Worker   Instruction *IVIncInsertPos;
1639*9880d681SAndroid Build Coastguard Worker 
1640*9880d681SAndroid Build Coastguard Worker   /// Interesting factors between use strides.
1641*9880d681SAndroid Build Coastguard Worker   SmallSetVector<int64_t, 8> Factors;
1642*9880d681SAndroid Build Coastguard Worker 
1643*9880d681SAndroid Build Coastguard Worker   /// Interesting use types, to facilitate truncation reuse.
1644*9880d681SAndroid Build Coastguard Worker   SmallSetVector<Type *, 4> Types;
1645*9880d681SAndroid Build Coastguard Worker 
1646*9880d681SAndroid Build Coastguard Worker   /// The list of operands which are to be replaced.
1647*9880d681SAndroid Build Coastguard Worker   SmallVector<LSRFixup, 16> Fixups;
1648*9880d681SAndroid Build Coastguard Worker 
1649*9880d681SAndroid Build Coastguard Worker   /// The list of interesting uses.
1650*9880d681SAndroid Build Coastguard Worker   SmallVector<LSRUse, 16> Uses;
1651*9880d681SAndroid Build Coastguard Worker 
1652*9880d681SAndroid Build Coastguard Worker   /// Track which uses use which register candidates.
1653*9880d681SAndroid Build Coastguard Worker   RegUseTracker RegUses;
1654*9880d681SAndroid Build Coastguard Worker 
1655*9880d681SAndroid Build Coastguard Worker   // Limit the number of chains to avoid quadratic behavior. We don't expect to
1656*9880d681SAndroid Build Coastguard Worker   // have more than a few IV increment chains in a loop. Missing a Chain falls
1657*9880d681SAndroid Build Coastguard Worker   // back to normal LSR behavior for those uses.
1658*9880d681SAndroid Build Coastguard Worker   static const unsigned MaxChains = 8;
1659*9880d681SAndroid Build Coastguard Worker 
1660*9880d681SAndroid Build Coastguard Worker   /// IV users can form a chain of IV increments.
1661*9880d681SAndroid Build Coastguard Worker   SmallVector<IVChain, MaxChains> IVChainVec;
1662*9880d681SAndroid Build Coastguard Worker 
1663*9880d681SAndroid Build Coastguard Worker   /// IV users that belong to profitable IVChains.
1664*9880d681SAndroid Build Coastguard Worker   SmallPtrSet<Use*, MaxChains> IVIncSet;
1665*9880d681SAndroid Build Coastguard Worker 
1666*9880d681SAndroid Build Coastguard Worker   void OptimizeShadowIV();
1667*9880d681SAndroid Build Coastguard Worker   bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
1668*9880d681SAndroid Build Coastguard Worker   ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
1669*9880d681SAndroid Build Coastguard Worker   void OptimizeLoopTermCond();
1670*9880d681SAndroid Build Coastguard Worker 
1671*9880d681SAndroid Build Coastguard Worker   void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
1672*9880d681SAndroid Build Coastguard Worker                         SmallVectorImpl<ChainUsers> &ChainUsersVec);
1673*9880d681SAndroid Build Coastguard Worker   void FinalizeChain(IVChain &Chain);
1674*9880d681SAndroid Build Coastguard Worker   void CollectChains();
1675*9880d681SAndroid Build Coastguard Worker   void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
1676*9880d681SAndroid Build Coastguard Worker                        SmallVectorImpl<WeakVH> &DeadInsts);
1677*9880d681SAndroid Build Coastguard Worker 
1678*9880d681SAndroid Build Coastguard Worker   void CollectInterestingTypesAndFactors();
1679*9880d681SAndroid Build Coastguard Worker   void CollectFixupsAndInitialFormulae();
1680*9880d681SAndroid Build Coastguard Worker 
getNewFixup()1681*9880d681SAndroid Build Coastguard Worker   LSRFixup &getNewFixup() {
1682*9880d681SAndroid Build Coastguard Worker     Fixups.push_back(LSRFixup());
1683*9880d681SAndroid Build Coastguard Worker     return Fixups.back();
1684*9880d681SAndroid Build Coastguard Worker   }
1685*9880d681SAndroid Build Coastguard Worker 
1686*9880d681SAndroid Build Coastguard Worker   // Support for sharing of LSRUses between LSRFixups.
1687*9880d681SAndroid Build Coastguard Worker   typedef DenseMap<LSRUse::SCEVUseKindPair, size_t> UseMapTy;
1688*9880d681SAndroid Build Coastguard Worker   UseMapTy UseMap;
1689*9880d681SAndroid Build Coastguard Worker 
1690*9880d681SAndroid Build Coastguard Worker   bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
1691*9880d681SAndroid Build Coastguard Worker                           LSRUse::KindType Kind, MemAccessTy AccessTy);
1692*9880d681SAndroid Build Coastguard Worker 
1693*9880d681SAndroid Build Coastguard Worker   std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
1694*9880d681SAndroid Build Coastguard Worker                                     MemAccessTy AccessTy);
1695*9880d681SAndroid Build Coastguard Worker 
1696*9880d681SAndroid Build Coastguard Worker   void DeleteUse(LSRUse &LU, size_t LUIdx);
1697*9880d681SAndroid Build Coastguard Worker 
1698*9880d681SAndroid Build Coastguard Worker   LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
1699*9880d681SAndroid Build Coastguard Worker 
1700*9880d681SAndroid Build Coastguard Worker   void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
1701*9880d681SAndroid Build Coastguard Worker   void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
1702*9880d681SAndroid Build Coastguard Worker   void CountRegisters(const Formula &F, size_t LUIdx);
1703*9880d681SAndroid Build Coastguard Worker   bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
1704*9880d681SAndroid Build Coastguard Worker 
1705*9880d681SAndroid Build Coastguard Worker   void CollectLoopInvariantFixupsAndFormulae();
1706*9880d681SAndroid Build Coastguard Worker 
1707*9880d681SAndroid Build Coastguard Worker   void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
1708*9880d681SAndroid Build Coastguard Worker                               unsigned Depth = 0);
1709*9880d681SAndroid Build Coastguard Worker 
1710*9880d681SAndroid Build Coastguard Worker   void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
1711*9880d681SAndroid Build Coastguard Worker                                   const Formula &Base, unsigned Depth,
1712*9880d681SAndroid Build Coastguard Worker                                   size_t Idx, bool IsScaledReg = false);
1713*9880d681SAndroid Build Coastguard Worker   void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
1714*9880d681SAndroid Build Coastguard Worker   void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
1715*9880d681SAndroid Build Coastguard Worker                                    const Formula &Base, size_t Idx,
1716*9880d681SAndroid Build Coastguard Worker                                    bool IsScaledReg = false);
1717*9880d681SAndroid Build Coastguard Worker   void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
1718*9880d681SAndroid Build Coastguard Worker   void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
1719*9880d681SAndroid Build Coastguard Worker                                    const Formula &Base,
1720*9880d681SAndroid Build Coastguard Worker                                    const SmallVectorImpl<int64_t> &Worklist,
1721*9880d681SAndroid Build Coastguard Worker                                    size_t Idx, bool IsScaledReg = false);
1722*9880d681SAndroid Build Coastguard Worker   void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
1723*9880d681SAndroid Build Coastguard Worker   void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
1724*9880d681SAndroid Build Coastguard Worker   void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
1725*9880d681SAndroid Build Coastguard Worker   void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
1726*9880d681SAndroid Build Coastguard Worker   void GenerateCrossUseConstantOffsets();
1727*9880d681SAndroid Build Coastguard Worker   void GenerateAllReuseFormulae();
1728*9880d681SAndroid Build Coastguard Worker 
1729*9880d681SAndroid Build Coastguard Worker   void FilterOutUndesirableDedicatedRegisters();
1730*9880d681SAndroid Build Coastguard Worker 
1731*9880d681SAndroid Build Coastguard Worker   size_t EstimateSearchSpaceComplexity() const;
1732*9880d681SAndroid Build Coastguard Worker   void NarrowSearchSpaceByDetectingSupersets();
1733*9880d681SAndroid Build Coastguard Worker   void NarrowSearchSpaceByCollapsingUnrolledCode();
1734*9880d681SAndroid Build Coastguard Worker   void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
1735*9880d681SAndroid Build Coastguard Worker   void NarrowSearchSpaceByPickingWinnerRegs();
1736*9880d681SAndroid Build Coastguard Worker   void NarrowSearchSpaceUsingHeuristics();
1737*9880d681SAndroid Build Coastguard Worker 
1738*9880d681SAndroid Build Coastguard Worker   void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
1739*9880d681SAndroid Build Coastguard Worker                     Cost &SolutionCost,
1740*9880d681SAndroid Build Coastguard Worker                     SmallVectorImpl<const Formula *> &Workspace,
1741*9880d681SAndroid Build Coastguard Worker                     const Cost &CurCost,
1742*9880d681SAndroid Build Coastguard Worker                     const SmallPtrSet<const SCEV *, 16> &CurRegs,
1743*9880d681SAndroid Build Coastguard Worker                     DenseSet<const SCEV *> &VisitedRegs) const;
1744*9880d681SAndroid Build Coastguard Worker   void Solve(SmallVectorImpl<const Formula *> &Solution) const;
1745*9880d681SAndroid Build Coastguard Worker 
1746*9880d681SAndroid Build Coastguard Worker   BasicBlock::iterator
1747*9880d681SAndroid Build Coastguard Worker     HoistInsertPosition(BasicBlock::iterator IP,
1748*9880d681SAndroid Build Coastguard Worker                         const SmallVectorImpl<Instruction *> &Inputs) const;
1749*9880d681SAndroid Build Coastguard Worker   BasicBlock::iterator
1750*9880d681SAndroid Build Coastguard Worker     AdjustInsertPositionForExpand(BasicBlock::iterator IP,
1751*9880d681SAndroid Build Coastguard Worker                                   const LSRFixup &LF,
1752*9880d681SAndroid Build Coastguard Worker                                   const LSRUse &LU,
1753*9880d681SAndroid Build Coastguard Worker                                   SCEVExpander &Rewriter) const;
1754*9880d681SAndroid Build Coastguard Worker 
1755*9880d681SAndroid Build Coastguard Worker   Value *Expand(const LSRFixup &LF,
1756*9880d681SAndroid Build Coastguard Worker                 const Formula &F,
1757*9880d681SAndroid Build Coastguard Worker                 BasicBlock::iterator IP,
1758*9880d681SAndroid Build Coastguard Worker                 SCEVExpander &Rewriter,
1759*9880d681SAndroid Build Coastguard Worker                 SmallVectorImpl<WeakVH> &DeadInsts) const;
1760*9880d681SAndroid Build Coastguard Worker   void RewriteForPHI(PHINode *PN, const LSRFixup &LF,
1761*9880d681SAndroid Build Coastguard Worker                      const Formula &F,
1762*9880d681SAndroid Build Coastguard Worker                      SCEVExpander &Rewriter,
1763*9880d681SAndroid Build Coastguard Worker                      SmallVectorImpl<WeakVH> &DeadInsts) const;
1764*9880d681SAndroid Build Coastguard Worker   void Rewrite(const LSRFixup &LF,
1765*9880d681SAndroid Build Coastguard Worker                const Formula &F,
1766*9880d681SAndroid Build Coastguard Worker                SCEVExpander &Rewriter,
1767*9880d681SAndroid Build Coastguard Worker                SmallVectorImpl<WeakVH> &DeadInsts) const;
1768*9880d681SAndroid Build Coastguard Worker   void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);
1769*9880d681SAndroid Build Coastguard Worker 
1770*9880d681SAndroid Build Coastguard Worker public:
1771*9880d681SAndroid Build Coastguard Worker   LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
1772*9880d681SAndroid Build Coastguard Worker               LoopInfo &LI, const TargetTransformInfo &TTI);
1773*9880d681SAndroid Build Coastguard Worker 
getChanged() const1774*9880d681SAndroid Build Coastguard Worker   bool getChanged() const { return Changed; }
1775*9880d681SAndroid Build Coastguard Worker 
1776*9880d681SAndroid Build Coastguard Worker   void print_factors_and_types(raw_ostream &OS) const;
1777*9880d681SAndroid Build Coastguard Worker   void print_fixups(raw_ostream &OS) const;
1778*9880d681SAndroid Build Coastguard Worker   void print_uses(raw_ostream &OS) const;
1779*9880d681SAndroid Build Coastguard Worker   void print(raw_ostream &OS) const;
1780*9880d681SAndroid Build Coastguard Worker   void dump() const;
1781*9880d681SAndroid Build Coastguard Worker };
1782*9880d681SAndroid Build Coastguard Worker 
1783*9880d681SAndroid Build Coastguard Worker }
1784*9880d681SAndroid Build Coastguard Worker 
1785*9880d681SAndroid Build Coastguard Worker /// If IV is used in a int-to-float cast inside the loop then try to eliminate
1786*9880d681SAndroid Build Coastguard Worker /// the cast operation.
OptimizeShadowIV()1787*9880d681SAndroid Build Coastguard Worker void LSRInstance::OptimizeShadowIV() {
1788*9880d681SAndroid Build Coastguard Worker   const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
1789*9880d681SAndroid Build Coastguard Worker   if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
1790*9880d681SAndroid Build Coastguard Worker     return;
1791*9880d681SAndroid Build Coastguard Worker 
1792*9880d681SAndroid Build Coastguard Worker   for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
1793*9880d681SAndroid Build Coastguard Worker        UI != E; /* empty */) {
1794*9880d681SAndroid Build Coastguard Worker     IVUsers::const_iterator CandidateUI = UI;
1795*9880d681SAndroid Build Coastguard Worker     ++UI;
1796*9880d681SAndroid Build Coastguard Worker     Instruction *ShadowUse = CandidateUI->getUser();
1797*9880d681SAndroid Build Coastguard Worker     Type *DestTy = nullptr;
1798*9880d681SAndroid Build Coastguard Worker     bool IsSigned = false;
1799*9880d681SAndroid Build Coastguard Worker 
1800*9880d681SAndroid Build Coastguard Worker     /* If shadow use is a int->float cast then insert a second IV
1801*9880d681SAndroid Build Coastguard Worker        to eliminate this cast.
1802*9880d681SAndroid Build Coastguard Worker 
1803*9880d681SAndroid Build Coastguard Worker          for (unsigned i = 0; i < n; ++i)
1804*9880d681SAndroid Build Coastguard Worker            foo((double)i);
1805*9880d681SAndroid Build Coastguard Worker 
1806*9880d681SAndroid Build Coastguard Worker        is transformed into
1807*9880d681SAndroid Build Coastguard Worker 
1808*9880d681SAndroid Build Coastguard Worker          double d = 0.0;
1809*9880d681SAndroid Build Coastguard Worker          for (unsigned i = 0; i < n; ++i, ++d)
1810*9880d681SAndroid Build Coastguard Worker            foo(d);
1811*9880d681SAndroid Build Coastguard Worker     */
1812*9880d681SAndroid Build Coastguard Worker     if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
1813*9880d681SAndroid Build Coastguard Worker       IsSigned = false;
1814*9880d681SAndroid Build Coastguard Worker       DestTy = UCast->getDestTy();
1815*9880d681SAndroid Build Coastguard Worker     }
1816*9880d681SAndroid Build Coastguard Worker     else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
1817*9880d681SAndroid Build Coastguard Worker       IsSigned = true;
1818*9880d681SAndroid Build Coastguard Worker       DestTy = SCast->getDestTy();
1819*9880d681SAndroid Build Coastguard Worker     }
1820*9880d681SAndroid Build Coastguard Worker     if (!DestTy) continue;
1821*9880d681SAndroid Build Coastguard Worker 
1822*9880d681SAndroid Build Coastguard Worker     // If target does not support DestTy natively then do not apply
1823*9880d681SAndroid Build Coastguard Worker     // this transformation.
1824*9880d681SAndroid Build Coastguard Worker     if (!TTI.isTypeLegal(DestTy)) continue;
1825*9880d681SAndroid Build Coastguard Worker 
1826*9880d681SAndroid Build Coastguard Worker     PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
1827*9880d681SAndroid Build Coastguard Worker     if (!PH) continue;
1828*9880d681SAndroid Build Coastguard Worker     if (PH->getNumIncomingValues() != 2) continue;
1829*9880d681SAndroid Build Coastguard Worker 
1830*9880d681SAndroid Build Coastguard Worker     Type *SrcTy = PH->getType();
1831*9880d681SAndroid Build Coastguard Worker     int Mantissa = DestTy->getFPMantissaWidth();
1832*9880d681SAndroid Build Coastguard Worker     if (Mantissa == -1) continue;
1833*9880d681SAndroid Build Coastguard Worker     if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
1834*9880d681SAndroid Build Coastguard Worker       continue;
1835*9880d681SAndroid Build Coastguard Worker 
1836*9880d681SAndroid Build Coastguard Worker     unsigned Entry, Latch;
1837*9880d681SAndroid Build Coastguard Worker     if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
1838*9880d681SAndroid Build Coastguard Worker       Entry = 0;
1839*9880d681SAndroid Build Coastguard Worker       Latch = 1;
1840*9880d681SAndroid Build Coastguard Worker     } else {
1841*9880d681SAndroid Build Coastguard Worker       Entry = 1;
1842*9880d681SAndroid Build Coastguard Worker       Latch = 0;
1843*9880d681SAndroid Build Coastguard Worker     }
1844*9880d681SAndroid Build Coastguard Worker 
1845*9880d681SAndroid Build Coastguard Worker     ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
1846*9880d681SAndroid Build Coastguard Worker     if (!Init) continue;
1847*9880d681SAndroid Build Coastguard Worker     Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
1848*9880d681SAndroid Build Coastguard Worker                                         (double)Init->getSExtValue() :
1849*9880d681SAndroid Build Coastguard Worker                                         (double)Init->getZExtValue());
1850*9880d681SAndroid Build Coastguard Worker 
1851*9880d681SAndroid Build Coastguard Worker     BinaryOperator *Incr =
1852*9880d681SAndroid Build Coastguard Worker       dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
1853*9880d681SAndroid Build Coastguard Worker     if (!Incr) continue;
1854*9880d681SAndroid Build Coastguard Worker     if (Incr->getOpcode() != Instruction::Add
1855*9880d681SAndroid Build Coastguard Worker         && Incr->getOpcode() != Instruction::Sub)
1856*9880d681SAndroid Build Coastguard Worker       continue;
1857*9880d681SAndroid Build Coastguard Worker 
1858*9880d681SAndroid Build Coastguard Worker     /* Initialize new IV, double d = 0.0 in above example. */
1859*9880d681SAndroid Build Coastguard Worker     ConstantInt *C = nullptr;
1860*9880d681SAndroid Build Coastguard Worker     if (Incr->getOperand(0) == PH)
1861*9880d681SAndroid Build Coastguard Worker       C = dyn_cast<ConstantInt>(Incr->getOperand(1));
1862*9880d681SAndroid Build Coastguard Worker     else if (Incr->getOperand(1) == PH)
1863*9880d681SAndroid Build Coastguard Worker       C = dyn_cast<ConstantInt>(Incr->getOperand(0));
1864*9880d681SAndroid Build Coastguard Worker     else
1865*9880d681SAndroid Build Coastguard Worker       continue;
1866*9880d681SAndroid Build Coastguard Worker 
1867*9880d681SAndroid Build Coastguard Worker     if (!C) continue;
1868*9880d681SAndroid Build Coastguard Worker 
1869*9880d681SAndroid Build Coastguard Worker     // Ignore negative constants, as the code below doesn't handle them
1870*9880d681SAndroid Build Coastguard Worker     // correctly. TODO: Remove this restriction.
1871*9880d681SAndroid Build Coastguard Worker     if (!C->getValue().isStrictlyPositive()) continue;
1872*9880d681SAndroid Build Coastguard Worker 
1873*9880d681SAndroid Build Coastguard Worker     /* Add new PHINode. */
1874*9880d681SAndroid Build Coastguard Worker     PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH);
1875*9880d681SAndroid Build Coastguard Worker 
1876*9880d681SAndroid Build Coastguard Worker     /* create new increment. '++d' in above example. */
1877*9880d681SAndroid Build Coastguard Worker     Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
1878*9880d681SAndroid Build Coastguard Worker     BinaryOperator *NewIncr =
1879*9880d681SAndroid Build Coastguard Worker       BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
1880*9880d681SAndroid Build Coastguard Worker                                Instruction::FAdd : Instruction::FSub,
1881*9880d681SAndroid Build Coastguard Worker                              NewPH, CFP, "IV.S.next.", Incr);
1882*9880d681SAndroid Build Coastguard Worker 
1883*9880d681SAndroid Build Coastguard Worker     NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
1884*9880d681SAndroid Build Coastguard Worker     NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
1885*9880d681SAndroid Build Coastguard Worker 
1886*9880d681SAndroid Build Coastguard Worker     /* Remove cast operation */
1887*9880d681SAndroid Build Coastguard Worker     ShadowUse->replaceAllUsesWith(NewPH);
1888*9880d681SAndroid Build Coastguard Worker     ShadowUse->eraseFromParent();
1889*9880d681SAndroid Build Coastguard Worker     Changed = true;
1890*9880d681SAndroid Build Coastguard Worker     break;
1891*9880d681SAndroid Build Coastguard Worker   }
1892*9880d681SAndroid Build Coastguard Worker }
1893*9880d681SAndroid Build Coastguard Worker 
1894*9880d681SAndroid Build Coastguard Worker /// If Cond has an operand that is an expression of an IV, set the IV user and
1895*9880d681SAndroid Build Coastguard Worker /// stride information and return true, otherwise return false.
FindIVUserForCond(ICmpInst * Cond,IVStrideUse * & CondUse)1896*9880d681SAndroid Build Coastguard Worker bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
1897*9880d681SAndroid Build Coastguard Worker   for (IVStrideUse &U : IU)
1898*9880d681SAndroid Build Coastguard Worker     if (U.getUser() == Cond) {
1899*9880d681SAndroid Build Coastguard Worker       // NOTE: we could handle setcc instructions with multiple uses here, but
1900*9880d681SAndroid Build Coastguard Worker       // InstCombine does it as well for simple uses, it's not clear that it
1901*9880d681SAndroid Build Coastguard Worker       // occurs enough in real life to handle.
1902*9880d681SAndroid Build Coastguard Worker       CondUse = &U;
1903*9880d681SAndroid Build Coastguard Worker       return true;
1904*9880d681SAndroid Build Coastguard Worker     }
1905*9880d681SAndroid Build Coastguard Worker   return false;
1906*9880d681SAndroid Build Coastguard Worker }
1907*9880d681SAndroid Build Coastguard Worker 
1908*9880d681SAndroid Build Coastguard Worker /// Rewrite the loop's terminating condition if it uses a max computation.
1909*9880d681SAndroid Build Coastguard Worker ///
1910*9880d681SAndroid Build Coastguard Worker /// This is a narrow solution to a specific, but acute, problem. For loops
1911*9880d681SAndroid Build Coastguard Worker /// like this:
1912*9880d681SAndroid Build Coastguard Worker ///
1913*9880d681SAndroid Build Coastguard Worker ///   i = 0;
1914*9880d681SAndroid Build Coastguard Worker ///   do {
1915*9880d681SAndroid Build Coastguard Worker ///     p[i] = 0.0;
1916*9880d681SAndroid Build Coastguard Worker ///   } while (++i < n);
1917*9880d681SAndroid Build Coastguard Worker ///
1918*9880d681SAndroid Build Coastguard Worker /// the trip count isn't just 'n', because 'n' might not be positive. And
1919*9880d681SAndroid Build Coastguard Worker /// unfortunately this can come up even for loops where the user didn't use
1920*9880d681SAndroid Build Coastguard Worker /// a C do-while loop. For example, seemingly well-behaved top-test loops
1921*9880d681SAndroid Build Coastguard Worker /// will commonly be lowered like this:
1922*9880d681SAndroid Build Coastguard Worker //
1923*9880d681SAndroid Build Coastguard Worker ///   if (n > 0) {
1924*9880d681SAndroid Build Coastguard Worker ///     i = 0;
1925*9880d681SAndroid Build Coastguard Worker ///     do {
1926*9880d681SAndroid Build Coastguard Worker ///       p[i] = 0.0;
1927*9880d681SAndroid Build Coastguard Worker ///     } while (++i < n);
1928*9880d681SAndroid Build Coastguard Worker ///   }
1929*9880d681SAndroid Build Coastguard Worker ///
1930*9880d681SAndroid Build Coastguard Worker /// and then it's possible for subsequent optimization to obscure the if
1931*9880d681SAndroid Build Coastguard Worker /// test in such a way that indvars can't find it.
1932*9880d681SAndroid Build Coastguard Worker ///
1933*9880d681SAndroid Build Coastguard Worker /// When indvars can't find the if test in loops like this, it creates a
1934*9880d681SAndroid Build Coastguard Worker /// max expression, which allows it to give the loop a canonical
1935*9880d681SAndroid Build Coastguard Worker /// induction variable:
1936*9880d681SAndroid Build Coastguard Worker ///
1937*9880d681SAndroid Build Coastguard Worker ///   i = 0;
1938*9880d681SAndroid Build Coastguard Worker ///   max = n < 1 ? 1 : n;
1939*9880d681SAndroid Build Coastguard Worker ///   do {
1940*9880d681SAndroid Build Coastguard Worker ///     p[i] = 0.0;
1941*9880d681SAndroid Build Coastguard Worker ///   } while (++i != max);
1942*9880d681SAndroid Build Coastguard Worker ///
1943*9880d681SAndroid Build Coastguard Worker /// Canonical induction variables are necessary because the loop passes
1944*9880d681SAndroid Build Coastguard Worker /// are designed around them. The most obvious example of this is the
1945*9880d681SAndroid Build Coastguard Worker /// LoopInfo analysis, which doesn't remember trip count values. It
1946*9880d681SAndroid Build Coastguard Worker /// expects to be able to rediscover the trip count each time it is
1947*9880d681SAndroid Build Coastguard Worker /// needed, and it does this using a simple analysis that only succeeds if
1948*9880d681SAndroid Build Coastguard Worker /// the loop has a canonical induction variable.
1949*9880d681SAndroid Build Coastguard Worker ///
1950*9880d681SAndroid Build Coastguard Worker /// However, when it comes time to generate code, the maximum operation
1951*9880d681SAndroid Build Coastguard Worker /// can be quite costly, especially if it's inside of an outer loop.
1952*9880d681SAndroid Build Coastguard Worker ///
1953*9880d681SAndroid Build Coastguard Worker /// This function solves this problem by detecting this type of loop and
1954*9880d681SAndroid Build Coastguard Worker /// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
1955*9880d681SAndroid Build Coastguard Worker /// the instructions for the maximum computation.
1956*9880d681SAndroid Build Coastguard Worker ///
OptimizeMax(ICmpInst * Cond,IVStrideUse * & CondUse)1957*9880d681SAndroid Build Coastguard Worker ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
1958*9880d681SAndroid Build Coastguard Worker   // Check that the loop matches the pattern we're looking for.
1959*9880d681SAndroid Build Coastguard Worker   if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
1960*9880d681SAndroid Build Coastguard Worker       Cond->getPredicate() != CmpInst::ICMP_NE)
1961*9880d681SAndroid Build Coastguard Worker     return Cond;
1962*9880d681SAndroid Build Coastguard Worker 
1963*9880d681SAndroid Build Coastguard Worker   SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
1964*9880d681SAndroid Build Coastguard Worker   if (!Sel || !Sel->hasOneUse()) return Cond;
1965*9880d681SAndroid Build Coastguard Worker 
1966*9880d681SAndroid Build Coastguard Worker   const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
1967*9880d681SAndroid Build Coastguard Worker   if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
1968*9880d681SAndroid Build Coastguard Worker     return Cond;
1969*9880d681SAndroid Build Coastguard Worker   const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
1970*9880d681SAndroid Build Coastguard Worker 
1971*9880d681SAndroid Build Coastguard Worker   // Add one to the backedge-taken count to get the trip count.
1972*9880d681SAndroid Build Coastguard Worker   const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
1973*9880d681SAndroid Build Coastguard Worker   if (IterationCount != SE.getSCEV(Sel)) return Cond;
1974*9880d681SAndroid Build Coastguard Worker 
1975*9880d681SAndroid Build Coastguard Worker   // Check for a max calculation that matches the pattern. There's no check
1976*9880d681SAndroid Build Coastguard Worker   // for ICMP_ULE here because the comparison would be with zero, which
1977*9880d681SAndroid Build Coastguard Worker   // isn't interesting.
1978*9880d681SAndroid Build Coastguard Worker   CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
1979*9880d681SAndroid Build Coastguard Worker   const SCEVNAryExpr *Max = nullptr;
1980*9880d681SAndroid Build Coastguard Worker   if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
1981*9880d681SAndroid Build Coastguard Worker     Pred = ICmpInst::ICMP_SLE;
1982*9880d681SAndroid Build Coastguard Worker     Max = S;
1983*9880d681SAndroid Build Coastguard Worker   } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
1984*9880d681SAndroid Build Coastguard Worker     Pred = ICmpInst::ICMP_SLT;
1985*9880d681SAndroid Build Coastguard Worker     Max = S;
1986*9880d681SAndroid Build Coastguard Worker   } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
1987*9880d681SAndroid Build Coastguard Worker     Pred = ICmpInst::ICMP_ULT;
1988*9880d681SAndroid Build Coastguard Worker     Max = U;
1989*9880d681SAndroid Build Coastguard Worker   } else {
1990*9880d681SAndroid Build Coastguard Worker     // No match; bail.
1991*9880d681SAndroid Build Coastguard Worker     return Cond;
1992*9880d681SAndroid Build Coastguard Worker   }
1993*9880d681SAndroid Build Coastguard Worker 
1994*9880d681SAndroid Build Coastguard Worker   // To handle a max with more than two operands, this optimization would
1995*9880d681SAndroid Build Coastguard Worker   // require additional checking and setup.
1996*9880d681SAndroid Build Coastguard Worker   if (Max->getNumOperands() != 2)
1997*9880d681SAndroid Build Coastguard Worker     return Cond;
1998*9880d681SAndroid Build Coastguard Worker 
1999*9880d681SAndroid Build Coastguard Worker   const SCEV *MaxLHS = Max->getOperand(0);
2000*9880d681SAndroid Build Coastguard Worker   const SCEV *MaxRHS = Max->getOperand(1);
2001*9880d681SAndroid Build Coastguard Worker 
2002*9880d681SAndroid Build Coastguard Worker   // ScalarEvolution canonicalizes constants to the left. For < and >, look
2003*9880d681SAndroid Build Coastguard Worker   // for a comparison with 1. For <= and >=, a comparison with zero.
2004*9880d681SAndroid Build Coastguard Worker   if (!MaxLHS ||
2005*9880d681SAndroid Build Coastguard Worker       (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
2006*9880d681SAndroid Build Coastguard Worker     return Cond;
2007*9880d681SAndroid Build Coastguard Worker 
2008*9880d681SAndroid Build Coastguard Worker   // Check the relevant induction variable for conformance to
2009*9880d681SAndroid Build Coastguard Worker   // the pattern.
2010*9880d681SAndroid Build Coastguard Worker   const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
2011*9880d681SAndroid Build Coastguard Worker   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
2012*9880d681SAndroid Build Coastguard Worker   if (!AR || !AR->isAffine() ||
2013*9880d681SAndroid Build Coastguard Worker       AR->getStart() != One ||
2014*9880d681SAndroid Build Coastguard Worker       AR->getStepRecurrence(SE) != One)
2015*9880d681SAndroid Build Coastguard Worker     return Cond;
2016*9880d681SAndroid Build Coastguard Worker 
2017*9880d681SAndroid Build Coastguard Worker   assert(AR->getLoop() == L &&
2018*9880d681SAndroid Build Coastguard Worker          "Loop condition operand is an addrec in a different loop!");
2019*9880d681SAndroid Build Coastguard Worker 
2020*9880d681SAndroid Build Coastguard Worker   // Check the right operand of the select, and remember it, as it will
2021*9880d681SAndroid Build Coastguard Worker   // be used in the new comparison instruction.
2022*9880d681SAndroid Build Coastguard Worker   Value *NewRHS = nullptr;
2023*9880d681SAndroid Build Coastguard Worker   if (ICmpInst::isTrueWhenEqual(Pred)) {
2024*9880d681SAndroid Build Coastguard Worker     // Look for n+1, and grab n.
2025*9880d681SAndroid Build Coastguard Worker     if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
2026*9880d681SAndroid Build Coastguard Worker       if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2027*9880d681SAndroid Build Coastguard Worker          if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
2028*9880d681SAndroid Build Coastguard Worker            NewRHS = BO->getOperand(0);
2029*9880d681SAndroid Build Coastguard Worker     if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
2030*9880d681SAndroid Build Coastguard Worker       if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
2031*9880d681SAndroid Build Coastguard Worker         if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
2032*9880d681SAndroid Build Coastguard Worker           NewRHS = BO->getOperand(0);
2033*9880d681SAndroid Build Coastguard Worker     if (!NewRHS)
2034*9880d681SAndroid Build Coastguard Worker       return Cond;
2035*9880d681SAndroid Build Coastguard Worker   } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
2036*9880d681SAndroid Build Coastguard Worker     NewRHS = Sel->getOperand(1);
2037*9880d681SAndroid Build Coastguard Worker   else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
2038*9880d681SAndroid Build Coastguard Worker     NewRHS = Sel->getOperand(2);
2039*9880d681SAndroid Build Coastguard Worker   else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
2040*9880d681SAndroid Build Coastguard Worker     NewRHS = SU->getValue();
2041*9880d681SAndroid Build Coastguard Worker   else
2042*9880d681SAndroid Build Coastguard Worker     // Max doesn't match expected pattern.
2043*9880d681SAndroid Build Coastguard Worker     return Cond;
2044*9880d681SAndroid Build Coastguard Worker 
2045*9880d681SAndroid Build Coastguard Worker   // Determine the new comparison opcode. It may be signed or unsigned,
2046*9880d681SAndroid Build Coastguard Worker   // and the original comparison may be either equality or inequality.
2047*9880d681SAndroid Build Coastguard Worker   if (Cond->getPredicate() == CmpInst::ICMP_EQ)
2048*9880d681SAndroid Build Coastguard Worker     Pred = CmpInst::getInversePredicate(Pred);
2049*9880d681SAndroid Build Coastguard Worker 
2050*9880d681SAndroid Build Coastguard Worker   // Ok, everything looks ok to change the condition into an SLT or SGE and
2051*9880d681SAndroid Build Coastguard Worker   // delete the max calculation.
2052*9880d681SAndroid Build Coastguard Worker   ICmpInst *NewCond =
2053*9880d681SAndroid Build Coastguard Worker     new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");
2054*9880d681SAndroid Build Coastguard Worker 
2055*9880d681SAndroid Build Coastguard Worker   // Delete the max calculation instructions.
2056*9880d681SAndroid Build Coastguard Worker   Cond->replaceAllUsesWith(NewCond);
2057*9880d681SAndroid Build Coastguard Worker   CondUse->setUser(NewCond);
2058*9880d681SAndroid Build Coastguard Worker   Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
2059*9880d681SAndroid Build Coastguard Worker   Cond->eraseFromParent();
2060*9880d681SAndroid Build Coastguard Worker   Sel->eraseFromParent();
2061*9880d681SAndroid Build Coastguard Worker   if (Cmp->use_empty())
2062*9880d681SAndroid Build Coastguard Worker     Cmp->eraseFromParent();
2063*9880d681SAndroid Build Coastguard Worker   return NewCond;
2064*9880d681SAndroid Build Coastguard Worker }
2065*9880d681SAndroid Build Coastguard Worker 
2066*9880d681SAndroid Build Coastguard Worker /// Change loop terminating condition to use the postinc iv when possible.
2067*9880d681SAndroid Build Coastguard Worker void
OptimizeLoopTermCond()2068*9880d681SAndroid Build Coastguard Worker LSRInstance::OptimizeLoopTermCond() {
2069*9880d681SAndroid Build Coastguard Worker   SmallPtrSet<Instruction *, 4> PostIncs;
2070*9880d681SAndroid Build Coastguard Worker 
2071*9880d681SAndroid Build Coastguard Worker   BasicBlock *LatchBlock = L->getLoopLatch();
2072*9880d681SAndroid Build Coastguard Worker   SmallVector<BasicBlock*, 8> ExitingBlocks;
2073*9880d681SAndroid Build Coastguard Worker   L->getExitingBlocks(ExitingBlocks);
2074*9880d681SAndroid Build Coastguard Worker 
2075*9880d681SAndroid Build Coastguard Worker   for (BasicBlock *ExitingBlock : ExitingBlocks) {
2076*9880d681SAndroid Build Coastguard Worker 
2077*9880d681SAndroid Build Coastguard Worker     // Get the terminating condition for the loop if possible.  If we
2078*9880d681SAndroid Build Coastguard Worker     // can, we want to change it to use a post-incremented version of its
2079*9880d681SAndroid Build Coastguard Worker     // induction variable, to allow coalescing the live ranges for the IV into
2080*9880d681SAndroid Build Coastguard Worker     // one register value.
2081*9880d681SAndroid Build Coastguard Worker 
2082*9880d681SAndroid Build Coastguard Worker     BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
2083*9880d681SAndroid Build Coastguard Worker     if (!TermBr)
2084*9880d681SAndroid Build Coastguard Worker       continue;
2085*9880d681SAndroid Build Coastguard Worker     // FIXME: Overly conservative, termination condition could be an 'or' etc..
2086*9880d681SAndroid Build Coastguard Worker     if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
2087*9880d681SAndroid Build Coastguard Worker       continue;
2088*9880d681SAndroid Build Coastguard Worker 
2089*9880d681SAndroid Build Coastguard Worker     // Search IVUsesByStride to find Cond's IVUse if there is one.
2090*9880d681SAndroid Build Coastguard Worker     IVStrideUse *CondUse = nullptr;
2091*9880d681SAndroid Build Coastguard Worker     ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
2092*9880d681SAndroid Build Coastguard Worker     if (!FindIVUserForCond(Cond, CondUse))
2093*9880d681SAndroid Build Coastguard Worker       continue;
2094*9880d681SAndroid Build Coastguard Worker 
2095*9880d681SAndroid Build Coastguard Worker     // If the trip count is computed in terms of a max (due to ScalarEvolution
2096*9880d681SAndroid Build Coastguard Worker     // being unable to find a sufficient guard, for example), change the loop
2097*9880d681SAndroid Build Coastguard Worker     // comparison to use SLT or ULT instead of NE.
2098*9880d681SAndroid Build Coastguard Worker     // One consequence of doing this now is that it disrupts the count-down
2099*9880d681SAndroid Build Coastguard Worker     // optimization. That's not always a bad thing though, because in such
2100*9880d681SAndroid Build Coastguard Worker     // cases it may still be worthwhile to avoid a max.
2101*9880d681SAndroid Build Coastguard Worker     Cond = OptimizeMax(Cond, CondUse);
2102*9880d681SAndroid Build Coastguard Worker 
2103*9880d681SAndroid Build Coastguard Worker     // If this exiting block dominates the latch block, it may also use
2104*9880d681SAndroid Build Coastguard Worker     // the post-inc value if it won't be shared with other uses.
2105*9880d681SAndroid Build Coastguard Worker     // Check for dominance.
2106*9880d681SAndroid Build Coastguard Worker     if (!DT.dominates(ExitingBlock, LatchBlock))
2107*9880d681SAndroid Build Coastguard Worker       continue;
2108*9880d681SAndroid Build Coastguard Worker 
2109*9880d681SAndroid Build Coastguard Worker     // Conservatively avoid trying to use the post-inc value in non-latch
2110*9880d681SAndroid Build Coastguard Worker     // exits if there may be pre-inc users in intervening blocks.
2111*9880d681SAndroid Build Coastguard Worker     if (LatchBlock != ExitingBlock)
2112*9880d681SAndroid Build Coastguard Worker       for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
2113*9880d681SAndroid Build Coastguard Worker         // Test if the use is reachable from the exiting block. This dominator
2114*9880d681SAndroid Build Coastguard Worker         // query is a conservative approximation of reachability.
2115*9880d681SAndroid Build Coastguard Worker         if (&*UI != CondUse &&
2116*9880d681SAndroid Build Coastguard Worker             !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
2117*9880d681SAndroid Build Coastguard Worker           // Conservatively assume there may be reuse if the quotient of their
2118*9880d681SAndroid Build Coastguard Worker           // strides could be a legal scale.
2119*9880d681SAndroid Build Coastguard Worker           const SCEV *A = IU.getStride(*CondUse, L);
2120*9880d681SAndroid Build Coastguard Worker           const SCEV *B = IU.getStride(*UI, L);
2121*9880d681SAndroid Build Coastguard Worker           if (!A || !B) continue;
2122*9880d681SAndroid Build Coastguard Worker           if (SE.getTypeSizeInBits(A->getType()) !=
2123*9880d681SAndroid Build Coastguard Worker               SE.getTypeSizeInBits(B->getType())) {
2124*9880d681SAndroid Build Coastguard Worker             if (SE.getTypeSizeInBits(A->getType()) >
2125*9880d681SAndroid Build Coastguard Worker                 SE.getTypeSizeInBits(B->getType()))
2126*9880d681SAndroid Build Coastguard Worker               B = SE.getSignExtendExpr(B, A->getType());
2127*9880d681SAndroid Build Coastguard Worker             else
2128*9880d681SAndroid Build Coastguard Worker               A = SE.getSignExtendExpr(A, B->getType());
2129*9880d681SAndroid Build Coastguard Worker           }
2130*9880d681SAndroid Build Coastguard Worker           if (const SCEVConstant *D =
2131*9880d681SAndroid Build Coastguard Worker                 dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
2132*9880d681SAndroid Build Coastguard Worker             const ConstantInt *C = D->getValue();
2133*9880d681SAndroid Build Coastguard Worker             // Stride of one or negative one can have reuse with non-addresses.
2134*9880d681SAndroid Build Coastguard Worker             if (C->isOne() || C->isAllOnesValue())
2135*9880d681SAndroid Build Coastguard Worker               goto decline_post_inc;
2136*9880d681SAndroid Build Coastguard Worker             // Avoid weird situations.
2137*9880d681SAndroid Build Coastguard Worker             if (C->getValue().getMinSignedBits() >= 64 ||
2138*9880d681SAndroid Build Coastguard Worker                 C->getValue().isMinSignedValue())
2139*9880d681SAndroid Build Coastguard Worker               goto decline_post_inc;
2140*9880d681SAndroid Build Coastguard Worker             // Check for possible scaled-address reuse.
2141*9880d681SAndroid Build Coastguard Worker             MemAccessTy AccessTy = getAccessType(UI->getUser());
2142*9880d681SAndroid Build Coastguard Worker             int64_t Scale = C->getSExtValue();
2143*9880d681SAndroid Build Coastguard Worker             if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
2144*9880d681SAndroid Build Coastguard Worker                                           /*BaseOffset=*/0,
2145*9880d681SAndroid Build Coastguard Worker                                           /*HasBaseReg=*/false, Scale,
2146*9880d681SAndroid Build Coastguard Worker                                           AccessTy.AddrSpace))
2147*9880d681SAndroid Build Coastguard Worker               goto decline_post_inc;
2148*9880d681SAndroid Build Coastguard Worker             Scale = -Scale;
2149*9880d681SAndroid Build Coastguard Worker             if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
2150*9880d681SAndroid Build Coastguard Worker                                           /*BaseOffset=*/0,
2151*9880d681SAndroid Build Coastguard Worker                                           /*HasBaseReg=*/false, Scale,
2152*9880d681SAndroid Build Coastguard Worker                                           AccessTy.AddrSpace))
2153*9880d681SAndroid Build Coastguard Worker               goto decline_post_inc;
2154*9880d681SAndroid Build Coastguard Worker           }
2155*9880d681SAndroid Build Coastguard Worker         }
2156*9880d681SAndroid Build Coastguard Worker 
2157*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "  Change loop exiting icmp to use postinc iv: "
2158*9880d681SAndroid Build Coastguard Worker                  << *Cond << '\n');
2159*9880d681SAndroid Build Coastguard Worker 
2160*9880d681SAndroid Build Coastguard Worker     // It's possible for the setcc instruction to be anywhere in the loop, and
2161*9880d681SAndroid Build Coastguard Worker     // possible for it to have multiple users.  If it is not immediately before
2162*9880d681SAndroid Build Coastguard Worker     // the exiting block branch, move it.
2163*9880d681SAndroid Build Coastguard Worker     if (&*++BasicBlock::iterator(Cond) != TermBr) {
2164*9880d681SAndroid Build Coastguard Worker       if (Cond->hasOneUse()) {
2165*9880d681SAndroid Build Coastguard Worker         Cond->moveBefore(TermBr);
2166*9880d681SAndroid Build Coastguard Worker       } else {
2167*9880d681SAndroid Build Coastguard Worker         // Clone the terminating condition and insert into the loopend.
2168*9880d681SAndroid Build Coastguard Worker         ICmpInst *OldCond = Cond;
2169*9880d681SAndroid Build Coastguard Worker         Cond = cast<ICmpInst>(Cond->clone());
2170*9880d681SAndroid Build Coastguard Worker         Cond->setName(L->getHeader()->getName() + ".termcond");
2171*9880d681SAndroid Build Coastguard Worker         ExitingBlock->getInstList().insert(TermBr->getIterator(), Cond);
2172*9880d681SAndroid Build Coastguard Worker 
2173*9880d681SAndroid Build Coastguard Worker         // Clone the IVUse, as the old use still exists!
2174*9880d681SAndroid Build Coastguard Worker         CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
2175*9880d681SAndroid Build Coastguard Worker         TermBr->replaceUsesOfWith(OldCond, Cond);
2176*9880d681SAndroid Build Coastguard Worker       }
2177*9880d681SAndroid Build Coastguard Worker     }
2178*9880d681SAndroid Build Coastguard Worker 
2179*9880d681SAndroid Build Coastguard Worker     // If we get to here, we know that we can transform the setcc instruction to
2180*9880d681SAndroid Build Coastguard Worker     // use the post-incremented version of the IV, allowing us to coalesce the
2181*9880d681SAndroid Build Coastguard Worker     // live ranges for the IV correctly.
2182*9880d681SAndroid Build Coastguard Worker     CondUse->transformToPostInc(L);
2183*9880d681SAndroid Build Coastguard Worker     Changed = true;
2184*9880d681SAndroid Build Coastguard Worker 
2185*9880d681SAndroid Build Coastguard Worker     PostIncs.insert(Cond);
2186*9880d681SAndroid Build Coastguard Worker   decline_post_inc:;
2187*9880d681SAndroid Build Coastguard Worker   }
2188*9880d681SAndroid Build Coastguard Worker 
2189*9880d681SAndroid Build Coastguard Worker   // Determine an insertion point for the loop induction variable increment. It
2190*9880d681SAndroid Build Coastguard Worker   // must dominate all the post-inc comparisons we just set up, and it must
2191*9880d681SAndroid Build Coastguard Worker   // dominate the loop latch edge.
2192*9880d681SAndroid Build Coastguard Worker   IVIncInsertPos = L->getLoopLatch()->getTerminator();
2193*9880d681SAndroid Build Coastguard Worker   for (Instruction *Inst : PostIncs) {
2194*9880d681SAndroid Build Coastguard Worker     BasicBlock *BB =
2195*9880d681SAndroid Build Coastguard Worker       DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
2196*9880d681SAndroid Build Coastguard Worker                                     Inst->getParent());
2197*9880d681SAndroid Build Coastguard Worker     if (BB == Inst->getParent())
2198*9880d681SAndroid Build Coastguard Worker       IVIncInsertPos = Inst;
2199*9880d681SAndroid Build Coastguard Worker     else if (BB != IVIncInsertPos->getParent())
2200*9880d681SAndroid Build Coastguard Worker       IVIncInsertPos = BB->getTerminator();
2201*9880d681SAndroid Build Coastguard Worker   }
2202*9880d681SAndroid Build Coastguard Worker }
2203*9880d681SAndroid Build Coastguard Worker 
2204*9880d681SAndroid Build Coastguard Worker /// Determine if the given use can accommodate a fixup at the given offset and
2205*9880d681SAndroid Build Coastguard Worker /// other details. If so, update the use and return true.
reconcileNewOffset(LSRUse & LU,int64_t NewOffset,bool HasBaseReg,LSRUse::KindType Kind,MemAccessTy AccessTy)2206*9880d681SAndroid Build Coastguard Worker bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
2207*9880d681SAndroid Build Coastguard Worker                                      bool HasBaseReg, LSRUse::KindType Kind,
2208*9880d681SAndroid Build Coastguard Worker                                      MemAccessTy AccessTy) {
2209*9880d681SAndroid Build Coastguard Worker   int64_t NewMinOffset = LU.MinOffset;
2210*9880d681SAndroid Build Coastguard Worker   int64_t NewMaxOffset = LU.MaxOffset;
2211*9880d681SAndroid Build Coastguard Worker   MemAccessTy NewAccessTy = AccessTy;
2212*9880d681SAndroid Build Coastguard Worker 
2213*9880d681SAndroid Build Coastguard Worker   // Check for a mismatched kind. It's tempting to collapse mismatched kinds to
2214*9880d681SAndroid Build Coastguard Worker   // something conservative, however this can pessimize in the case that one of
2215*9880d681SAndroid Build Coastguard Worker   // the uses will have all its uses outside the loop, for example.
2216*9880d681SAndroid Build Coastguard Worker   if (LU.Kind != Kind)
2217*9880d681SAndroid Build Coastguard Worker     return false;
2218*9880d681SAndroid Build Coastguard Worker 
2219*9880d681SAndroid Build Coastguard Worker   // Check for a mismatched access type, and fall back conservatively as needed.
2220*9880d681SAndroid Build Coastguard Worker   // TODO: Be less conservative when the type is similar and can use the same
2221*9880d681SAndroid Build Coastguard Worker   // addressing modes.
2222*9880d681SAndroid Build Coastguard Worker   if (Kind == LSRUse::Address) {
2223*9880d681SAndroid Build Coastguard Worker     if (AccessTy != LU.AccessTy)
2224*9880d681SAndroid Build Coastguard Worker       NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext());
2225*9880d681SAndroid Build Coastguard Worker   }
2226*9880d681SAndroid Build Coastguard Worker 
2227*9880d681SAndroid Build Coastguard Worker   // Conservatively assume HasBaseReg is true for now.
2228*9880d681SAndroid Build Coastguard Worker   if (NewOffset < LU.MinOffset) {
2229*9880d681SAndroid Build Coastguard Worker     if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
2230*9880d681SAndroid Build Coastguard Worker                           LU.MaxOffset - NewOffset, HasBaseReg))
2231*9880d681SAndroid Build Coastguard Worker       return false;
2232*9880d681SAndroid Build Coastguard Worker     NewMinOffset = NewOffset;
2233*9880d681SAndroid Build Coastguard Worker   } else if (NewOffset > LU.MaxOffset) {
2234*9880d681SAndroid Build Coastguard Worker     if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
2235*9880d681SAndroid Build Coastguard Worker                           NewOffset - LU.MinOffset, HasBaseReg))
2236*9880d681SAndroid Build Coastguard Worker       return false;
2237*9880d681SAndroid Build Coastguard Worker     NewMaxOffset = NewOffset;
2238*9880d681SAndroid Build Coastguard Worker   }
2239*9880d681SAndroid Build Coastguard Worker 
2240*9880d681SAndroid Build Coastguard Worker   // Update the use.
2241*9880d681SAndroid Build Coastguard Worker   LU.MinOffset = NewMinOffset;
2242*9880d681SAndroid Build Coastguard Worker   LU.MaxOffset = NewMaxOffset;
2243*9880d681SAndroid Build Coastguard Worker   LU.AccessTy = NewAccessTy;
2244*9880d681SAndroid Build Coastguard Worker   if (NewOffset != LU.Offsets.back())
2245*9880d681SAndroid Build Coastguard Worker     LU.Offsets.push_back(NewOffset);
2246*9880d681SAndroid Build Coastguard Worker   return true;
2247*9880d681SAndroid Build Coastguard Worker }
2248*9880d681SAndroid Build Coastguard Worker 
2249*9880d681SAndroid Build Coastguard Worker /// Return an LSRUse index and an offset value for a fixup which needs the given
2250*9880d681SAndroid Build Coastguard Worker /// expression, with the given kind and optional access type.  Either reuse an
2251*9880d681SAndroid Build Coastguard Worker /// existing use or create a new one, as needed.
getUse(const SCEV * & Expr,LSRUse::KindType Kind,MemAccessTy AccessTy)2252*9880d681SAndroid Build Coastguard Worker std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr,
2253*9880d681SAndroid Build Coastguard Worker                                                LSRUse::KindType Kind,
2254*9880d681SAndroid Build Coastguard Worker                                                MemAccessTy AccessTy) {
2255*9880d681SAndroid Build Coastguard Worker   const SCEV *Copy = Expr;
2256*9880d681SAndroid Build Coastguard Worker   int64_t Offset = ExtractImmediate(Expr, SE);
2257*9880d681SAndroid Build Coastguard Worker 
2258*9880d681SAndroid Build Coastguard Worker   // Basic uses can't accept any offset, for example.
2259*9880d681SAndroid Build Coastguard Worker   if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
2260*9880d681SAndroid Build Coastguard Worker                         Offset, /*HasBaseReg=*/ true)) {
2261*9880d681SAndroid Build Coastguard Worker     Expr = Copy;
2262*9880d681SAndroid Build Coastguard Worker     Offset = 0;
2263*9880d681SAndroid Build Coastguard Worker   }
2264*9880d681SAndroid Build Coastguard Worker 
2265*9880d681SAndroid Build Coastguard Worker   std::pair<UseMapTy::iterator, bool> P =
2266*9880d681SAndroid Build Coastguard Worker     UseMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0));
2267*9880d681SAndroid Build Coastguard Worker   if (!P.second) {
2268*9880d681SAndroid Build Coastguard Worker     // A use already existed with this base.
2269*9880d681SAndroid Build Coastguard Worker     size_t LUIdx = P.first->second;
2270*9880d681SAndroid Build Coastguard Worker     LSRUse &LU = Uses[LUIdx];
2271*9880d681SAndroid Build Coastguard Worker     if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
2272*9880d681SAndroid Build Coastguard Worker       // Reuse this use.
2273*9880d681SAndroid Build Coastguard Worker       return std::make_pair(LUIdx, Offset);
2274*9880d681SAndroid Build Coastguard Worker   }
2275*9880d681SAndroid Build Coastguard Worker 
2276*9880d681SAndroid Build Coastguard Worker   // Create a new use.
2277*9880d681SAndroid Build Coastguard Worker   size_t LUIdx = Uses.size();
2278*9880d681SAndroid Build Coastguard Worker   P.first->second = LUIdx;
2279*9880d681SAndroid Build Coastguard Worker   Uses.push_back(LSRUse(Kind, AccessTy));
2280*9880d681SAndroid Build Coastguard Worker   LSRUse &LU = Uses[LUIdx];
2281*9880d681SAndroid Build Coastguard Worker 
2282*9880d681SAndroid Build Coastguard Worker   // We don't need to track redundant offsets, but we don't need to go out
2283*9880d681SAndroid Build Coastguard Worker   // of our way here to avoid them.
2284*9880d681SAndroid Build Coastguard Worker   if (LU.Offsets.empty() || Offset != LU.Offsets.back())
2285*9880d681SAndroid Build Coastguard Worker     LU.Offsets.push_back(Offset);
2286*9880d681SAndroid Build Coastguard Worker 
2287*9880d681SAndroid Build Coastguard Worker   LU.MinOffset = Offset;
2288*9880d681SAndroid Build Coastguard Worker   LU.MaxOffset = Offset;
2289*9880d681SAndroid Build Coastguard Worker   return std::make_pair(LUIdx, Offset);
2290*9880d681SAndroid Build Coastguard Worker }
2291*9880d681SAndroid Build Coastguard Worker 
2292*9880d681SAndroid Build Coastguard Worker /// Delete the given use from the Uses list.
DeleteUse(LSRUse & LU,size_t LUIdx)2293*9880d681SAndroid Build Coastguard Worker void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
2294*9880d681SAndroid Build Coastguard Worker   if (&LU != &Uses.back())
2295*9880d681SAndroid Build Coastguard Worker     std::swap(LU, Uses.back());
2296*9880d681SAndroid Build Coastguard Worker   Uses.pop_back();
2297*9880d681SAndroid Build Coastguard Worker 
2298*9880d681SAndroid Build Coastguard Worker   // Update RegUses.
2299*9880d681SAndroid Build Coastguard Worker   RegUses.swapAndDropUse(LUIdx, Uses.size());
2300*9880d681SAndroid Build Coastguard Worker }
2301*9880d681SAndroid Build Coastguard Worker 
2302*9880d681SAndroid Build Coastguard Worker /// Look for a use distinct from OrigLU which is has a formula that has the same
2303*9880d681SAndroid Build Coastguard Worker /// registers as the given formula.
2304*9880d681SAndroid Build Coastguard Worker LSRUse *
FindUseWithSimilarFormula(const Formula & OrigF,const LSRUse & OrigLU)2305*9880d681SAndroid Build Coastguard Worker LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
2306*9880d681SAndroid Build Coastguard Worker                                        const LSRUse &OrigLU) {
2307*9880d681SAndroid Build Coastguard Worker   // Search all uses for the formula. This could be more clever.
2308*9880d681SAndroid Build Coastguard Worker   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
2309*9880d681SAndroid Build Coastguard Worker     LSRUse &LU = Uses[LUIdx];
2310*9880d681SAndroid Build Coastguard Worker     // Check whether this use is close enough to OrigLU, to see whether it's
2311*9880d681SAndroid Build Coastguard Worker     // worthwhile looking through its formulae.
2312*9880d681SAndroid Build Coastguard Worker     // Ignore ICmpZero uses because they may contain formulae generated by
2313*9880d681SAndroid Build Coastguard Worker     // GenerateICmpZeroScales, in which case adding fixup offsets may
2314*9880d681SAndroid Build Coastguard Worker     // be invalid.
2315*9880d681SAndroid Build Coastguard Worker     if (&LU != &OrigLU &&
2316*9880d681SAndroid Build Coastguard Worker         LU.Kind != LSRUse::ICmpZero &&
2317*9880d681SAndroid Build Coastguard Worker         LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2318*9880d681SAndroid Build Coastguard Worker         LU.WidestFixupType == OrigLU.WidestFixupType &&
2319*9880d681SAndroid Build Coastguard Worker         LU.HasFormulaWithSameRegs(OrigF)) {
2320*9880d681SAndroid Build Coastguard Worker       // Scan through this use's formulae.
2321*9880d681SAndroid Build Coastguard Worker       for (const Formula &F : LU.Formulae) {
2322*9880d681SAndroid Build Coastguard Worker         // Check to see if this formula has the same registers and symbols
2323*9880d681SAndroid Build Coastguard Worker         // as OrigF.
2324*9880d681SAndroid Build Coastguard Worker         if (F.BaseRegs == OrigF.BaseRegs &&
2325*9880d681SAndroid Build Coastguard Worker             F.ScaledReg == OrigF.ScaledReg &&
2326*9880d681SAndroid Build Coastguard Worker             F.BaseGV == OrigF.BaseGV &&
2327*9880d681SAndroid Build Coastguard Worker             F.Scale == OrigF.Scale &&
2328*9880d681SAndroid Build Coastguard Worker             F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2329*9880d681SAndroid Build Coastguard Worker           if (F.BaseOffset == 0)
2330*9880d681SAndroid Build Coastguard Worker             return &LU;
2331*9880d681SAndroid Build Coastguard Worker           // This is the formula where all the registers and symbols matched;
2332*9880d681SAndroid Build Coastguard Worker           // there aren't going to be any others. Since we declined it, we
2333*9880d681SAndroid Build Coastguard Worker           // can skip the rest of the formulae and proceed to the next LSRUse.
2334*9880d681SAndroid Build Coastguard Worker           break;
2335*9880d681SAndroid Build Coastguard Worker         }
2336*9880d681SAndroid Build Coastguard Worker       }
2337*9880d681SAndroid Build Coastguard Worker     }
2338*9880d681SAndroid Build Coastguard Worker   }
2339*9880d681SAndroid Build Coastguard Worker 
2340*9880d681SAndroid Build Coastguard Worker   // Nothing looked good.
2341*9880d681SAndroid Build Coastguard Worker   return nullptr;
2342*9880d681SAndroid Build Coastguard Worker }
2343*9880d681SAndroid Build Coastguard Worker 
CollectInterestingTypesAndFactors()2344*9880d681SAndroid Build Coastguard Worker void LSRInstance::CollectInterestingTypesAndFactors() {
2345*9880d681SAndroid Build Coastguard Worker   SmallSetVector<const SCEV *, 4> Strides;
2346*9880d681SAndroid Build Coastguard Worker 
2347*9880d681SAndroid Build Coastguard Worker   // Collect interesting types and strides.
2348*9880d681SAndroid Build Coastguard Worker   SmallVector<const SCEV *, 4> Worklist;
2349*9880d681SAndroid Build Coastguard Worker   for (const IVStrideUse &U : IU) {
2350*9880d681SAndroid Build Coastguard Worker     const SCEV *Expr = IU.getExpr(U);
2351*9880d681SAndroid Build Coastguard Worker 
2352*9880d681SAndroid Build Coastguard Worker     // Collect interesting types.
2353*9880d681SAndroid Build Coastguard Worker     Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
2354*9880d681SAndroid Build Coastguard Worker 
2355*9880d681SAndroid Build Coastguard Worker     // Add strides for mentioned loops.
2356*9880d681SAndroid Build Coastguard Worker     Worklist.push_back(Expr);
2357*9880d681SAndroid Build Coastguard Worker     do {
2358*9880d681SAndroid Build Coastguard Worker       const SCEV *S = Worklist.pop_back_val();
2359*9880d681SAndroid Build Coastguard Worker       if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
2360*9880d681SAndroid Build Coastguard Worker         if (AR->getLoop() == L)
2361*9880d681SAndroid Build Coastguard Worker           Strides.insert(AR->getStepRecurrence(SE));
2362*9880d681SAndroid Build Coastguard Worker         Worklist.push_back(AR->getStart());
2363*9880d681SAndroid Build Coastguard Worker       } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
2364*9880d681SAndroid Build Coastguard Worker         Worklist.append(Add->op_begin(), Add->op_end());
2365*9880d681SAndroid Build Coastguard Worker       }
2366*9880d681SAndroid Build Coastguard Worker     } while (!Worklist.empty());
2367*9880d681SAndroid Build Coastguard Worker   }
2368*9880d681SAndroid Build Coastguard Worker 
2369*9880d681SAndroid Build Coastguard Worker   // Compute interesting factors from the set of interesting strides.
2370*9880d681SAndroid Build Coastguard Worker   for (SmallSetVector<const SCEV *, 4>::const_iterator
2371*9880d681SAndroid Build Coastguard Worker        I = Strides.begin(), E = Strides.end(); I != E; ++I)
2372*9880d681SAndroid Build Coastguard Worker     for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
2373*9880d681SAndroid Build Coastguard Worker          std::next(I); NewStrideIter != E; ++NewStrideIter) {
2374*9880d681SAndroid Build Coastguard Worker       const SCEV *OldStride = *I;
2375*9880d681SAndroid Build Coastguard Worker       const SCEV *NewStride = *NewStrideIter;
2376*9880d681SAndroid Build Coastguard Worker 
2377*9880d681SAndroid Build Coastguard Worker       if (SE.getTypeSizeInBits(OldStride->getType()) !=
2378*9880d681SAndroid Build Coastguard Worker           SE.getTypeSizeInBits(NewStride->getType())) {
2379*9880d681SAndroid Build Coastguard Worker         if (SE.getTypeSizeInBits(OldStride->getType()) >
2380*9880d681SAndroid Build Coastguard Worker             SE.getTypeSizeInBits(NewStride->getType()))
2381*9880d681SAndroid Build Coastguard Worker           NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
2382*9880d681SAndroid Build Coastguard Worker         else
2383*9880d681SAndroid Build Coastguard Worker           OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
2384*9880d681SAndroid Build Coastguard Worker       }
2385*9880d681SAndroid Build Coastguard Worker       if (const SCEVConstant *Factor =
2386*9880d681SAndroid Build Coastguard Worker             dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
2387*9880d681SAndroid Build Coastguard Worker                                                         SE, true))) {
2388*9880d681SAndroid Build Coastguard Worker         if (Factor->getAPInt().getMinSignedBits() <= 64)
2389*9880d681SAndroid Build Coastguard Worker           Factors.insert(Factor->getAPInt().getSExtValue());
2390*9880d681SAndroid Build Coastguard Worker       } else if (const SCEVConstant *Factor =
2391*9880d681SAndroid Build Coastguard Worker                    dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
2392*9880d681SAndroid Build Coastguard Worker                                                                NewStride,
2393*9880d681SAndroid Build Coastguard Worker                                                                SE, true))) {
2394*9880d681SAndroid Build Coastguard Worker         if (Factor->getAPInt().getMinSignedBits() <= 64)
2395*9880d681SAndroid Build Coastguard Worker           Factors.insert(Factor->getAPInt().getSExtValue());
2396*9880d681SAndroid Build Coastguard Worker       }
2397*9880d681SAndroid Build Coastguard Worker     }
2398*9880d681SAndroid Build Coastguard Worker 
2399*9880d681SAndroid Build Coastguard Worker   // If all uses use the same type, don't bother looking for truncation-based
2400*9880d681SAndroid Build Coastguard Worker   // reuse.
2401*9880d681SAndroid Build Coastguard Worker   if (Types.size() == 1)
2402*9880d681SAndroid Build Coastguard Worker     Types.clear();
2403*9880d681SAndroid Build Coastguard Worker 
2404*9880d681SAndroid Build Coastguard Worker   DEBUG(print_factors_and_types(dbgs()));
2405*9880d681SAndroid Build Coastguard Worker }
2406*9880d681SAndroid Build Coastguard Worker 
2407*9880d681SAndroid Build Coastguard Worker /// Helper for CollectChains that finds an IV operand (computed by an AddRec in
2408*9880d681SAndroid Build Coastguard Worker /// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to
2409*9880d681SAndroid Build Coastguard Worker /// IVStrideUses, we could partially skip this.
2410*9880d681SAndroid Build Coastguard Worker static User::op_iterator
findIVOperand(User::op_iterator OI,User::op_iterator OE,Loop * L,ScalarEvolution & SE)2411*9880d681SAndroid Build Coastguard Worker findIVOperand(User::op_iterator OI, User::op_iterator OE,
2412*9880d681SAndroid Build Coastguard Worker               Loop *L, ScalarEvolution &SE) {
2413*9880d681SAndroid Build Coastguard Worker   for(; OI != OE; ++OI) {
2414*9880d681SAndroid Build Coastguard Worker     if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
2415*9880d681SAndroid Build Coastguard Worker       if (!SE.isSCEVable(Oper->getType()))
2416*9880d681SAndroid Build Coastguard Worker         continue;
2417*9880d681SAndroid Build Coastguard Worker 
2418*9880d681SAndroid Build Coastguard Worker       if (const SCEVAddRecExpr *AR =
2419*9880d681SAndroid Build Coastguard Worker           dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {
2420*9880d681SAndroid Build Coastguard Worker         if (AR->getLoop() == L)
2421*9880d681SAndroid Build Coastguard Worker           break;
2422*9880d681SAndroid Build Coastguard Worker       }
2423*9880d681SAndroid Build Coastguard Worker     }
2424*9880d681SAndroid Build Coastguard Worker   }
2425*9880d681SAndroid Build Coastguard Worker   return OI;
2426*9880d681SAndroid Build Coastguard Worker }
2427*9880d681SAndroid Build Coastguard Worker 
2428*9880d681SAndroid Build Coastguard Worker /// IVChain logic must consistenctly peek base TruncInst operands, so wrap it in
2429*9880d681SAndroid Build Coastguard Worker /// a convenient helper.
getWideOperand(Value * Oper)2430*9880d681SAndroid Build Coastguard Worker static Value *getWideOperand(Value *Oper) {
2431*9880d681SAndroid Build Coastguard Worker   if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
2432*9880d681SAndroid Build Coastguard Worker     return Trunc->getOperand(0);
2433*9880d681SAndroid Build Coastguard Worker   return Oper;
2434*9880d681SAndroid Build Coastguard Worker }
2435*9880d681SAndroid Build Coastguard Worker 
2436*9880d681SAndroid Build Coastguard Worker /// Return true if we allow an IV chain to include both types.
isCompatibleIVType(Value * LVal,Value * RVal)2437*9880d681SAndroid Build Coastguard Worker static bool isCompatibleIVType(Value *LVal, Value *RVal) {
2438*9880d681SAndroid Build Coastguard Worker   Type *LType = LVal->getType();
2439*9880d681SAndroid Build Coastguard Worker   Type *RType = RVal->getType();
2440*9880d681SAndroid Build Coastguard Worker   return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy());
2441*9880d681SAndroid Build Coastguard Worker }
2442*9880d681SAndroid Build Coastguard Worker 
2443*9880d681SAndroid Build Coastguard Worker /// Return an approximation of this SCEV expression's "base", or NULL for any
2444*9880d681SAndroid Build Coastguard Worker /// constant. Returning the expression itself is conservative. Returning a
2445*9880d681SAndroid Build Coastguard Worker /// deeper subexpression is more precise and valid as long as it isn't less
2446*9880d681SAndroid Build Coastguard Worker /// complex than another subexpression. For expressions involving multiple
2447*9880d681SAndroid Build Coastguard Worker /// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids
2448*9880d681SAndroid Build Coastguard Worker /// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i],
2449*9880d681SAndroid Build Coastguard Worker /// IVInc==b-a.
2450*9880d681SAndroid Build Coastguard Worker ///
2451*9880d681SAndroid Build Coastguard Worker /// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
2452*9880d681SAndroid Build Coastguard Worker /// SCEVUnknown, we simply return the rightmost SCEV operand.
getExprBase(const SCEV * S)2453*9880d681SAndroid Build Coastguard Worker static const SCEV *getExprBase(const SCEV *S) {
2454*9880d681SAndroid Build Coastguard Worker   switch (S->getSCEVType()) {
2455*9880d681SAndroid Build Coastguard Worker   default: // uncluding scUnknown.
2456*9880d681SAndroid Build Coastguard Worker     return S;
2457*9880d681SAndroid Build Coastguard Worker   case scConstant:
2458*9880d681SAndroid Build Coastguard Worker     return nullptr;
2459*9880d681SAndroid Build Coastguard Worker   case scTruncate:
2460*9880d681SAndroid Build Coastguard Worker     return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
2461*9880d681SAndroid Build Coastguard Worker   case scZeroExtend:
2462*9880d681SAndroid Build Coastguard Worker     return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
2463*9880d681SAndroid Build Coastguard Worker   case scSignExtend:
2464*9880d681SAndroid Build Coastguard Worker     return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
2465*9880d681SAndroid Build Coastguard Worker   case scAddExpr: {
2466*9880d681SAndroid Build Coastguard Worker     // Skip over scaled operands (scMulExpr) to follow add operands as long as
2467*9880d681SAndroid Build Coastguard Worker     // there's nothing more complex.
2468*9880d681SAndroid Build Coastguard Worker     // FIXME: not sure if we want to recognize negation.
2469*9880d681SAndroid Build Coastguard Worker     const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
2470*9880d681SAndroid Build Coastguard Worker     for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(Add->op_end()),
2471*9880d681SAndroid Build Coastguard Worker            E(Add->op_begin()); I != E; ++I) {
2472*9880d681SAndroid Build Coastguard Worker       const SCEV *SubExpr = *I;
2473*9880d681SAndroid Build Coastguard Worker       if (SubExpr->getSCEVType() == scAddExpr)
2474*9880d681SAndroid Build Coastguard Worker         return getExprBase(SubExpr);
2475*9880d681SAndroid Build Coastguard Worker 
2476*9880d681SAndroid Build Coastguard Worker       if (SubExpr->getSCEVType() != scMulExpr)
2477*9880d681SAndroid Build Coastguard Worker         return SubExpr;
2478*9880d681SAndroid Build Coastguard Worker     }
2479*9880d681SAndroid Build Coastguard Worker     return S; // all operands are scaled, be conservative.
2480*9880d681SAndroid Build Coastguard Worker   }
2481*9880d681SAndroid Build Coastguard Worker   case scAddRecExpr:
2482*9880d681SAndroid Build Coastguard Worker     return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
2483*9880d681SAndroid Build Coastguard Worker   }
2484*9880d681SAndroid Build Coastguard Worker }
2485*9880d681SAndroid Build Coastguard Worker 
2486*9880d681SAndroid Build Coastguard Worker /// Return true if the chain increment is profitable to expand into a loop
2487*9880d681SAndroid Build Coastguard Worker /// invariant value, which may require its own register. A profitable chain
2488*9880d681SAndroid Build Coastguard Worker /// increment will be an offset relative to the same base. We allow such offsets
2489*9880d681SAndroid Build Coastguard Worker /// to potentially be used as chain increment as long as it's not obviously
2490*9880d681SAndroid Build Coastguard Worker /// expensive to expand using real instructions.
isProfitableIncrement(const SCEV * OperExpr,const SCEV * IncExpr,ScalarEvolution & SE)2491*9880d681SAndroid Build Coastguard Worker bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
2492*9880d681SAndroid Build Coastguard Worker                                     const SCEV *IncExpr,
2493*9880d681SAndroid Build Coastguard Worker                                     ScalarEvolution &SE) {
2494*9880d681SAndroid Build Coastguard Worker   // Aggressively form chains when -stress-ivchain.
2495*9880d681SAndroid Build Coastguard Worker   if (StressIVChain)
2496*9880d681SAndroid Build Coastguard Worker     return true;
2497*9880d681SAndroid Build Coastguard Worker 
2498*9880d681SAndroid Build Coastguard Worker   // Do not replace a constant offset from IV head with a nonconstant IV
2499*9880d681SAndroid Build Coastguard Worker   // increment.
2500*9880d681SAndroid Build Coastguard Worker   if (!isa<SCEVConstant>(IncExpr)) {
2501*9880d681SAndroid Build Coastguard Worker     const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
2502*9880d681SAndroid Build Coastguard Worker     if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
2503*9880d681SAndroid Build Coastguard Worker       return 0;
2504*9880d681SAndroid Build Coastguard Worker   }
2505*9880d681SAndroid Build Coastguard Worker 
2506*9880d681SAndroid Build Coastguard Worker   SmallPtrSet<const SCEV*, 8> Processed;
2507*9880d681SAndroid Build Coastguard Worker   return !isHighCostExpansion(IncExpr, Processed, SE);
2508*9880d681SAndroid Build Coastguard Worker }
2509*9880d681SAndroid Build Coastguard Worker 
2510*9880d681SAndroid Build Coastguard Worker /// Return true if the number of registers needed for the chain is estimated to
2511*9880d681SAndroid Build Coastguard Worker /// be less than the number required for the individual IV users. First prohibit
2512*9880d681SAndroid Build Coastguard Worker /// any IV users that keep the IV live across increments (the Users set should
2513*9880d681SAndroid Build Coastguard Worker /// be empty). Next count the number and type of increments in the chain.
2514*9880d681SAndroid Build Coastguard Worker ///
2515*9880d681SAndroid Build Coastguard Worker /// Chaining IVs can lead to considerable code bloat if ISEL doesn't
2516*9880d681SAndroid Build Coastguard Worker /// effectively use postinc addressing modes. Only consider it profitable it the
2517*9880d681SAndroid Build Coastguard Worker /// increments can be computed in fewer registers when chained.
2518*9880d681SAndroid Build Coastguard Worker ///
2519*9880d681SAndroid Build Coastguard Worker /// TODO: Consider IVInc free if it's already used in another chains.
2520*9880d681SAndroid Build Coastguard Worker static bool
isProfitableChain(IVChain & Chain,SmallPtrSetImpl<Instruction * > & Users,ScalarEvolution & SE,const TargetTransformInfo & TTI)2521*9880d681SAndroid Build Coastguard Worker isProfitableChain(IVChain &Chain, SmallPtrSetImpl<Instruction*> &Users,
2522*9880d681SAndroid Build Coastguard Worker                   ScalarEvolution &SE, const TargetTransformInfo &TTI) {
2523*9880d681SAndroid Build Coastguard Worker   if (StressIVChain)
2524*9880d681SAndroid Build Coastguard Worker     return true;
2525*9880d681SAndroid Build Coastguard Worker 
2526*9880d681SAndroid Build Coastguard Worker   if (!Chain.hasIncs())
2527*9880d681SAndroid Build Coastguard Worker     return false;
2528*9880d681SAndroid Build Coastguard Worker 
2529*9880d681SAndroid Build Coastguard Worker   if (!Users.empty()) {
2530*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
2531*9880d681SAndroid Build Coastguard Worker           for (Instruction *Inst : Users) {
2532*9880d681SAndroid Build Coastguard Worker             dbgs() << "  " << *Inst << "\n";
2533*9880d681SAndroid Build Coastguard Worker           });
2534*9880d681SAndroid Build Coastguard Worker     return false;
2535*9880d681SAndroid Build Coastguard Worker   }
2536*9880d681SAndroid Build Coastguard Worker   assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
2537*9880d681SAndroid Build Coastguard Worker 
2538*9880d681SAndroid Build Coastguard Worker   // The chain itself may require a register, so intialize cost to 1.
2539*9880d681SAndroid Build Coastguard Worker   int cost = 1;
2540*9880d681SAndroid Build Coastguard Worker 
2541*9880d681SAndroid Build Coastguard Worker   // A complete chain likely eliminates the need for keeping the original IV in
2542*9880d681SAndroid Build Coastguard Worker   // a register. LSR does not currently know how to form a complete chain unless
2543*9880d681SAndroid Build Coastguard Worker   // the header phi already exists.
2544*9880d681SAndroid Build Coastguard Worker   if (isa<PHINode>(Chain.tailUserInst())
2545*9880d681SAndroid Build Coastguard Worker       && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
2546*9880d681SAndroid Build Coastguard Worker     --cost;
2547*9880d681SAndroid Build Coastguard Worker   }
2548*9880d681SAndroid Build Coastguard Worker   const SCEV *LastIncExpr = nullptr;
2549*9880d681SAndroid Build Coastguard Worker   unsigned NumConstIncrements = 0;
2550*9880d681SAndroid Build Coastguard Worker   unsigned NumVarIncrements = 0;
2551*9880d681SAndroid Build Coastguard Worker   unsigned NumReusedIncrements = 0;
2552*9880d681SAndroid Build Coastguard Worker   for (const IVInc &Inc : Chain) {
2553*9880d681SAndroid Build Coastguard Worker     if (Inc.IncExpr->isZero())
2554*9880d681SAndroid Build Coastguard Worker       continue;
2555*9880d681SAndroid Build Coastguard Worker 
2556*9880d681SAndroid Build Coastguard Worker     // Incrementing by zero or some constant is neutral. We assume constants can
2557*9880d681SAndroid Build Coastguard Worker     // be folded into an addressing mode or an add's immediate operand.
2558*9880d681SAndroid Build Coastguard Worker     if (isa<SCEVConstant>(Inc.IncExpr)) {
2559*9880d681SAndroid Build Coastguard Worker       ++NumConstIncrements;
2560*9880d681SAndroid Build Coastguard Worker       continue;
2561*9880d681SAndroid Build Coastguard Worker     }
2562*9880d681SAndroid Build Coastguard Worker 
2563*9880d681SAndroid Build Coastguard Worker     if (Inc.IncExpr == LastIncExpr)
2564*9880d681SAndroid Build Coastguard Worker       ++NumReusedIncrements;
2565*9880d681SAndroid Build Coastguard Worker     else
2566*9880d681SAndroid Build Coastguard Worker       ++NumVarIncrements;
2567*9880d681SAndroid Build Coastguard Worker 
2568*9880d681SAndroid Build Coastguard Worker     LastIncExpr = Inc.IncExpr;
2569*9880d681SAndroid Build Coastguard Worker   }
2570*9880d681SAndroid Build Coastguard Worker   // An IV chain with a single increment is handled by LSR's postinc
2571*9880d681SAndroid Build Coastguard Worker   // uses. However, a chain with multiple increments requires keeping the IV's
2572*9880d681SAndroid Build Coastguard Worker   // value live longer than it needs to be if chained.
2573*9880d681SAndroid Build Coastguard Worker   if (NumConstIncrements > 1)
2574*9880d681SAndroid Build Coastguard Worker     --cost;
2575*9880d681SAndroid Build Coastguard Worker 
2576*9880d681SAndroid Build Coastguard Worker   // Materializing increment expressions in the preheader that didn't exist in
2577*9880d681SAndroid Build Coastguard Worker   // the original code may cost a register. For example, sign-extended array
2578*9880d681SAndroid Build Coastguard Worker   // indices can produce ridiculous increments like this:
2579*9880d681SAndroid Build Coastguard Worker   // IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
2580*9880d681SAndroid Build Coastguard Worker   cost += NumVarIncrements;
2581*9880d681SAndroid Build Coastguard Worker 
2582*9880d681SAndroid Build Coastguard Worker   // Reusing variable increments likely saves a register to hold the multiple of
2583*9880d681SAndroid Build Coastguard Worker   // the stride.
2584*9880d681SAndroid Build Coastguard Worker   cost -= NumReusedIncrements;
2585*9880d681SAndroid Build Coastguard Worker 
2586*9880d681SAndroid Build Coastguard Worker   DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
2587*9880d681SAndroid Build Coastguard Worker                << "\n");
2588*9880d681SAndroid Build Coastguard Worker 
2589*9880d681SAndroid Build Coastguard Worker   return cost < 0;
2590*9880d681SAndroid Build Coastguard Worker }
2591*9880d681SAndroid Build Coastguard Worker 
2592*9880d681SAndroid Build Coastguard Worker /// Add this IV user to an existing chain or make it the head of a new chain.
ChainInstruction(Instruction * UserInst,Instruction * IVOper,SmallVectorImpl<ChainUsers> & ChainUsersVec)2593*9880d681SAndroid Build Coastguard Worker void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
2594*9880d681SAndroid Build Coastguard Worker                                    SmallVectorImpl<ChainUsers> &ChainUsersVec) {
2595*9880d681SAndroid Build Coastguard Worker   // When IVs are used as types of varying widths, they are generally converted
2596*9880d681SAndroid Build Coastguard Worker   // to a wider type with some uses remaining narrow under a (free) trunc.
2597*9880d681SAndroid Build Coastguard Worker   Value *const NextIV = getWideOperand(IVOper);
2598*9880d681SAndroid Build Coastguard Worker   const SCEV *const OperExpr = SE.getSCEV(NextIV);
2599*9880d681SAndroid Build Coastguard Worker   const SCEV *const OperExprBase = getExprBase(OperExpr);
2600*9880d681SAndroid Build Coastguard Worker 
2601*9880d681SAndroid Build Coastguard Worker   // Visit all existing chains. Check if its IVOper can be computed as a
2602*9880d681SAndroid Build Coastguard Worker   // profitable loop invariant increment from the last link in the Chain.
2603*9880d681SAndroid Build Coastguard Worker   unsigned ChainIdx = 0, NChains = IVChainVec.size();
2604*9880d681SAndroid Build Coastguard Worker   const SCEV *LastIncExpr = nullptr;
2605*9880d681SAndroid Build Coastguard Worker   for (; ChainIdx < NChains; ++ChainIdx) {
2606*9880d681SAndroid Build Coastguard Worker     IVChain &Chain = IVChainVec[ChainIdx];
2607*9880d681SAndroid Build Coastguard Worker 
2608*9880d681SAndroid Build Coastguard Worker     // Prune the solution space aggressively by checking that both IV operands
2609*9880d681SAndroid Build Coastguard Worker     // are expressions that operate on the same unscaled SCEVUnknown. This
2610*9880d681SAndroid Build Coastguard Worker     // "base" will be canceled by the subsequent getMinusSCEV call. Checking
2611*9880d681SAndroid Build Coastguard Worker     // first avoids creating extra SCEV expressions.
2612*9880d681SAndroid Build Coastguard Worker     if (!StressIVChain && Chain.ExprBase != OperExprBase)
2613*9880d681SAndroid Build Coastguard Worker       continue;
2614*9880d681SAndroid Build Coastguard Worker 
2615*9880d681SAndroid Build Coastguard Worker     Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);
2616*9880d681SAndroid Build Coastguard Worker     if (!isCompatibleIVType(PrevIV, NextIV))
2617*9880d681SAndroid Build Coastguard Worker       continue;
2618*9880d681SAndroid Build Coastguard Worker 
2619*9880d681SAndroid Build Coastguard Worker     // A phi node terminates a chain.
2620*9880d681SAndroid Build Coastguard Worker     if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
2621*9880d681SAndroid Build Coastguard Worker       continue;
2622*9880d681SAndroid Build Coastguard Worker 
2623*9880d681SAndroid Build Coastguard Worker     // The increment must be loop-invariant so it can be kept in a register.
2624*9880d681SAndroid Build Coastguard Worker     const SCEV *PrevExpr = SE.getSCEV(PrevIV);
2625*9880d681SAndroid Build Coastguard Worker     const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
2626*9880d681SAndroid Build Coastguard Worker     if (!SE.isLoopInvariant(IncExpr, L))
2627*9880d681SAndroid Build Coastguard Worker       continue;
2628*9880d681SAndroid Build Coastguard Worker 
2629*9880d681SAndroid Build Coastguard Worker     if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
2630*9880d681SAndroid Build Coastguard Worker       LastIncExpr = IncExpr;
2631*9880d681SAndroid Build Coastguard Worker       break;
2632*9880d681SAndroid Build Coastguard Worker     }
2633*9880d681SAndroid Build Coastguard Worker   }
2634*9880d681SAndroid Build Coastguard Worker   // If we haven't found a chain, create a new one, unless we hit the max. Don't
2635*9880d681SAndroid Build Coastguard Worker   // bother for phi nodes, because they must be last in the chain.
2636*9880d681SAndroid Build Coastguard Worker   if (ChainIdx == NChains) {
2637*9880d681SAndroid Build Coastguard Worker     if (isa<PHINode>(UserInst))
2638*9880d681SAndroid Build Coastguard Worker       return;
2639*9880d681SAndroid Build Coastguard Worker     if (NChains >= MaxChains && !StressIVChain) {
2640*9880d681SAndroid Build Coastguard Worker       DEBUG(dbgs() << "IV Chain Limit\n");
2641*9880d681SAndroid Build Coastguard Worker       return;
2642*9880d681SAndroid Build Coastguard Worker     }
2643*9880d681SAndroid Build Coastguard Worker     LastIncExpr = OperExpr;
2644*9880d681SAndroid Build Coastguard Worker     // IVUsers may have skipped over sign/zero extensions. We don't currently
2645*9880d681SAndroid Build Coastguard Worker     // attempt to form chains involving extensions unless they can be hoisted
2646*9880d681SAndroid Build Coastguard Worker     // into this loop's AddRec.
2647*9880d681SAndroid Build Coastguard Worker     if (!isa<SCEVAddRecExpr>(LastIncExpr))
2648*9880d681SAndroid Build Coastguard Worker       return;
2649*9880d681SAndroid Build Coastguard Worker     ++NChains;
2650*9880d681SAndroid Build Coastguard Worker     IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
2651*9880d681SAndroid Build Coastguard Worker                                  OperExprBase));
2652*9880d681SAndroid Build Coastguard Worker     ChainUsersVec.resize(NChains);
2653*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
2654*9880d681SAndroid Build Coastguard Worker                  << ") IV=" << *LastIncExpr << "\n");
2655*9880d681SAndroid Build Coastguard Worker   } else {
2656*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "IV Chain#" << ChainIdx << "  Inc: (" << *UserInst
2657*9880d681SAndroid Build Coastguard Worker                  << ") IV+" << *LastIncExpr << "\n");
2658*9880d681SAndroid Build Coastguard Worker     // Add this IV user to the end of the chain.
2659*9880d681SAndroid Build Coastguard Worker     IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
2660*9880d681SAndroid Build Coastguard Worker   }
2661*9880d681SAndroid Build Coastguard Worker   IVChain &Chain = IVChainVec[ChainIdx];
2662*9880d681SAndroid Build Coastguard Worker 
2663*9880d681SAndroid Build Coastguard Worker   SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
2664*9880d681SAndroid Build Coastguard Worker   // This chain's NearUsers become FarUsers.
2665*9880d681SAndroid Build Coastguard Worker   if (!LastIncExpr->isZero()) {
2666*9880d681SAndroid Build Coastguard Worker     ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),
2667*9880d681SAndroid Build Coastguard Worker                                             NearUsers.end());
2668*9880d681SAndroid Build Coastguard Worker     NearUsers.clear();
2669*9880d681SAndroid Build Coastguard Worker   }
2670*9880d681SAndroid Build Coastguard Worker 
2671*9880d681SAndroid Build Coastguard Worker   // All other uses of IVOperand become near uses of the chain.
2672*9880d681SAndroid Build Coastguard Worker   // We currently ignore intermediate values within SCEV expressions, assuming
2673*9880d681SAndroid Build Coastguard Worker   // they will eventually be used be the current chain, or can be computed
2674*9880d681SAndroid Build Coastguard Worker   // from one of the chain increments. To be more precise we could
2675*9880d681SAndroid Build Coastguard Worker   // transitively follow its user and only add leaf IV users to the set.
2676*9880d681SAndroid Build Coastguard Worker   for (User *U : IVOper->users()) {
2677*9880d681SAndroid Build Coastguard Worker     Instruction *OtherUse = dyn_cast<Instruction>(U);
2678*9880d681SAndroid Build Coastguard Worker     if (!OtherUse)
2679*9880d681SAndroid Build Coastguard Worker       continue;
2680*9880d681SAndroid Build Coastguard Worker     // Uses in the chain will no longer be uses if the chain is formed.
2681*9880d681SAndroid Build Coastguard Worker     // Include the head of the chain in this iteration (not Chain.begin()).
2682*9880d681SAndroid Build Coastguard Worker     IVChain::const_iterator IncIter = Chain.Incs.begin();
2683*9880d681SAndroid Build Coastguard Worker     IVChain::const_iterator IncEnd = Chain.Incs.end();
2684*9880d681SAndroid Build Coastguard Worker     for( ; IncIter != IncEnd; ++IncIter) {
2685*9880d681SAndroid Build Coastguard Worker       if (IncIter->UserInst == OtherUse)
2686*9880d681SAndroid Build Coastguard Worker         break;
2687*9880d681SAndroid Build Coastguard Worker     }
2688*9880d681SAndroid Build Coastguard Worker     if (IncIter != IncEnd)
2689*9880d681SAndroid Build Coastguard Worker       continue;
2690*9880d681SAndroid Build Coastguard Worker 
2691*9880d681SAndroid Build Coastguard Worker     if (SE.isSCEVable(OtherUse->getType())
2692*9880d681SAndroid Build Coastguard Worker         && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
2693*9880d681SAndroid Build Coastguard Worker         && IU.isIVUserOrOperand(OtherUse)) {
2694*9880d681SAndroid Build Coastguard Worker       continue;
2695*9880d681SAndroid Build Coastguard Worker     }
2696*9880d681SAndroid Build Coastguard Worker     NearUsers.insert(OtherUse);
2697*9880d681SAndroid Build Coastguard Worker   }
2698*9880d681SAndroid Build Coastguard Worker 
2699*9880d681SAndroid Build Coastguard Worker   // Since this user is part of the chain, it's no longer considered a use
2700*9880d681SAndroid Build Coastguard Worker   // of the chain.
2701*9880d681SAndroid Build Coastguard Worker   ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
2702*9880d681SAndroid Build Coastguard Worker }
2703*9880d681SAndroid Build Coastguard Worker 
2704*9880d681SAndroid Build Coastguard Worker /// Populate the vector of Chains.
2705*9880d681SAndroid Build Coastguard Worker ///
2706*9880d681SAndroid Build Coastguard Worker /// This decreases ILP at the architecture level. Targets with ample registers,
2707*9880d681SAndroid Build Coastguard Worker /// multiple memory ports, and no register renaming probably don't want
2708*9880d681SAndroid Build Coastguard Worker /// this. However, such targets should probably disable LSR altogether.
2709*9880d681SAndroid Build Coastguard Worker ///
2710*9880d681SAndroid Build Coastguard Worker /// The job of LSR is to make a reasonable choice of induction variables across
2711*9880d681SAndroid Build Coastguard Worker /// the loop. Subsequent passes can easily "unchain" computation exposing more
2712*9880d681SAndroid Build Coastguard Worker /// ILP *within the loop* if the target wants it.
2713*9880d681SAndroid Build Coastguard Worker ///
2714*9880d681SAndroid Build Coastguard Worker /// Finding the best IV chain is potentially a scheduling problem. Since LSR
2715*9880d681SAndroid Build Coastguard Worker /// will not reorder memory operations, it will recognize this as a chain, but
2716*9880d681SAndroid Build Coastguard Worker /// will generate redundant IV increments. Ideally this would be corrected later
2717*9880d681SAndroid Build Coastguard Worker /// by a smart scheduler:
2718*9880d681SAndroid Build Coastguard Worker ///        = A[i]
2719*9880d681SAndroid Build Coastguard Worker ///        = A[i+x]
2720*9880d681SAndroid Build Coastguard Worker /// A[i]   =
2721*9880d681SAndroid Build Coastguard Worker /// A[i+x] =
2722*9880d681SAndroid Build Coastguard Worker ///
2723*9880d681SAndroid Build Coastguard Worker /// TODO: Walk the entire domtree within this loop, not just the path to the
2724*9880d681SAndroid Build Coastguard Worker /// loop latch. This will discover chains on side paths, but requires
2725*9880d681SAndroid Build Coastguard Worker /// maintaining multiple copies of the Chains state.
CollectChains()2726*9880d681SAndroid Build Coastguard Worker void LSRInstance::CollectChains() {
2727*9880d681SAndroid Build Coastguard Worker   DEBUG(dbgs() << "Collecting IV Chains.\n");
2728*9880d681SAndroid Build Coastguard Worker   SmallVector<ChainUsers, 8> ChainUsersVec;
2729*9880d681SAndroid Build Coastguard Worker 
2730*9880d681SAndroid Build Coastguard Worker   SmallVector<BasicBlock *,8> LatchPath;
2731*9880d681SAndroid Build Coastguard Worker   BasicBlock *LoopHeader = L->getHeader();
2732*9880d681SAndroid Build Coastguard Worker   for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
2733*9880d681SAndroid Build Coastguard Worker        Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
2734*9880d681SAndroid Build Coastguard Worker     LatchPath.push_back(Rung->getBlock());
2735*9880d681SAndroid Build Coastguard Worker   }
2736*9880d681SAndroid Build Coastguard Worker   LatchPath.push_back(LoopHeader);
2737*9880d681SAndroid Build Coastguard Worker 
2738*9880d681SAndroid Build Coastguard Worker   // Walk the instruction stream from the loop header to the loop latch.
2739*9880d681SAndroid Build Coastguard Worker   for (BasicBlock *BB : reverse(LatchPath)) {
2740*9880d681SAndroid Build Coastguard Worker     for (Instruction &I : *BB) {
2741*9880d681SAndroid Build Coastguard Worker       // Skip instructions that weren't seen by IVUsers analysis.
2742*9880d681SAndroid Build Coastguard Worker       if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I))
2743*9880d681SAndroid Build Coastguard Worker         continue;
2744*9880d681SAndroid Build Coastguard Worker 
2745*9880d681SAndroid Build Coastguard Worker       // Ignore users that are part of a SCEV expression. This way we only
2746*9880d681SAndroid Build Coastguard Worker       // consider leaf IV Users. This effectively rediscovers a portion of
2747*9880d681SAndroid Build Coastguard Worker       // IVUsers analysis but in program order this time.
2748*9880d681SAndroid Build Coastguard Worker       if (SE.isSCEVable(I.getType()) && !isa<SCEVUnknown>(SE.getSCEV(&I)))
2749*9880d681SAndroid Build Coastguard Worker         continue;
2750*9880d681SAndroid Build Coastguard Worker 
2751*9880d681SAndroid Build Coastguard Worker       // Remove this instruction from any NearUsers set it may be in.
2752*9880d681SAndroid Build Coastguard Worker       for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
2753*9880d681SAndroid Build Coastguard Worker            ChainIdx < NChains; ++ChainIdx) {
2754*9880d681SAndroid Build Coastguard Worker         ChainUsersVec[ChainIdx].NearUsers.erase(&I);
2755*9880d681SAndroid Build Coastguard Worker       }
2756*9880d681SAndroid Build Coastguard Worker       // Search for operands that can be chained.
2757*9880d681SAndroid Build Coastguard Worker       SmallPtrSet<Instruction*, 4> UniqueOperands;
2758*9880d681SAndroid Build Coastguard Worker       User::op_iterator IVOpEnd = I.op_end();
2759*9880d681SAndroid Build Coastguard Worker       User::op_iterator IVOpIter = findIVOperand(I.op_begin(), IVOpEnd, L, SE);
2760*9880d681SAndroid Build Coastguard Worker       while (IVOpIter != IVOpEnd) {
2761*9880d681SAndroid Build Coastguard Worker         Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
2762*9880d681SAndroid Build Coastguard Worker         if (UniqueOperands.insert(IVOpInst).second)
2763*9880d681SAndroid Build Coastguard Worker           ChainInstruction(&I, IVOpInst, ChainUsersVec);
2764*9880d681SAndroid Build Coastguard Worker         IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
2765*9880d681SAndroid Build Coastguard Worker       }
2766*9880d681SAndroid Build Coastguard Worker     } // Continue walking down the instructions.
2767*9880d681SAndroid Build Coastguard Worker   } // Continue walking down the domtree.
2768*9880d681SAndroid Build Coastguard Worker   // Visit phi backedges to determine if the chain can generate the IV postinc.
2769*9880d681SAndroid Build Coastguard Worker   for (BasicBlock::iterator I = L->getHeader()->begin();
2770*9880d681SAndroid Build Coastguard Worker        PHINode *PN = dyn_cast<PHINode>(I); ++I) {
2771*9880d681SAndroid Build Coastguard Worker     if (!SE.isSCEVable(PN->getType()))
2772*9880d681SAndroid Build Coastguard Worker       continue;
2773*9880d681SAndroid Build Coastguard Worker 
2774*9880d681SAndroid Build Coastguard Worker     Instruction *IncV =
2775*9880d681SAndroid Build Coastguard Worker       dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
2776*9880d681SAndroid Build Coastguard Worker     if (IncV)
2777*9880d681SAndroid Build Coastguard Worker       ChainInstruction(PN, IncV, ChainUsersVec);
2778*9880d681SAndroid Build Coastguard Worker   }
2779*9880d681SAndroid Build Coastguard Worker   // Remove any unprofitable chains.
2780*9880d681SAndroid Build Coastguard Worker   unsigned ChainIdx = 0;
2781*9880d681SAndroid Build Coastguard Worker   for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
2782*9880d681SAndroid Build Coastguard Worker        UsersIdx < NChains; ++UsersIdx) {
2783*9880d681SAndroid Build Coastguard Worker     if (!isProfitableChain(IVChainVec[UsersIdx],
2784*9880d681SAndroid Build Coastguard Worker                            ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
2785*9880d681SAndroid Build Coastguard Worker       continue;
2786*9880d681SAndroid Build Coastguard Worker     // Preserve the chain at UsesIdx.
2787*9880d681SAndroid Build Coastguard Worker     if (ChainIdx != UsersIdx)
2788*9880d681SAndroid Build Coastguard Worker       IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
2789*9880d681SAndroid Build Coastguard Worker     FinalizeChain(IVChainVec[ChainIdx]);
2790*9880d681SAndroid Build Coastguard Worker     ++ChainIdx;
2791*9880d681SAndroid Build Coastguard Worker   }
2792*9880d681SAndroid Build Coastguard Worker   IVChainVec.resize(ChainIdx);
2793*9880d681SAndroid Build Coastguard Worker }
2794*9880d681SAndroid Build Coastguard Worker 
FinalizeChain(IVChain & Chain)2795*9880d681SAndroid Build Coastguard Worker void LSRInstance::FinalizeChain(IVChain &Chain) {
2796*9880d681SAndroid Build Coastguard Worker   assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
2797*9880d681SAndroid Build Coastguard Worker   DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
2798*9880d681SAndroid Build Coastguard Worker 
2799*9880d681SAndroid Build Coastguard Worker   for (const IVInc &Inc : Chain) {
2800*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "        Inc: " << Inc.UserInst << "\n");
2801*9880d681SAndroid Build Coastguard Worker     auto UseI = std::find(Inc.UserInst->op_begin(), Inc.UserInst->op_end(),
2802*9880d681SAndroid Build Coastguard Worker                           Inc.IVOperand);
2803*9880d681SAndroid Build Coastguard Worker     assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
2804*9880d681SAndroid Build Coastguard Worker     IVIncSet.insert(UseI);
2805*9880d681SAndroid Build Coastguard Worker   }
2806*9880d681SAndroid Build Coastguard Worker }
2807*9880d681SAndroid Build Coastguard Worker 
2808*9880d681SAndroid Build Coastguard Worker /// Return true if the IVInc can be folded into an addressing mode.
canFoldIVIncExpr(const SCEV * IncExpr,Instruction * UserInst,Value * Operand,const TargetTransformInfo & TTI)2809*9880d681SAndroid Build Coastguard Worker static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
2810*9880d681SAndroid Build Coastguard Worker                              Value *Operand, const TargetTransformInfo &TTI) {
2811*9880d681SAndroid Build Coastguard Worker   const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
2812*9880d681SAndroid Build Coastguard Worker   if (!IncConst || !isAddressUse(UserInst, Operand))
2813*9880d681SAndroid Build Coastguard Worker     return false;
2814*9880d681SAndroid Build Coastguard Worker 
2815*9880d681SAndroid Build Coastguard Worker   if (IncConst->getAPInt().getMinSignedBits() > 64)
2816*9880d681SAndroid Build Coastguard Worker     return false;
2817*9880d681SAndroid Build Coastguard Worker 
2818*9880d681SAndroid Build Coastguard Worker   MemAccessTy AccessTy = getAccessType(UserInst);
2819*9880d681SAndroid Build Coastguard Worker   int64_t IncOffset = IncConst->getValue()->getSExtValue();
2820*9880d681SAndroid Build Coastguard Worker   if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
2821*9880d681SAndroid Build Coastguard Worker                         IncOffset, /*HaseBaseReg=*/false))
2822*9880d681SAndroid Build Coastguard Worker     return false;
2823*9880d681SAndroid Build Coastguard Worker 
2824*9880d681SAndroid Build Coastguard Worker   return true;
2825*9880d681SAndroid Build Coastguard Worker }
2826*9880d681SAndroid Build Coastguard Worker 
2827*9880d681SAndroid Build Coastguard Worker /// Generate an add or subtract for each IVInc in a chain to materialize the IV
2828*9880d681SAndroid Build Coastguard Worker /// user's operand from the previous IV user's operand.
GenerateIVChain(const IVChain & Chain,SCEVExpander & Rewriter,SmallVectorImpl<WeakVH> & DeadInsts)2829*9880d681SAndroid Build Coastguard Worker void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
2830*9880d681SAndroid Build Coastguard Worker                                   SmallVectorImpl<WeakVH> &DeadInsts) {
2831*9880d681SAndroid Build Coastguard Worker   // Find the new IVOperand for the head of the chain. It may have been replaced
2832*9880d681SAndroid Build Coastguard Worker   // by LSR.
2833*9880d681SAndroid Build Coastguard Worker   const IVInc &Head = Chain.Incs[0];
2834*9880d681SAndroid Build Coastguard Worker   User::op_iterator IVOpEnd = Head.UserInst->op_end();
2835*9880d681SAndroid Build Coastguard Worker   // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
2836*9880d681SAndroid Build Coastguard Worker   User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
2837*9880d681SAndroid Build Coastguard Worker                                              IVOpEnd, L, SE);
2838*9880d681SAndroid Build Coastguard Worker   Value *IVSrc = nullptr;
2839*9880d681SAndroid Build Coastguard Worker   while (IVOpIter != IVOpEnd) {
2840*9880d681SAndroid Build Coastguard Worker     IVSrc = getWideOperand(*IVOpIter);
2841*9880d681SAndroid Build Coastguard Worker 
2842*9880d681SAndroid Build Coastguard Worker     // If this operand computes the expression that the chain needs, we may use
2843*9880d681SAndroid Build Coastguard Worker     // it. (Check this after setting IVSrc which is used below.)
2844*9880d681SAndroid Build Coastguard Worker     //
2845*9880d681SAndroid Build Coastguard Worker     // Note that if Head.IncExpr is wider than IVSrc, then this phi is too
2846*9880d681SAndroid Build Coastguard Worker     // narrow for the chain, so we can no longer use it. We do allow using a
2847*9880d681SAndroid Build Coastguard Worker     // wider phi, assuming the LSR checked for free truncation. In that case we
2848*9880d681SAndroid Build Coastguard Worker     // should already have a truncate on this operand such that
2849*9880d681SAndroid Build Coastguard Worker     // getSCEV(IVSrc) == IncExpr.
2850*9880d681SAndroid Build Coastguard Worker     if (SE.getSCEV(*IVOpIter) == Head.IncExpr
2851*9880d681SAndroid Build Coastguard Worker         || SE.getSCEV(IVSrc) == Head.IncExpr) {
2852*9880d681SAndroid Build Coastguard Worker       break;
2853*9880d681SAndroid Build Coastguard Worker     }
2854*9880d681SAndroid Build Coastguard Worker     IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
2855*9880d681SAndroid Build Coastguard Worker   }
2856*9880d681SAndroid Build Coastguard Worker   if (IVOpIter == IVOpEnd) {
2857*9880d681SAndroid Build Coastguard Worker     // Gracefully give up on this chain.
2858*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
2859*9880d681SAndroid Build Coastguard Worker     return;
2860*9880d681SAndroid Build Coastguard Worker   }
2861*9880d681SAndroid Build Coastguard Worker 
2862*9880d681SAndroid Build Coastguard Worker   DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
2863*9880d681SAndroid Build Coastguard Worker   Type *IVTy = IVSrc->getType();
2864*9880d681SAndroid Build Coastguard Worker   Type *IntTy = SE.getEffectiveSCEVType(IVTy);
2865*9880d681SAndroid Build Coastguard Worker   const SCEV *LeftOverExpr = nullptr;
2866*9880d681SAndroid Build Coastguard Worker   for (const IVInc &Inc : Chain) {
2867*9880d681SAndroid Build Coastguard Worker     Instruction *InsertPt = Inc.UserInst;
2868*9880d681SAndroid Build Coastguard Worker     if (isa<PHINode>(InsertPt))
2869*9880d681SAndroid Build Coastguard Worker       InsertPt = L->getLoopLatch()->getTerminator();
2870*9880d681SAndroid Build Coastguard Worker 
2871*9880d681SAndroid Build Coastguard Worker     // IVOper will replace the current IV User's operand. IVSrc is the IV
2872*9880d681SAndroid Build Coastguard Worker     // value currently held in a register.
2873*9880d681SAndroid Build Coastguard Worker     Value *IVOper = IVSrc;
2874*9880d681SAndroid Build Coastguard Worker     if (!Inc.IncExpr->isZero()) {
2875*9880d681SAndroid Build Coastguard Worker       // IncExpr was the result of subtraction of two narrow values, so must
2876*9880d681SAndroid Build Coastguard Worker       // be signed.
2877*9880d681SAndroid Build Coastguard Worker       const SCEV *IncExpr = SE.getNoopOrSignExtend(Inc.IncExpr, IntTy);
2878*9880d681SAndroid Build Coastguard Worker       LeftOverExpr = LeftOverExpr ?
2879*9880d681SAndroid Build Coastguard Worker         SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
2880*9880d681SAndroid Build Coastguard Worker     }
2881*9880d681SAndroid Build Coastguard Worker     if (LeftOverExpr && !LeftOverExpr->isZero()) {
2882*9880d681SAndroid Build Coastguard Worker       // Expand the IV increment.
2883*9880d681SAndroid Build Coastguard Worker       Rewriter.clearPostInc();
2884*9880d681SAndroid Build Coastguard Worker       Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
2885*9880d681SAndroid Build Coastguard Worker       const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc),
2886*9880d681SAndroid Build Coastguard Worker                                              SE.getUnknown(IncV));
2887*9880d681SAndroid Build Coastguard Worker       IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
2888*9880d681SAndroid Build Coastguard Worker 
2889*9880d681SAndroid Build Coastguard Worker       // If an IV increment can't be folded, use it as the next IV value.
2890*9880d681SAndroid Build Coastguard Worker       if (!canFoldIVIncExpr(LeftOverExpr, Inc.UserInst, Inc.IVOperand, TTI)) {
2891*9880d681SAndroid Build Coastguard Worker         assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
2892*9880d681SAndroid Build Coastguard Worker         IVSrc = IVOper;
2893*9880d681SAndroid Build Coastguard Worker         LeftOverExpr = nullptr;
2894*9880d681SAndroid Build Coastguard Worker       }
2895*9880d681SAndroid Build Coastguard Worker     }
2896*9880d681SAndroid Build Coastguard Worker     Type *OperTy = Inc.IVOperand->getType();
2897*9880d681SAndroid Build Coastguard Worker     if (IVTy != OperTy) {
2898*9880d681SAndroid Build Coastguard Worker       assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
2899*9880d681SAndroid Build Coastguard Worker              "cannot extend a chained IV");
2900*9880d681SAndroid Build Coastguard Worker       IRBuilder<> Builder(InsertPt);
2901*9880d681SAndroid Build Coastguard Worker       IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
2902*9880d681SAndroid Build Coastguard Worker     }
2903*9880d681SAndroid Build Coastguard Worker     Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
2904*9880d681SAndroid Build Coastguard Worker     DeadInsts.emplace_back(Inc.IVOperand);
2905*9880d681SAndroid Build Coastguard Worker   }
2906*9880d681SAndroid Build Coastguard Worker   // If LSR created a new, wider phi, we may also replace its postinc. We only
2907*9880d681SAndroid Build Coastguard Worker   // do this if we also found a wide value for the head of the chain.
2908*9880d681SAndroid Build Coastguard Worker   if (isa<PHINode>(Chain.tailUserInst())) {
2909*9880d681SAndroid Build Coastguard Worker     for (BasicBlock::iterator I = L->getHeader()->begin();
2910*9880d681SAndroid Build Coastguard Worker          PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
2911*9880d681SAndroid Build Coastguard Worker       if (!isCompatibleIVType(Phi, IVSrc))
2912*9880d681SAndroid Build Coastguard Worker         continue;
2913*9880d681SAndroid Build Coastguard Worker       Instruction *PostIncV = dyn_cast<Instruction>(
2914*9880d681SAndroid Build Coastguard Worker         Phi->getIncomingValueForBlock(L->getLoopLatch()));
2915*9880d681SAndroid Build Coastguard Worker       if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
2916*9880d681SAndroid Build Coastguard Worker         continue;
2917*9880d681SAndroid Build Coastguard Worker       Value *IVOper = IVSrc;
2918*9880d681SAndroid Build Coastguard Worker       Type *PostIncTy = PostIncV->getType();
2919*9880d681SAndroid Build Coastguard Worker       if (IVTy != PostIncTy) {
2920*9880d681SAndroid Build Coastguard Worker         assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
2921*9880d681SAndroid Build Coastguard Worker         IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
2922*9880d681SAndroid Build Coastguard Worker         Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
2923*9880d681SAndroid Build Coastguard Worker         IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
2924*9880d681SAndroid Build Coastguard Worker       }
2925*9880d681SAndroid Build Coastguard Worker       Phi->replaceUsesOfWith(PostIncV, IVOper);
2926*9880d681SAndroid Build Coastguard Worker       DeadInsts.emplace_back(PostIncV);
2927*9880d681SAndroid Build Coastguard Worker     }
2928*9880d681SAndroid Build Coastguard Worker   }
2929*9880d681SAndroid Build Coastguard Worker }
2930*9880d681SAndroid Build Coastguard Worker 
CollectFixupsAndInitialFormulae()2931*9880d681SAndroid Build Coastguard Worker void LSRInstance::CollectFixupsAndInitialFormulae() {
2932*9880d681SAndroid Build Coastguard Worker   for (const IVStrideUse &U : IU) {
2933*9880d681SAndroid Build Coastguard Worker     Instruction *UserInst = U.getUser();
2934*9880d681SAndroid Build Coastguard Worker     // Skip IV users that are part of profitable IV Chains.
2935*9880d681SAndroid Build Coastguard Worker     User::op_iterator UseI = std::find(UserInst->op_begin(), UserInst->op_end(),
2936*9880d681SAndroid Build Coastguard Worker                                        U.getOperandValToReplace());
2937*9880d681SAndroid Build Coastguard Worker     assert(UseI != UserInst->op_end() && "cannot find IV operand");
2938*9880d681SAndroid Build Coastguard Worker     if (IVIncSet.count(UseI))
2939*9880d681SAndroid Build Coastguard Worker       continue;
2940*9880d681SAndroid Build Coastguard Worker 
2941*9880d681SAndroid Build Coastguard Worker     // Record the uses.
2942*9880d681SAndroid Build Coastguard Worker     LSRFixup &LF = getNewFixup();
2943*9880d681SAndroid Build Coastguard Worker     LF.UserInst = UserInst;
2944*9880d681SAndroid Build Coastguard Worker     LF.OperandValToReplace = U.getOperandValToReplace();
2945*9880d681SAndroid Build Coastguard Worker     LF.PostIncLoops = U.getPostIncLoops();
2946*9880d681SAndroid Build Coastguard Worker 
2947*9880d681SAndroid Build Coastguard Worker     LSRUse::KindType Kind = LSRUse::Basic;
2948*9880d681SAndroid Build Coastguard Worker     MemAccessTy AccessTy;
2949*9880d681SAndroid Build Coastguard Worker     if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
2950*9880d681SAndroid Build Coastguard Worker       Kind = LSRUse::Address;
2951*9880d681SAndroid Build Coastguard Worker       AccessTy = getAccessType(LF.UserInst);
2952*9880d681SAndroid Build Coastguard Worker     }
2953*9880d681SAndroid Build Coastguard Worker 
2954*9880d681SAndroid Build Coastguard Worker     const SCEV *S = IU.getExpr(U);
2955*9880d681SAndroid Build Coastguard Worker 
2956*9880d681SAndroid Build Coastguard Worker     // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
2957*9880d681SAndroid Build Coastguard Worker     // (N - i == 0), and this allows (N - i) to be the expression that we work
2958*9880d681SAndroid Build Coastguard Worker     // with rather than just N or i, so we can consider the register
2959*9880d681SAndroid Build Coastguard Worker     // requirements for both N and i at the same time. Limiting this code to
2960*9880d681SAndroid Build Coastguard Worker     // equality icmps is not a problem because all interesting loops use
2961*9880d681SAndroid Build Coastguard Worker     // equality icmps, thanks to IndVarSimplify.
2962*9880d681SAndroid Build Coastguard Worker     if (ICmpInst *CI = dyn_cast<ICmpInst>(LF.UserInst))
2963*9880d681SAndroid Build Coastguard Worker       if (CI->isEquality()) {
2964*9880d681SAndroid Build Coastguard Worker         // Swap the operands if needed to put the OperandValToReplace on the
2965*9880d681SAndroid Build Coastguard Worker         // left, for consistency.
2966*9880d681SAndroid Build Coastguard Worker         Value *NV = CI->getOperand(1);
2967*9880d681SAndroid Build Coastguard Worker         if (NV == LF.OperandValToReplace) {
2968*9880d681SAndroid Build Coastguard Worker           CI->setOperand(1, CI->getOperand(0));
2969*9880d681SAndroid Build Coastguard Worker           CI->setOperand(0, NV);
2970*9880d681SAndroid Build Coastguard Worker           NV = CI->getOperand(1);
2971*9880d681SAndroid Build Coastguard Worker           Changed = true;
2972*9880d681SAndroid Build Coastguard Worker         }
2973*9880d681SAndroid Build Coastguard Worker 
2974*9880d681SAndroid Build Coastguard Worker         // x == y  -->  x - y == 0
2975*9880d681SAndroid Build Coastguard Worker         const SCEV *N = SE.getSCEV(NV);
2976*9880d681SAndroid Build Coastguard Worker         if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
2977*9880d681SAndroid Build Coastguard Worker           // S is normalized, so normalize N before folding it into S
2978*9880d681SAndroid Build Coastguard Worker           // to keep the result normalized.
2979*9880d681SAndroid Build Coastguard Worker           N = TransformForPostIncUse(Normalize, N, CI, nullptr,
2980*9880d681SAndroid Build Coastguard Worker                                      LF.PostIncLoops, SE, DT);
2981*9880d681SAndroid Build Coastguard Worker           Kind = LSRUse::ICmpZero;
2982*9880d681SAndroid Build Coastguard Worker           S = SE.getMinusSCEV(N, S);
2983*9880d681SAndroid Build Coastguard Worker         }
2984*9880d681SAndroid Build Coastguard Worker 
2985*9880d681SAndroid Build Coastguard Worker         // -1 and the negations of all interesting strides (except the negation
2986*9880d681SAndroid Build Coastguard Worker         // of -1) are now also interesting.
2987*9880d681SAndroid Build Coastguard Worker         for (size_t i = 0, e = Factors.size(); i != e; ++i)
2988*9880d681SAndroid Build Coastguard Worker           if (Factors[i] != -1)
2989*9880d681SAndroid Build Coastguard Worker             Factors.insert(-(uint64_t)Factors[i]);
2990*9880d681SAndroid Build Coastguard Worker         Factors.insert(-1);
2991*9880d681SAndroid Build Coastguard Worker       }
2992*9880d681SAndroid Build Coastguard Worker 
2993*9880d681SAndroid Build Coastguard Worker     // Set up the initial formula for this use.
2994*9880d681SAndroid Build Coastguard Worker     std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
2995*9880d681SAndroid Build Coastguard Worker     LF.LUIdx = P.first;
2996*9880d681SAndroid Build Coastguard Worker     LF.Offset = P.second;
2997*9880d681SAndroid Build Coastguard Worker     LSRUse &LU = Uses[LF.LUIdx];
2998*9880d681SAndroid Build Coastguard Worker     LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
2999*9880d681SAndroid Build Coastguard Worker     if (!LU.WidestFixupType ||
3000*9880d681SAndroid Build Coastguard Worker         SE.getTypeSizeInBits(LU.WidestFixupType) <
3001*9880d681SAndroid Build Coastguard Worker         SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
3002*9880d681SAndroid Build Coastguard Worker       LU.WidestFixupType = LF.OperandValToReplace->getType();
3003*9880d681SAndroid Build Coastguard Worker 
3004*9880d681SAndroid Build Coastguard Worker     // If this is the first use of this LSRUse, give it a formula.
3005*9880d681SAndroid Build Coastguard Worker     if (LU.Formulae.empty()) {
3006*9880d681SAndroid Build Coastguard Worker       InsertInitialFormula(S, LU, LF.LUIdx);
3007*9880d681SAndroid Build Coastguard Worker       CountRegisters(LU.Formulae.back(), LF.LUIdx);
3008*9880d681SAndroid Build Coastguard Worker     }
3009*9880d681SAndroid Build Coastguard Worker   }
3010*9880d681SAndroid Build Coastguard Worker 
3011*9880d681SAndroid Build Coastguard Worker   DEBUG(print_fixups(dbgs()));
3012*9880d681SAndroid Build Coastguard Worker }
3013*9880d681SAndroid Build Coastguard Worker 
3014*9880d681SAndroid Build Coastguard Worker /// Insert a formula for the given expression into the given use, separating out
3015*9880d681SAndroid Build Coastguard Worker /// loop-variant portions from loop-invariant and loop-computable portions.
3016*9880d681SAndroid Build Coastguard Worker void
InsertInitialFormula(const SCEV * S,LSRUse & LU,size_t LUIdx)3017*9880d681SAndroid Build Coastguard Worker LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
3018*9880d681SAndroid Build Coastguard Worker   // Mark uses whose expressions cannot be expanded.
3019*9880d681SAndroid Build Coastguard Worker   if (!isSafeToExpand(S, SE))
3020*9880d681SAndroid Build Coastguard Worker     LU.RigidFormula = true;
3021*9880d681SAndroid Build Coastguard Worker 
3022*9880d681SAndroid Build Coastguard Worker   Formula F;
3023*9880d681SAndroid Build Coastguard Worker   F.initialMatch(S, L, SE);
3024*9880d681SAndroid Build Coastguard Worker   bool Inserted = InsertFormula(LU, LUIdx, F);
3025*9880d681SAndroid Build Coastguard Worker   assert(Inserted && "Initial formula already exists!"); (void)Inserted;
3026*9880d681SAndroid Build Coastguard Worker }
3027*9880d681SAndroid Build Coastguard Worker 
3028*9880d681SAndroid Build Coastguard Worker /// Insert a simple single-register formula for the given expression into the
3029*9880d681SAndroid Build Coastguard Worker /// given use.
3030*9880d681SAndroid Build Coastguard Worker void
InsertSupplementalFormula(const SCEV * S,LSRUse & LU,size_t LUIdx)3031*9880d681SAndroid Build Coastguard Worker LSRInstance::InsertSupplementalFormula(const SCEV *S,
3032*9880d681SAndroid Build Coastguard Worker                                        LSRUse &LU, size_t LUIdx) {
3033*9880d681SAndroid Build Coastguard Worker   Formula F;
3034*9880d681SAndroid Build Coastguard Worker   F.BaseRegs.push_back(S);
3035*9880d681SAndroid Build Coastguard Worker   F.HasBaseReg = true;
3036*9880d681SAndroid Build Coastguard Worker   bool Inserted = InsertFormula(LU, LUIdx, F);
3037*9880d681SAndroid Build Coastguard Worker   assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
3038*9880d681SAndroid Build Coastguard Worker }
3039*9880d681SAndroid Build Coastguard Worker 
3040*9880d681SAndroid Build Coastguard Worker /// Note which registers are used by the given formula, updating RegUses.
CountRegisters(const Formula & F,size_t LUIdx)3041*9880d681SAndroid Build Coastguard Worker void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
3042*9880d681SAndroid Build Coastguard Worker   if (F.ScaledReg)
3043*9880d681SAndroid Build Coastguard Worker     RegUses.countRegister(F.ScaledReg, LUIdx);
3044*9880d681SAndroid Build Coastguard Worker   for (const SCEV *BaseReg : F.BaseRegs)
3045*9880d681SAndroid Build Coastguard Worker     RegUses.countRegister(BaseReg, LUIdx);
3046*9880d681SAndroid Build Coastguard Worker }
3047*9880d681SAndroid Build Coastguard Worker 
3048*9880d681SAndroid Build Coastguard Worker /// If the given formula has not yet been inserted, add it to the list, and
3049*9880d681SAndroid Build Coastguard Worker /// return true. Return false otherwise.
InsertFormula(LSRUse & LU,unsigned LUIdx,const Formula & F)3050*9880d681SAndroid Build Coastguard Worker bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
3051*9880d681SAndroid Build Coastguard Worker   // Do not insert formula that we will not be able to expand.
3052*9880d681SAndroid Build Coastguard Worker   assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
3053*9880d681SAndroid Build Coastguard Worker          "Formula is illegal");
3054*9880d681SAndroid Build Coastguard Worker   if (!LU.InsertFormula(F))
3055*9880d681SAndroid Build Coastguard Worker     return false;
3056*9880d681SAndroid Build Coastguard Worker 
3057*9880d681SAndroid Build Coastguard Worker   CountRegisters(F, LUIdx);
3058*9880d681SAndroid Build Coastguard Worker   return true;
3059*9880d681SAndroid Build Coastguard Worker }
3060*9880d681SAndroid Build Coastguard Worker 
3061*9880d681SAndroid Build Coastguard Worker /// Check for other uses of loop-invariant values which we're tracking. These
3062*9880d681SAndroid Build Coastguard Worker /// other uses will pin these values in registers, making them less profitable
3063*9880d681SAndroid Build Coastguard Worker /// for elimination.
3064*9880d681SAndroid Build Coastguard Worker /// TODO: This currently misses non-constant addrec step registers.
3065*9880d681SAndroid Build Coastguard Worker /// TODO: Should this give more weight to users inside the loop?
3066*9880d681SAndroid Build Coastguard Worker void
CollectLoopInvariantFixupsAndFormulae()3067*9880d681SAndroid Build Coastguard Worker LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3068*9880d681SAndroid Build Coastguard Worker   SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
3069*9880d681SAndroid Build Coastguard Worker   SmallPtrSet<const SCEV *, 32> Visited;
3070*9880d681SAndroid Build Coastguard Worker 
3071*9880d681SAndroid Build Coastguard Worker   while (!Worklist.empty()) {
3072*9880d681SAndroid Build Coastguard Worker     const SCEV *S = Worklist.pop_back_val();
3073*9880d681SAndroid Build Coastguard Worker 
3074*9880d681SAndroid Build Coastguard Worker     // Don't process the same SCEV twice
3075*9880d681SAndroid Build Coastguard Worker     if (!Visited.insert(S).second)
3076*9880d681SAndroid Build Coastguard Worker       continue;
3077*9880d681SAndroid Build Coastguard Worker 
3078*9880d681SAndroid Build Coastguard Worker     if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
3079*9880d681SAndroid Build Coastguard Worker       Worklist.append(N->op_begin(), N->op_end());
3080*9880d681SAndroid Build Coastguard Worker     else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
3081*9880d681SAndroid Build Coastguard Worker       Worklist.push_back(C->getOperand());
3082*9880d681SAndroid Build Coastguard Worker     else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
3083*9880d681SAndroid Build Coastguard Worker       Worklist.push_back(D->getLHS());
3084*9880d681SAndroid Build Coastguard Worker       Worklist.push_back(D->getRHS());
3085*9880d681SAndroid Build Coastguard Worker     } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
3086*9880d681SAndroid Build Coastguard Worker       const Value *V = US->getValue();
3087*9880d681SAndroid Build Coastguard Worker       if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
3088*9880d681SAndroid Build Coastguard Worker         // Look for instructions defined outside the loop.
3089*9880d681SAndroid Build Coastguard Worker         if (L->contains(Inst)) continue;
3090*9880d681SAndroid Build Coastguard Worker       } else if (isa<UndefValue>(V))
3091*9880d681SAndroid Build Coastguard Worker         // Undef doesn't have a live range, so it doesn't matter.
3092*9880d681SAndroid Build Coastguard Worker         continue;
3093*9880d681SAndroid Build Coastguard Worker       for (const Use &U : V->uses()) {
3094*9880d681SAndroid Build Coastguard Worker         const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
3095*9880d681SAndroid Build Coastguard Worker         // Ignore non-instructions.
3096*9880d681SAndroid Build Coastguard Worker         if (!UserInst)
3097*9880d681SAndroid Build Coastguard Worker           continue;
3098*9880d681SAndroid Build Coastguard Worker         // Ignore instructions in other functions (as can happen with
3099*9880d681SAndroid Build Coastguard Worker         // Constants).
3100*9880d681SAndroid Build Coastguard Worker         if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
3101*9880d681SAndroid Build Coastguard Worker           continue;
3102*9880d681SAndroid Build Coastguard Worker         // Ignore instructions not dominated by the loop.
3103*9880d681SAndroid Build Coastguard Worker         const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
3104*9880d681SAndroid Build Coastguard Worker           UserInst->getParent() :
3105*9880d681SAndroid Build Coastguard Worker           cast<PHINode>(UserInst)->getIncomingBlock(
3106*9880d681SAndroid Build Coastguard Worker             PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
3107*9880d681SAndroid Build Coastguard Worker         if (!DT.dominates(L->getHeader(), UseBB))
3108*9880d681SAndroid Build Coastguard Worker           continue;
3109*9880d681SAndroid Build Coastguard Worker         // Don't bother if the instruction is in a BB which ends in an EHPad.
3110*9880d681SAndroid Build Coastguard Worker         if (UseBB->getTerminator()->isEHPad())
3111*9880d681SAndroid Build Coastguard Worker           continue;
3112*9880d681SAndroid Build Coastguard Worker         // Ignore uses which are part of other SCEV expressions, to avoid
3113*9880d681SAndroid Build Coastguard Worker         // analyzing them multiple times.
3114*9880d681SAndroid Build Coastguard Worker         if (SE.isSCEVable(UserInst->getType())) {
3115*9880d681SAndroid Build Coastguard Worker           const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
3116*9880d681SAndroid Build Coastguard Worker           // If the user is a no-op, look through to its uses.
3117*9880d681SAndroid Build Coastguard Worker           if (!isa<SCEVUnknown>(UserS))
3118*9880d681SAndroid Build Coastguard Worker             continue;
3119*9880d681SAndroid Build Coastguard Worker           if (UserS == US) {
3120*9880d681SAndroid Build Coastguard Worker             Worklist.push_back(
3121*9880d681SAndroid Build Coastguard Worker               SE.getUnknown(const_cast<Instruction *>(UserInst)));
3122*9880d681SAndroid Build Coastguard Worker             continue;
3123*9880d681SAndroid Build Coastguard Worker           }
3124*9880d681SAndroid Build Coastguard Worker         }
3125*9880d681SAndroid Build Coastguard Worker         // Ignore icmp instructions which are already being analyzed.
3126*9880d681SAndroid Build Coastguard Worker         if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
3127*9880d681SAndroid Build Coastguard Worker           unsigned OtherIdx = !U.getOperandNo();
3128*9880d681SAndroid Build Coastguard Worker           Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
3129*9880d681SAndroid Build Coastguard Worker           if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
3130*9880d681SAndroid Build Coastguard Worker             continue;
3131*9880d681SAndroid Build Coastguard Worker         }
3132*9880d681SAndroid Build Coastguard Worker 
3133*9880d681SAndroid Build Coastguard Worker         LSRFixup &LF = getNewFixup();
3134*9880d681SAndroid Build Coastguard Worker         LF.UserInst = const_cast<Instruction *>(UserInst);
3135*9880d681SAndroid Build Coastguard Worker         LF.OperandValToReplace = U;
3136*9880d681SAndroid Build Coastguard Worker         std::pair<size_t, int64_t> P = getUse(
3137*9880d681SAndroid Build Coastguard Worker             S, LSRUse::Basic, MemAccessTy());
3138*9880d681SAndroid Build Coastguard Worker         LF.LUIdx = P.first;
3139*9880d681SAndroid Build Coastguard Worker         LF.Offset = P.second;
3140*9880d681SAndroid Build Coastguard Worker         LSRUse &LU = Uses[LF.LUIdx];
3141*9880d681SAndroid Build Coastguard Worker         LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3142*9880d681SAndroid Build Coastguard Worker         if (!LU.WidestFixupType ||
3143*9880d681SAndroid Build Coastguard Worker             SE.getTypeSizeInBits(LU.WidestFixupType) <
3144*9880d681SAndroid Build Coastguard Worker             SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
3145*9880d681SAndroid Build Coastguard Worker           LU.WidestFixupType = LF.OperandValToReplace->getType();
3146*9880d681SAndroid Build Coastguard Worker         InsertSupplementalFormula(US, LU, LF.LUIdx);
3147*9880d681SAndroid Build Coastguard Worker         CountRegisters(LU.Formulae.back(), Uses.size() - 1);
3148*9880d681SAndroid Build Coastguard Worker         break;
3149*9880d681SAndroid Build Coastguard Worker       }
3150*9880d681SAndroid Build Coastguard Worker     }
3151*9880d681SAndroid Build Coastguard Worker   }
3152*9880d681SAndroid Build Coastguard Worker }
3153*9880d681SAndroid Build Coastguard Worker 
3154*9880d681SAndroid Build Coastguard Worker /// Split S into subexpressions which can be pulled out into separate
3155*9880d681SAndroid Build Coastguard Worker /// registers. If C is non-null, multiply each subexpression by C.
3156*9880d681SAndroid Build Coastguard Worker ///
3157*9880d681SAndroid Build Coastguard Worker /// Return remainder expression after factoring the subexpressions captured by
3158*9880d681SAndroid Build Coastguard Worker /// Ops. If Ops is complete, return NULL.
CollectSubexprs(const SCEV * S,const SCEVConstant * C,SmallVectorImpl<const SCEV * > & Ops,const Loop * L,ScalarEvolution & SE,unsigned Depth=0)3159*9880d681SAndroid Build Coastguard Worker static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
3160*9880d681SAndroid Build Coastguard Worker                                    SmallVectorImpl<const SCEV *> &Ops,
3161*9880d681SAndroid Build Coastguard Worker                                    const Loop *L,
3162*9880d681SAndroid Build Coastguard Worker                                    ScalarEvolution &SE,
3163*9880d681SAndroid Build Coastguard Worker                                    unsigned Depth = 0) {
3164*9880d681SAndroid Build Coastguard Worker   // Arbitrarily cap recursion to protect compile time.
3165*9880d681SAndroid Build Coastguard Worker   if (Depth >= 3)
3166*9880d681SAndroid Build Coastguard Worker     return S;
3167*9880d681SAndroid Build Coastguard Worker 
3168*9880d681SAndroid Build Coastguard Worker   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
3169*9880d681SAndroid Build Coastguard Worker     // Break out add operands.
3170*9880d681SAndroid Build Coastguard Worker     for (const SCEV *S : Add->operands()) {
3171*9880d681SAndroid Build Coastguard Worker       const SCEV *Remainder = CollectSubexprs(S, C, Ops, L, SE, Depth+1);
3172*9880d681SAndroid Build Coastguard Worker       if (Remainder)
3173*9880d681SAndroid Build Coastguard Worker         Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
3174*9880d681SAndroid Build Coastguard Worker     }
3175*9880d681SAndroid Build Coastguard Worker     return nullptr;
3176*9880d681SAndroid Build Coastguard Worker   } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
3177*9880d681SAndroid Build Coastguard Worker     // Split a non-zero base out of an addrec.
3178*9880d681SAndroid Build Coastguard Worker     if (AR->getStart()->isZero())
3179*9880d681SAndroid Build Coastguard Worker       return S;
3180*9880d681SAndroid Build Coastguard Worker 
3181*9880d681SAndroid Build Coastguard Worker     const SCEV *Remainder = CollectSubexprs(AR->getStart(),
3182*9880d681SAndroid Build Coastguard Worker                                             C, Ops, L, SE, Depth+1);
3183*9880d681SAndroid Build Coastguard Worker     // Split the non-zero AddRec unless it is part of a nested recurrence that
3184*9880d681SAndroid Build Coastguard Worker     // does not pertain to this loop.
3185*9880d681SAndroid Build Coastguard Worker     if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
3186*9880d681SAndroid Build Coastguard Worker       Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
3187*9880d681SAndroid Build Coastguard Worker       Remainder = nullptr;
3188*9880d681SAndroid Build Coastguard Worker     }
3189*9880d681SAndroid Build Coastguard Worker     if (Remainder != AR->getStart()) {
3190*9880d681SAndroid Build Coastguard Worker       if (!Remainder)
3191*9880d681SAndroid Build Coastguard Worker         Remainder = SE.getConstant(AR->getType(), 0);
3192*9880d681SAndroid Build Coastguard Worker       return SE.getAddRecExpr(Remainder,
3193*9880d681SAndroid Build Coastguard Worker                               AR->getStepRecurrence(SE),
3194*9880d681SAndroid Build Coastguard Worker                               AR->getLoop(),
3195*9880d681SAndroid Build Coastguard Worker                               //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
3196*9880d681SAndroid Build Coastguard Worker                               SCEV::FlagAnyWrap);
3197*9880d681SAndroid Build Coastguard Worker     }
3198*9880d681SAndroid Build Coastguard Worker   } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
3199*9880d681SAndroid Build Coastguard Worker     // Break (C * (a + b + c)) into C*a + C*b + C*c.
3200*9880d681SAndroid Build Coastguard Worker     if (Mul->getNumOperands() != 2)
3201*9880d681SAndroid Build Coastguard Worker       return S;
3202*9880d681SAndroid Build Coastguard Worker     if (const SCEVConstant *Op0 =
3203*9880d681SAndroid Build Coastguard Worker         dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
3204*9880d681SAndroid Build Coastguard Worker       C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
3205*9880d681SAndroid Build Coastguard Worker       const SCEV *Remainder =
3206*9880d681SAndroid Build Coastguard Worker         CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
3207*9880d681SAndroid Build Coastguard Worker       if (Remainder)
3208*9880d681SAndroid Build Coastguard Worker         Ops.push_back(SE.getMulExpr(C, Remainder));
3209*9880d681SAndroid Build Coastguard Worker       return nullptr;
3210*9880d681SAndroid Build Coastguard Worker     }
3211*9880d681SAndroid Build Coastguard Worker   }
3212*9880d681SAndroid Build Coastguard Worker   return S;
3213*9880d681SAndroid Build Coastguard Worker }
3214*9880d681SAndroid Build Coastguard Worker 
3215*9880d681SAndroid Build Coastguard Worker /// \brief Helper function for LSRInstance::GenerateReassociations.
GenerateReassociationsImpl(LSRUse & LU,unsigned LUIdx,const Formula & Base,unsigned Depth,size_t Idx,bool IsScaledReg)3216*9880d681SAndroid Build Coastguard Worker void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
3217*9880d681SAndroid Build Coastguard Worker                                              const Formula &Base,
3218*9880d681SAndroid Build Coastguard Worker                                              unsigned Depth, size_t Idx,
3219*9880d681SAndroid Build Coastguard Worker                                              bool IsScaledReg) {
3220*9880d681SAndroid Build Coastguard Worker   const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3221*9880d681SAndroid Build Coastguard Worker   SmallVector<const SCEV *, 8> AddOps;
3222*9880d681SAndroid Build Coastguard Worker   const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
3223*9880d681SAndroid Build Coastguard Worker   if (Remainder)
3224*9880d681SAndroid Build Coastguard Worker     AddOps.push_back(Remainder);
3225*9880d681SAndroid Build Coastguard Worker 
3226*9880d681SAndroid Build Coastguard Worker   if (AddOps.size() == 1)
3227*9880d681SAndroid Build Coastguard Worker     return;
3228*9880d681SAndroid Build Coastguard Worker 
3229*9880d681SAndroid Build Coastguard Worker   for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
3230*9880d681SAndroid Build Coastguard Worker                                                      JE = AddOps.end();
3231*9880d681SAndroid Build Coastguard Worker        J != JE; ++J) {
3232*9880d681SAndroid Build Coastguard Worker 
3233*9880d681SAndroid Build Coastguard Worker     // Loop-variant "unknown" values are uninteresting; we won't be able to
3234*9880d681SAndroid Build Coastguard Worker     // do anything meaningful with them.
3235*9880d681SAndroid Build Coastguard Worker     if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
3236*9880d681SAndroid Build Coastguard Worker       continue;
3237*9880d681SAndroid Build Coastguard Worker 
3238*9880d681SAndroid Build Coastguard Worker     // Don't pull a constant into a register if the constant could be folded
3239*9880d681SAndroid Build Coastguard Worker     // into an immediate field.
3240*9880d681SAndroid Build Coastguard Worker     if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
3241*9880d681SAndroid Build Coastguard Worker                          LU.AccessTy, *J, Base.getNumRegs() > 1))
3242*9880d681SAndroid Build Coastguard Worker       continue;
3243*9880d681SAndroid Build Coastguard Worker 
3244*9880d681SAndroid Build Coastguard Worker     // Collect all operands except *J.
3245*9880d681SAndroid Build Coastguard Worker     SmallVector<const SCEV *, 8> InnerAddOps(
3246*9880d681SAndroid Build Coastguard Worker         ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
3247*9880d681SAndroid Build Coastguard Worker     InnerAddOps.append(std::next(J),
3248*9880d681SAndroid Build Coastguard Worker                        ((const SmallVector<const SCEV *, 8> &)AddOps).end());
3249*9880d681SAndroid Build Coastguard Worker 
3250*9880d681SAndroid Build Coastguard Worker     // Don't leave just a constant behind in a register if the constant could
3251*9880d681SAndroid Build Coastguard Worker     // be folded into an immediate field.
3252*9880d681SAndroid Build Coastguard Worker     if (InnerAddOps.size() == 1 &&
3253*9880d681SAndroid Build Coastguard Worker         isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
3254*9880d681SAndroid Build Coastguard Worker                          LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
3255*9880d681SAndroid Build Coastguard Worker       continue;
3256*9880d681SAndroid Build Coastguard Worker 
3257*9880d681SAndroid Build Coastguard Worker     const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
3258*9880d681SAndroid Build Coastguard Worker     if (InnerSum->isZero())
3259*9880d681SAndroid Build Coastguard Worker       continue;
3260*9880d681SAndroid Build Coastguard Worker     Formula F = Base;
3261*9880d681SAndroid Build Coastguard Worker 
3262*9880d681SAndroid Build Coastguard Worker     // Add the remaining pieces of the add back into the new formula.
3263*9880d681SAndroid Build Coastguard Worker     const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
3264*9880d681SAndroid Build Coastguard Worker     if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
3265*9880d681SAndroid Build Coastguard Worker         TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
3266*9880d681SAndroid Build Coastguard Worker                                 InnerSumSC->getValue()->getZExtValue())) {
3267*9880d681SAndroid Build Coastguard Worker       F.UnfoldedOffset =
3268*9880d681SAndroid Build Coastguard Worker           (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue();
3269*9880d681SAndroid Build Coastguard Worker       if (IsScaledReg)
3270*9880d681SAndroid Build Coastguard Worker         F.ScaledReg = nullptr;
3271*9880d681SAndroid Build Coastguard Worker       else
3272*9880d681SAndroid Build Coastguard Worker         F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
3273*9880d681SAndroid Build Coastguard Worker     } else if (IsScaledReg)
3274*9880d681SAndroid Build Coastguard Worker       F.ScaledReg = InnerSum;
3275*9880d681SAndroid Build Coastguard Worker     else
3276*9880d681SAndroid Build Coastguard Worker       F.BaseRegs[Idx] = InnerSum;
3277*9880d681SAndroid Build Coastguard Worker 
3278*9880d681SAndroid Build Coastguard Worker     // Add J as its own register, or an unfolded immediate.
3279*9880d681SAndroid Build Coastguard Worker     const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
3280*9880d681SAndroid Build Coastguard Worker     if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
3281*9880d681SAndroid Build Coastguard Worker         TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
3282*9880d681SAndroid Build Coastguard Worker                                 SC->getValue()->getZExtValue()))
3283*9880d681SAndroid Build Coastguard Worker       F.UnfoldedOffset =
3284*9880d681SAndroid Build Coastguard Worker           (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue();
3285*9880d681SAndroid Build Coastguard Worker     else
3286*9880d681SAndroid Build Coastguard Worker       F.BaseRegs.push_back(*J);
3287*9880d681SAndroid Build Coastguard Worker     // We may have changed the number of register in base regs, adjust the
3288*9880d681SAndroid Build Coastguard Worker     // formula accordingly.
3289*9880d681SAndroid Build Coastguard Worker     F.canonicalize();
3290*9880d681SAndroid Build Coastguard Worker 
3291*9880d681SAndroid Build Coastguard Worker     if (InsertFormula(LU, LUIdx, F))
3292*9880d681SAndroid Build Coastguard Worker       // If that formula hadn't been seen before, recurse to find more like
3293*9880d681SAndroid Build Coastguard Worker       // it.
3294*9880d681SAndroid Build Coastguard Worker       GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth + 1);
3295*9880d681SAndroid Build Coastguard Worker   }
3296*9880d681SAndroid Build Coastguard Worker }
3297*9880d681SAndroid Build Coastguard Worker 
3298*9880d681SAndroid Build Coastguard Worker /// Split out subexpressions from adds and the bases of addrecs.
GenerateReassociations(LSRUse & LU,unsigned LUIdx,Formula Base,unsigned Depth)3299*9880d681SAndroid Build Coastguard Worker void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
3300*9880d681SAndroid Build Coastguard Worker                                          Formula Base, unsigned Depth) {
3301*9880d681SAndroid Build Coastguard Worker   assert(Base.isCanonical() && "Input must be in the canonical form");
3302*9880d681SAndroid Build Coastguard Worker   // Arbitrarily cap recursion to protect compile time.
3303*9880d681SAndroid Build Coastguard Worker   if (Depth >= 3)
3304*9880d681SAndroid Build Coastguard Worker     return;
3305*9880d681SAndroid Build Coastguard Worker 
3306*9880d681SAndroid Build Coastguard Worker   for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
3307*9880d681SAndroid Build Coastguard Worker     GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);
3308*9880d681SAndroid Build Coastguard Worker 
3309*9880d681SAndroid Build Coastguard Worker   if (Base.Scale == 1)
3310*9880d681SAndroid Build Coastguard Worker     GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
3311*9880d681SAndroid Build Coastguard Worker                                /* Idx */ -1, /* IsScaledReg */ true);
3312*9880d681SAndroid Build Coastguard Worker }
3313*9880d681SAndroid Build Coastguard Worker 
3314*9880d681SAndroid Build Coastguard Worker ///  Generate a formula consisting of all of the loop-dominating registers added
3315*9880d681SAndroid Build Coastguard Worker /// into a single register.
GenerateCombinations(LSRUse & LU,unsigned LUIdx,Formula Base)3316*9880d681SAndroid Build Coastguard Worker void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
3317*9880d681SAndroid Build Coastguard Worker                                        Formula Base) {
3318*9880d681SAndroid Build Coastguard Worker   // This method is only interesting on a plurality of registers.
3319*9880d681SAndroid Build Coastguard Worker   if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1)
3320*9880d681SAndroid Build Coastguard Worker     return;
3321*9880d681SAndroid Build Coastguard Worker 
3322*9880d681SAndroid Build Coastguard Worker   // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
3323*9880d681SAndroid Build Coastguard Worker   // processing the formula.
3324*9880d681SAndroid Build Coastguard Worker   Base.unscale();
3325*9880d681SAndroid Build Coastguard Worker   Formula F = Base;
3326*9880d681SAndroid Build Coastguard Worker   F.BaseRegs.clear();
3327*9880d681SAndroid Build Coastguard Worker   SmallVector<const SCEV *, 4> Ops;
3328*9880d681SAndroid Build Coastguard Worker   for (const SCEV *BaseReg : Base.BaseRegs) {
3329*9880d681SAndroid Build Coastguard Worker     if (SE.properlyDominates(BaseReg, L->getHeader()) &&
3330*9880d681SAndroid Build Coastguard Worker         !SE.hasComputableLoopEvolution(BaseReg, L))
3331*9880d681SAndroid Build Coastguard Worker       Ops.push_back(BaseReg);
3332*9880d681SAndroid Build Coastguard Worker     else
3333*9880d681SAndroid Build Coastguard Worker       F.BaseRegs.push_back(BaseReg);
3334*9880d681SAndroid Build Coastguard Worker   }
3335*9880d681SAndroid Build Coastguard Worker   if (Ops.size() > 1) {
3336*9880d681SAndroid Build Coastguard Worker     const SCEV *Sum = SE.getAddExpr(Ops);
3337*9880d681SAndroid Build Coastguard Worker     // TODO: If Sum is zero, it probably means ScalarEvolution missed an
3338*9880d681SAndroid Build Coastguard Worker     // opportunity to fold something. For now, just ignore such cases
3339*9880d681SAndroid Build Coastguard Worker     // rather than proceed with zero in a register.
3340*9880d681SAndroid Build Coastguard Worker     if (!Sum->isZero()) {
3341*9880d681SAndroid Build Coastguard Worker       F.BaseRegs.push_back(Sum);
3342*9880d681SAndroid Build Coastguard Worker       F.canonicalize();
3343*9880d681SAndroid Build Coastguard Worker       (void)InsertFormula(LU, LUIdx, F);
3344*9880d681SAndroid Build Coastguard Worker     }
3345*9880d681SAndroid Build Coastguard Worker   }
3346*9880d681SAndroid Build Coastguard Worker }
3347*9880d681SAndroid Build Coastguard Worker 
3348*9880d681SAndroid Build Coastguard Worker /// \brief Helper function for LSRInstance::GenerateSymbolicOffsets.
GenerateSymbolicOffsetsImpl(LSRUse & LU,unsigned LUIdx,const Formula & Base,size_t Idx,bool IsScaledReg)3349*9880d681SAndroid Build Coastguard Worker void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
3350*9880d681SAndroid Build Coastguard Worker                                               const Formula &Base, size_t Idx,
3351*9880d681SAndroid Build Coastguard Worker                                               bool IsScaledReg) {
3352*9880d681SAndroid Build Coastguard Worker   const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3353*9880d681SAndroid Build Coastguard Worker   GlobalValue *GV = ExtractSymbol(G, SE);
3354*9880d681SAndroid Build Coastguard Worker   if (G->isZero() || !GV)
3355*9880d681SAndroid Build Coastguard Worker     return;
3356*9880d681SAndroid Build Coastguard Worker   Formula F = Base;
3357*9880d681SAndroid Build Coastguard Worker   F.BaseGV = GV;
3358*9880d681SAndroid Build Coastguard Worker   if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
3359*9880d681SAndroid Build Coastguard Worker     return;
3360*9880d681SAndroid Build Coastguard Worker   if (IsScaledReg)
3361*9880d681SAndroid Build Coastguard Worker     F.ScaledReg = G;
3362*9880d681SAndroid Build Coastguard Worker   else
3363*9880d681SAndroid Build Coastguard Worker     F.BaseRegs[Idx] = G;
3364*9880d681SAndroid Build Coastguard Worker   (void)InsertFormula(LU, LUIdx, F);
3365*9880d681SAndroid Build Coastguard Worker }
3366*9880d681SAndroid Build Coastguard Worker 
3367*9880d681SAndroid Build Coastguard Worker /// Generate reuse formulae using symbolic offsets.
GenerateSymbolicOffsets(LSRUse & LU,unsigned LUIdx,Formula Base)3368*9880d681SAndroid Build Coastguard Worker void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
3369*9880d681SAndroid Build Coastguard Worker                                           Formula Base) {
3370*9880d681SAndroid Build Coastguard Worker   // We can't add a symbolic offset if the address already contains one.
3371*9880d681SAndroid Build Coastguard Worker   if (Base.BaseGV) return;
3372*9880d681SAndroid Build Coastguard Worker 
3373*9880d681SAndroid Build Coastguard Worker   for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
3374*9880d681SAndroid Build Coastguard Worker     GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
3375*9880d681SAndroid Build Coastguard Worker   if (Base.Scale == 1)
3376*9880d681SAndroid Build Coastguard Worker     GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1,
3377*9880d681SAndroid Build Coastguard Worker                                 /* IsScaledReg */ true);
3378*9880d681SAndroid Build Coastguard Worker }
3379*9880d681SAndroid Build Coastguard Worker 
3380*9880d681SAndroid Build Coastguard Worker /// \brief Helper function for LSRInstance::GenerateConstantOffsets.
GenerateConstantOffsetsImpl(LSRUse & LU,unsigned LUIdx,const Formula & Base,const SmallVectorImpl<int64_t> & Worklist,size_t Idx,bool IsScaledReg)3381*9880d681SAndroid Build Coastguard Worker void LSRInstance::GenerateConstantOffsetsImpl(
3382*9880d681SAndroid Build Coastguard Worker     LSRUse &LU, unsigned LUIdx, const Formula &Base,
3383*9880d681SAndroid Build Coastguard Worker     const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
3384*9880d681SAndroid Build Coastguard Worker   const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3385*9880d681SAndroid Build Coastguard Worker   for (int64_t Offset : Worklist) {
3386*9880d681SAndroid Build Coastguard Worker     Formula F = Base;
3387*9880d681SAndroid Build Coastguard Worker     F.BaseOffset = (uint64_t)Base.BaseOffset - Offset;
3388*9880d681SAndroid Build Coastguard Worker     if (isLegalUse(TTI, LU.MinOffset - Offset, LU.MaxOffset - Offset, LU.Kind,
3389*9880d681SAndroid Build Coastguard Worker                    LU.AccessTy, F)) {
3390*9880d681SAndroid Build Coastguard Worker       // Add the offset to the base register.
3391*9880d681SAndroid Build Coastguard Worker       const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), Offset), G);
3392*9880d681SAndroid Build Coastguard Worker       // If it cancelled out, drop the base register, otherwise update it.
3393*9880d681SAndroid Build Coastguard Worker       if (NewG->isZero()) {
3394*9880d681SAndroid Build Coastguard Worker         if (IsScaledReg) {
3395*9880d681SAndroid Build Coastguard Worker           F.Scale = 0;
3396*9880d681SAndroid Build Coastguard Worker           F.ScaledReg = nullptr;
3397*9880d681SAndroid Build Coastguard Worker         } else
3398*9880d681SAndroid Build Coastguard Worker           F.deleteBaseReg(F.BaseRegs[Idx]);
3399*9880d681SAndroid Build Coastguard Worker         F.canonicalize();
3400*9880d681SAndroid Build Coastguard Worker       } else if (IsScaledReg)
3401*9880d681SAndroid Build Coastguard Worker         F.ScaledReg = NewG;
3402*9880d681SAndroid Build Coastguard Worker       else
3403*9880d681SAndroid Build Coastguard Worker         F.BaseRegs[Idx] = NewG;
3404*9880d681SAndroid Build Coastguard Worker 
3405*9880d681SAndroid Build Coastguard Worker       (void)InsertFormula(LU, LUIdx, F);
3406*9880d681SAndroid Build Coastguard Worker     }
3407*9880d681SAndroid Build Coastguard Worker   }
3408*9880d681SAndroid Build Coastguard Worker 
3409*9880d681SAndroid Build Coastguard Worker   int64_t Imm = ExtractImmediate(G, SE);
3410*9880d681SAndroid Build Coastguard Worker   if (G->isZero() || Imm == 0)
3411*9880d681SAndroid Build Coastguard Worker     return;
3412*9880d681SAndroid Build Coastguard Worker   Formula F = Base;
3413*9880d681SAndroid Build Coastguard Worker   F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
3414*9880d681SAndroid Build Coastguard Worker   if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
3415*9880d681SAndroid Build Coastguard Worker     return;
3416*9880d681SAndroid Build Coastguard Worker   if (IsScaledReg)
3417*9880d681SAndroid Build Coastguard Worker     F.ScaledReg = G;
3418*9880d681SAndroid Build Coastguard Worker   else
3419*9880d681SAndroid Build Coastguard Worker     F.BaseRegs[Idx] = G;
3420*9880d681SAndroid Build Coastguard Worker   (void)InsertFormula(LU, LUIdx, F);
3421*9880d681SAndroid Build Coastguard Worker }
3422*9880d681SAndroid Build Coastguard Worker 
3423*9880d681SAndroid Build Coastguard Worker /// GenerateConstantOffsets - Generate reuse formulae using symbolic offsets.
GenerateConstantOffsets(LSRUse & LU,unsigned LUIdx,Formula Base)3424*9880d681SAndroid Build Coastguard Worker void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
3425*9880d681SAndroid Build Coastguard Worker                                           Formula Base) {
3426*9880d681SAndroid Build Coastguard Worker   // TODO: For now, just add the min and max offset, because it usually isn't
3427*9880d681SAndroid Build Coastguard Worker   // worthwhile looking at everything inbetween.
3428*9880d681SAndroid Build Coastguard Worker   SmallVector<int64_t, 2> Worklist;
3429*9880d681SAndroid Build Coastguard Worker   Worklist.push_back(LU.MinOffset);
3430*9880d681SAndroid Build Coastguard Worker   if (LU.MaxOffset != LU.MinOffset)
3431*9880d681SAndroid Build Coastguard Worker     Worklist.push_back(LU.MaxOffset);
3432*9880d681SAndroid Build Coastguard Worker 
3433*9880d681SAndroid Build Coastguard Worker   for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
3434*9880d681SAndroid Build Coastguard Worker     GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
3435*9880d681SAndroid Build Coastguard Worker   if (Base.Scale == 1)
3436*9880d681SAndroid Build Coastguard Worker     GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1,
3437*9880d681SAndroid Build Coastguard Worker                                 /* IsScaledReg */ true);
3438*9880d681SAndroid Build Coastguard Worker }
3439*9880d681SAndroid Build Coastguard Worker 
3440*9880d681SAndroid Build Coastguard Worker /// For ICmpZero, check to see if we can scale up the comparison. For example, x
3441*9880d681SAndroid Build Coastguard Worker /// == y -> x*c == y*c.
GenerateICmpZeroScales(LSRUse & LU,unsigned LUIdx,Formula Base)3442*9880d681SAndroid Build Coastguard Worker void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
3443*9880d681SAndroid Build Coastguard Worker                                          Formula Base) {
3444*9880d681SAndroid Build Coastguard Worker   if (LU.Kind != LSRUse::ICmpZero) return;
3445*9880d681SAndroid Build Coastguard Worker 
3446*9880d681SAndroid Build Coastguard Worker   // Determine the integer type for the base formula.
3447*9880d681SAndroid Build Coastguard Worker   Type *IntTy = Base.getType();
3448*9880d681SAndroid Build Coastguard Worker   if (!IntTy) return;
3449*9880d681SAndroid Build Coastguard Worker   if (SE.getTypeSizeInBits(IntTy) > 64) return;
3450*9880d681SAndroid Build Coastguard Worker 
3451*9880d681SAndroid Build Coastguard Worker   // Don't do this if there is more than one offset.
3452*9880d681SAndroid Build Coastguard Worker   if (LU.MinOffset != LU.MaxOffset) return;
3453*9880d681SAndroid Build Coastguard Worker 
3454*9880d681SAndroid Build Coastguard Worker   assert(!Base.BaseGV && "ICmpZero use is not legal!");
3455*9880d681SAndroid Build Coastguard Worker 
3456*9880d681SAndroid Build Coastguard Worker   // Check each interesting stride.
3457*9880d681SAndroid Build Coastguard Worker   for (int64_t Factor : Factors) {
3458*9880d681SAndroid Build Coastguard Worker     // Check that the multiplication doesn't overflow.
3459*9880d681SAndroid Build Coastguard Worker     if (Base.BaseOffset == INT64_MIN && Factor == -1)
3460*9880d681SAndroid Build Coastguard Worker       continue;
3461*9880d681SAndroid Build Coastguard Worker     int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
3462*9880d681SAndroid Build Coastguard Worker     if (NewBaseOffset / Factor != Base.BaseOffset)
3463*9880d681SAndroid Build Coastguard Worker       continue;
3464*9880d681SAndroid Build Coastguard Worker     // If the offset will be truncated at this use, check that it is in bounds.
3465*9880d681SAndroid Build Coastguard Worker     if (!IntTy->isPointerTy() &&
3466*9880d681SAndroid Build Coastguard Worker         !ConstantInt::isValueValidForType(IntTy, NewBaseOffset))
3467*9880d681SAndroid Build Coastguard Worker       continue;
3468*9880d681SAndroid Build Coastguard Worker 
3469*9880d681SAndroid Build Coastguard Worker     // Check that multiplying with the use offset doesn't overflow.
3470*9880d681SAndroid Build Coastguard Worker     int64_t Offset = LU.MinOffset;
3471*9880d681SAndroid Build Coastguard Worker     if (Offset == INT64_MIN && Factor == -1)
3472*9880d681SAndroid Build Coastguard Worker       continue;
3473*9880d681SAndroid Build Coastguard Worker     Offset = (uint64_t)Offset * Factor;
3474*9880d681SAndroid Build Coastguard Worker     if (Offset / Factor != LU.MinOffset)
3475*9880d681SAndroid Build Coastguard Worker       continue;
3476*9880d681SAndroid Build Coastguard Worker     // If the offset will be truncated at this use, check that it is in bounds.
3477*9880d681SAndroid Build Coastguard Worker     if (!IntTy->isPointerTy() &&
3478*9880d681SAndroid Build Coastguard Worker         !ConstantInt::isValueValidForType(IntTy, Offset))
3479*9880d681SAndroid Build Coastguard Worker       continue;
3480*9880d681SAndroid Build Coastguard Worker 
3481*9880d681SAndroid Build Coastguard Worker     Formula F = Base;
3482*9880d681SAndroid Build Coastguard Worker     F.BaseOffset = NewBaseOffset;
3483*9880d681SAndroid Build Coastguard Worker 
3484*9880d681SAndroid Build Coastguard Worker     // Check that this scale is legal.
3485*9880d681SAndroid Build Coastguard Worker     if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
3486*9880d681SAndroid Build Coastguard Worker       continue;
3487*9880d681SAndroid Build Coastguard Worker 
3488*9880d681SAndroid Build Coastguard Worker     // Compensate for the use having MinOffset built into it.
3489*9880d681SAndroid Build Coastguard Worker     F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;
3490*9880d681SAndroid Build Coastguard Worker 
3491*9880d681SAndroid Build Coastguard Worker     const SCEV *FactorS = SE.getConstant(IntTy, Factor);
3492*9880d681SAndroid Build Coastguard Worker 
3493*9880d681SAndroid Build Coastguard Worker     // Check that multiplying with each base register doesn't overflow.
3494*9880d681SAndroid Build Coastguard Worker     for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
3495*9880d681SAndroid Build Coastguard Worker       F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
3496*9880d681SAndroid Build Coastguard Worker       if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
3497*9880d681SAndroid Build Coastguard Worker         goto next;
3498*9880d681SAndroid Build Coastguard Worker     }
3499*9880d681SAndroid Build Coastguard Worker 
3500*9880d681SAndroid Build Coastguard Worker     // Check that multiplying with the scaled register doesn't overflow.
3501*9880d681SAndroid Build Coastguard Worker     if (F.ScaledReg) {
3502*9880d681SAndroid Build Coastguard Worker       F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
3503*9880d681SAndroid Build Coastguard Worker       if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
3504*9880d681SAndroid Build Coastguard Worker         continue;
3505*9880d681SAndroid Build Coastguard Worker     }
3506*9880d681SAndroid Build Coastguard Worker 
3507*9880d681SAndroid Build Coastguard Worker     // Check that multiplying with the unfolded offset doesn't overflow.
3508*9880d681SAndroid Build Coastguard Worker     if (F.UnfoldedOffset != 0) {
3509*9880d681SAndroid Build Coastguard Worker       if (F.UnfoldedOffset == INT64_MIN && Factor == -1)
3510*9880d681SAndroid Build Coastguard Worker         continue;
3511*9880d681SAndroid Build Coastguard Worker       F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
3512*9880d681SAndroid Build Coastguard Worker       if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
3513*9880d681SAndroid Build Coastguard Worker         continue;
3514*9880d681SAndroid Build Coastguard Worker       // If the offset will be truncated, check that it is in bounds.
3515*9880d681SAndroid Build Coastguard Worker       if (!IntTy->isPointerTy() &&
3516*9880d681SAndroid Build Coastguard Worker           !ConstantInt::isValueValidForType(IntTy, F.UnfoldedOffset))
3517*9880d681SAndroid Build Coastguard Worker         continue;
3518*9880d681SAndroid Build Coastguard Worker     }
3519*9880d681SAndroid Build Coastguard Worker 
3520*9880d681SAndroid Build Coastguard Worker     // If we make it here and it's legal, add it.
3521*9880d681SAndroid Build Coastguard Worker     (void)InsertFormula(LU, LUIdx, F);
3522*9880d681SAndroid Build Coastguard Worker   next:;
3523*9880d681SAndroid Build Coastguard Worker   }
3524*9880d681SAndroid Build Coastguard Worker }
3525*9880d681SAndroid Build Coastguard Worker 
3526*9880d681SAndroid Build Coastguard Worker /// Generate stride factor reuse formulae by making use of scaled-offset address
3527*9880d681SAndroid Build Coastguard Worker /// modes, for example.
GenerateScales(LSRUse & LU,unsigned LUIdx,Formula Base)3528*9880d681SAndroid Build Coastguard Worker void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
3529*9880d681SAndroid Build Coastguard Worker   // Determine the integer type for the base formula.
3530*9880d681SAndroid Build Coastguard Worker   Type *IntTy = Base.getType();
3531*9880d681SAndroid Build Coastguard Worker   if (!IntTy) return;
3532*9880d681SAndroid Build Coastguard Worker 
3533*9880d681SAndroid Build Coastguard Worker   // If this Formula already has a scaled register, we can't add another one.
3534*9880d681SAndroid Build Coastguard Worker   // Try to unscale the formula to generate a better scale.
3535*9880d681SAndroid Build Coastguard Worker   if (Base.Scale != 0 && !Base.unscale())
3536*9880d681SAndroid Build Coastguard Worker     return;
3537*9880d681SAndroid Build Coastguard Worker 
3538*9880d681SAndroid Build Coastguard Worker   assert(Base.Scale == 0 && "unscale did not did its job!");
3539*9880d681SAndroid Build Coastguard Worker 
3540*9880d681SAndroid Build Coastguard Worker   // Check each interesting stride.
3541*9880d681SAndroid Build Coastguard Worker   for (int64_t Factor : Factors) {
3542*9880d681SAndroid Build Coastguard Worker     Base.Scale = Factor;
3543*9880d681SAndroid Build Coastguard Worker     Base.HasBaseReg = Base.BaseRegs.size() > 1;
3544*9880d681SAndroid Build Coastguard Worker     // Check whether this scale is going to be legal.
3545*9880d681SAndroid Build Coastguard Worker     if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
3546*9880d681SAndroid Build Coastguard Worker                     Base)) {
3547*9880d681SAndroid Build Coastguard Worker       // As a special-case, handle special out-of-loop Basic users specially.
3548*9880d681SAndroid Build Coastguard Worker       // TODO: Reconsider this special case.
3549*9880d681SAndroid Build Coastguard Worker       if (LU.Kind == LSRUse::Basic &&
3550*9880d681SAndroid Build Coastguard Worker           isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
3551*9880d681SAndroid Build Coastguard Worker                      LU.AccessTy, Base) &&
3552*9880d681SAndroid Build Coastguard Worker           LU.AllFixupsOutsideLoop)
3553*9880d681SAndroid Build Coastguard Worker         LU.Kind = LSRUse::Special;
3554*9880d681SAndroid Build Coastguard Worker       else
3555*9880d681SAndroid Build Coastguard Worker         continue;
3556*9880d681SAndroid Build Coastguard Worker     }
3557*9880d681SAndroid Build Coastguard Worker     // For an ICmpZero, negating a solitary base register won't lead to
3558*9880d681SAndroid Build Coastguard Worker     // new solutions.
3559*9880d681SAndroid Build Coastguard Worker     if (LU.Kind == LSRUse::ICmpZero &&
3560*9880d681SAndroid Build Coastguard Worker         !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)
3561*9880d681SAndroid Build Coastguard Worker       continue;
3562*9880d681SAndroid Build Coastguard Worker     // For each addrec base reg, apply the scale, if possible.
3563*9880d681SAndroid Build Coastguard Worker     for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
3564*9880d681SAndroid Build Coastguard Worker       if (const SCEVAddRecExpr *AR =
3565*9880d681SAndroid Build Coastguard Worker             dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
3566*9880d681SAndroid Build Coastguard Worker         const SCEV *FactorS = SE.getConstant(IntTy, Factor);
3567*9880d681SAndroid Build Coastguard Worker         if (FactorS->isZero())
3568*9880d681SAndroid Build Coastguard Worker           continue;
3569*9880d681SAndroid Build Coastguard Worker         // Divide out the factor, ignoring high bits, since we'll be
3570*9880d681SAndroid Build Coastguard Worker         // scaling the value back up in the end.
3571*9880d681SAndroid Build Coastguard Worker         if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) {
3572*9880d681SAndroid Build Coastguard Worker           // TODO: This could be optimized to avoid all the copying.
3573*9880d681SAndroid Build Coastguard Worker           Formula F = Base;
3574*9880d681SAndroid Build Coastguard Worker           F.ScaledReg = Quotient;
3575*9880d681SAndroid Build Coastguard Worker           F.deleteBaseReg(F.BaseRegs[i]);
3576*9880d681SAndroid Build Coastguard Worker           // The canonical representation of 1*reg is reg, which is already in
3577*9880d681SAndroid Build Coastguard Worker           // Base. In that case, do not try to insert the formula, it will be
3578*9880d681SAndroid Build Coastguard Worker           // rejected anyway.
3579*9880d681SAndroid Build Coastguard Worker           if (F.Scale == 1 && F.BaseRegs.empty())
3580*9880d681SAndroid Build Coastguard Worker             continue;
3581*9880d681SAndroid Build Coastguard Worker           (void)InsertFormula(LU, LUIdx, F);
3582*9880d681SAndroid Build Coastguard Worker         }
3583*9880d681SAndroid Build Coastguard Worker       }
3584*9880d681SAndroid Build Coastguard Worker   }
3585*9880d681SAndroid Build Coastguard Worker }
3586*9880d681SAndroid Build Coastguard Worker 
3587*9880d681SAndroid Build Coastguard Worker /// Generate reuse formulae from different IV types.
GenerateTruncates(LSRUse & LU,unsigned LUIdx,Formula Base)3588*9880d681SAndroid Build Coastguard Worker void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
3589*9880d681SAndroid Build Coastguard Worker   // Don't bother truncating symbolic values.
3590*9880d681SAndroid Build Coastguard Worker   if (Base.BaseGV) return;
3591*9880d681SAndroid Build Coastguard Worker 
3592*9880d681SAndroid Build Coastguard Worker   // Determine the integer type for the base formula.
3593*9880d681SAndroid Build Coastguard Worker   Type *DstTy = Base.getType();
3594*9880d681SAndroid Build Coastguard Worker   if (!DstTy) return;
3595*9880d681SAndroid Build Coastguard Worker   DstTy = SE.getEffectiveSCEVType(DstTy);
3596*9880d681SAndroid Build Coastguard Worker 
3597*9880d681SAndroid Build Coastguard Worker   for (Type *SrcTy : Types) {
3598*9880d681SAndroid Build Coastguard Worker     if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
3599*9880d681SAndroid Build Coastguard Worker       Formula F = Base;
3600*9880d681SAndroid Build Coastguard Worker 
3601*9880d681SAndroid Build Coastguard Worker       if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, SrcTy);
3602*9880d681SAndroid Build Coastguard Worker       for (const SCEV *&BaseReg : F.BaseRegs)
3603*9880d681SAndroid Build Coastguard Worker         BaseReg = SE.getAnyExtendExpr(BaseReg, SrcTy);
3604*9880d681SAndroid Build Coastguard Worker 
3605*9880d681SAndroid Build Coastguard Worker       // TODO: This assumes we've done basic processing on all uses and
3606*9880d681SAndroid Build Coastguard Worker       // have an idea what the register usage is.
3607*9880d681SAndroid Build Coastguard Worker       if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
3608*9880d681SAndroid Build Coastguard Worker         continue;
3609*9880d681SAndroid Build Coastguard Worker 
3610*9880d681SAndroid Build Coastguard Worker       (void)InsertFormula(LU, LUIdx, F);
3611*9880d681SAndroid Build Coastguard Worker     }
3612*9880d681SAndroid Build Coastguard Worker   }
3613*9880d681SAndroid Build Coastguard Worker }
3614*9880d681SAndroid Build Coastguard Worker 
3615*9880d681SAndroid Build Coastguard Worker namespace {
3616*9880d681SAndroid Build Coastguard Worker 
3617*9880d681SAndroid Build Coastguard Worker /// Helper class for GenerateCrossUseConstantOffsets. It's used to defer
3618*9880d681SAndroid Build Coastguard Worker /// modifications so that the search phase doesn't have to worry about the data
3619*9880d681SAndroid Build Coastguard Worker /// structures moving underneath it.
3620*9880d681SAndroid Build Coastguard Worker struct WorkItem {
3621*9880d681SAndroid Build Coastguard Worker   size_t LUIdx;
3622*9880d681SAndroid Build Coastguard Worker   int64_t Imm;
3623*9880d681SAndroid Build Coastguard Worker   const SCEV *OrigReg;
3624*9880d681SAndroid Build Coastguard Worker 
WorkItem__anon6e4801cc0911::WorkItem3625*9880d681SAndroid Build Coastguard Worker   WorkItem(size_t LI, int64_t I, const SCEV *R)
3626*9880d681SAndroid Build Coastguard Worker     : LUIdx(LI), Imm(I), OrigReg(R) {}
3627*9880d681SAndroid Build Coastguard Worker 
3628*9880d681SAndroid Build Coastguard Worker   void print(raw_ostream &OS) const;
3629*9880d681SAndroid Build Coastguard Worker   void dump() const;
3630*9880d681SAndroid Build Coastguard Worker };
3631*9880d681SAndroid Build Coastguard Worker 
3632*9880d681SAndroid Build Coastguard Worker }
3633*9880d681SAndroid Build Coastguard Worker 
print(raw_ostream & OS) const3634*9880d681SAndroid Build Coastguard Worker void WorkItem::print(raw_ostream &OS) const {
3635*9880d681SAndroid Build Coastguard Worker   OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
3636*9880d681SAndroid Build Coastguard Worker      << " , add offset " << Imm;
3637*9880d681SAndroid Build Coastguard Worker }
3638*9880d681SAndroid Build Coastguard Worker 
3639*9880d681SAndroid Build Coastguard Worker LLVM_DUMP_METHOD
dump() const3640*9880d681SAndroid Build Coastguard Worker void WorkItem::dump() const {
3641*9880d681SAndroid Build Coastguard Worker   print(errs()); errs() << '\n';
3642*9880d681SAndroid Build Coastguard Worker }
3643*9880d681SAndroid Build Coastguard Worker 
3644*9880d681SAndroid Build Coastguard Worker /// Look for registers which are a constant distance apart and try to form reuse
3645*9880d681SAndroid Build Coastguard Worker /// opportunities between them.
GenerateCrossUseConstantOffsets()3646*9880d681SAndroid Build Coastguard Worker void LSRInstance::GenerateCrossUseConstantOffsets() {
3647*9880d681SAndroid Build Coastguard Worker   // Group the registers by their value without any added constant offset.
3648*9880d681SAndroid Build Coastguard Worker   typedef std::map<int64_t, const SCEV *> ImmMapTy;
3649*9880d681SAndroid Build Coastguard Worker   DenseMap<const SCEV *, ImmMapTy> Map;
3650*9880d681SAndroid Build Coastguard Worker   DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
3651*9880d681SAndroid Build Coastguard Worker   SmallVector<const SCEV *, 8> Sequence;
3652*9880d681SAndroid Build Coastguard Worker   for (const SCEV *Use : RegUses) {
3653*9880d681SAndroid Build Coastguard Worker     const SCEV *Reg = Use; // Make a copy for ExtractImmediate to modify.
3654*9880d681SAndroid Build Coastguard Worker     int64_t Imm = ExtractImmediate(Reg, SE);
3655*9880d681SAndroid Build Coastguard Worker     auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy()));
3656*9880d681SAndroid Build Coastguard Worker     if (Pair.second)
3657*9880d681SAndroid Build Coastguard Worker       Sequence.push_back(Reg);
3658*9880d681SAndroid Build Coastguard Worker     Pair.first->second.insert(std::make_pair(Imm, Use));
3659*9880d681SAndroid Build Coastguard Worker     UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);
3660*9880d681SAndroid Build Coastguard Worker   }
3661*9880d681SAndroid Build Coastguard Worker 
3662*9880d681SAndroid Build Coastguard Worker   // Now examine each set of registers with the same base value. Build up
3663*9880d681SAndroid Build Coastguard Worker   // a list of work to do and do the work in a separate step so that we're
3664*9880d681SAndroid Build Coastguard Worker   // not adding formulae and register counts while we're searching.
3665*9880d681SAndroid Build Coastguard Worker   SmallVector<WorkItem, 32> WorkItems;
3666*9880d681SAndroid Build Coastguard Worker   SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
3667*9880d681SAndroid Build Coastguard Worker   for (const SCEV *Reg : Sequence) {
3668*9880d681SAndroid Build Coastguard Worker     const ImmMapTy &Imms = Map.find(Reg)->second;
3669*9880d681SAndroid Build Coastguard Worker 
3670*9880d681SAndroid Build Coastguard Worker     // It's not worthwhile looking for reuse if there's only one offset.
3671*9880d681SAndroid Build Coastguard Worker     if (Imms.size() == 1)
3672*9880d681SAndroid Build Coastguard Worker       continue;
3673*9880d681SAndroid Build Coastguard Worker 
3674*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
3675*9880d681SAndroid Build Coastguard Worker           for (const auto &Entry : Imms)
3676*9880d681SAndroid Build Coastguard Worker             dbgs() << ' ' << Entry.first;
3677*9880d681SAndroid Build Coastguard Worker           dbgs() << '\n');
3678*9880d681SAndroid Build Coastguard Worker 
3679*9880d681SAndroid Build Coastguard Worker     // Examine each offset.
3680*9880d681SAndroid Build Coastguard Worker     for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
3681*9880d681SAndroid Build Coastguard Worker          J != JE; ++J) {
3682*9880d681SAndroid Build Coastguard Worker       const SCEV *OrigReg = J->second;
3683*9880d681SAndroid Build Coastguard Worker 
3684*9880d681SAndroid Build Coastguard Worker       int64_t JImm = J->first;
3685*9880d681SAndroid Build Coastguard Worker       const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
3686*9880d681SAndroid Build Coastguard Worker 
3687*9880d681SAndroid Build Coastguard Worker       if (!isa<SCEVConstant>(OrigReg) &&
3688*9880d681SAndroid Build Coastguard Worker           UsedByIndicesMap[Reg].count() == 1) {
3689*9880d681SAndroid Build Coastguard Worker         DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n');
3690*9880d681SAndroid Build Coastguard Worker         continue;
3691*9880d681SAndroid Build Coastguard Worker       }
3692*9880d681SAndroid Build Coastguard Worker 
3693*9880d681SAndroid Build Coastguard Worker       // Conservatively examine offsets between this orig reg a few selected
3694*9880d681SAndroid Build Coastguard Worker       // other orig regs.
3695*9880d681SAndroid Build Coastguard Worker       ImmMapTy::const_iterator OtherImms[] = {
3696*9880d681SAndroid Build Coastguard Worker         Imms.begin(), std::prev(Imms.end()),
3697*9880d681SAndroid Build Coastguard Worker         Imms.lower_bound((Imms.begin()->first + std::prev(Imms.end())->first) /
3698*9880d681SAndroid Build Coastguard Worker                          2)
3699*9880d681SAndroid Build Coastguard Worker       };
3700*9880d681SAndroid Build Coastguard Worker       for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
3701*9880d681SAndroid Build Coastguard Worker         ImmMapTy::const_iterator M = OtherImms[i];
3702*9880d681SAndroid Build Coastguard Worker         if (M == J || M == JE) continue;
3703*9880d681SAndroid Build Coastguard Worker 
3704*9880d681SAndroid Build Coastguard Worker         // Compute the difference between the two.
3705*9880d681SAndroid Build Coastguard Worker         int64_t Imm = (uint64_t)JImm - M->first;
3706*9880d681SAndroid Build Coastguard Worker         for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1;
3707*9880d681SAndroid Build Coastguard Worker              LUIdx = UsedByIndices.find_next(LUIdx))
3708*9880d681SAndroid Build Coastguard Worker           // Make a memo of this use, offset, and register tuple.
3709*9880d681SAndroid Build Coastguard Worker           if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
3710*9880d681SAndroid Build Coastguard Worker             WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
3711*9880d681SAndroid Build Coastguard Worker       }
3712*9880d681SAndroid Build Coastguard Worker     }
3713*9880d681SAndroid Build Coastguard Worker   }
3714*9880d681SAndroid Build Coastguard Worker 
3715*9880d681SAndroid Build Coastguard Worker   Map.clear();
3716*9880d681SAndroid Build Coastguard Worker   Sequence.clear();
3717*9880d681SAndroid Build Coastguard Worker   UsedByIndicesMap.clear();
3718*9880d681SAndroid Build Coastguard Worker   UniqueItems.clear();
3719*9880d681SAndroid Build Coastguard Worker 
3720*9880d681SAndroid Build Coastguard Worker   // Now iterate through the worklist and add new formulae.
3721*9880d681SAndroid Build Coastguard Worker   for (const WorkItem &WI : WorkItems) {
3722*9880d681SAndroid Build Coastguard Worker     size_t LUIdx = WI.LUIdx;
3723*9880d681SAndroid Build Coastguard Worker     LSRUse &LU = Uses[LUIdx];
3724*9880d681SAndroid Build Coastguard Worker     int64_t Imm = WI.Imm;
3725*9880d681SAndroid Build Coastguard Worker     const SCEV *OrigReg = WI.OrigReg;
3726*9880d681SAndroid Build Coastguard Worker 
3727*9880d681SAndroid Build Coastguard Worker     Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
3728*9880d681SAndroid Build Coastguard Worker     const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
3729*9880d681SAndroid Build Coastguard Worker     unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
3730*9880d681SAndroid Build Coastguard Worker 
3731*9880d681SAndroid Build Coastguard Worker     // TODO: Use a more targeted data structure.
3732*9880d681SAndroid Build Coastguard Worker     for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
3733*9880d681SAndroid Build Coastguard Worker       Formula F = LU.Formulae[L];
3734*9880d681SAndroid Build Coastguard Worker       // FIXME: The code for the scaled and unscaled registers looks
3735*9880d681SAndroid Build Coastguard Worker       // very similar but slightly different. Investigate if they
3736*9880d681SAndroid Build Coastguard Worker       // could be merged. That way, we would not have to unscale the
3737*9880d681SAndroid Build Coastguard Worker       // Formula.
3738*9880d681SAndroid Build Coastguard Worker       F.unscale();
3739*9880d681SAndroid Build Coastguard Worker       // Use the immediate in the scaled register.
3740*9880d681SAndroid Build Coastguard Worker       if (F.ScaledReg == OrigReg) {
3741*9880d681SAndroid Build Coastguard Worker         int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
3742*9880d681SAndroid Build Coastguard Worker         // Don't create 50 + reg(-50).
3743*9880d681SAndroid Build Coastguard Worker         if (F.referencesReg(SE.getSCEV(
3744*9880d681SAndroid Build Coastguard Worker                    ConstantInt::get(IntTy, -(uint64_t)Offset))))
3745*9880d681SAndroid Build Coastguard Worker           continue;
3746*9880d681SAndroid Build Coastguard Worker         Formula NewF = F;
3747*9880d681SAndroid Build Coastguard Worker         NewF.BaseOffset = Offset;
3748*9880d681SAndroid Build Coastguard Worker         if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
3749*9880d681SAndroid Build Coastguard Worker                         NewF))
3750*9880d681SAndroid Build Coastguard Worker           continue;
3751*9880d681SAndroid Build Coastguard Worker         NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
3752*9880d681SAndroid Build Coastguard Worker 
3753*9880d681SAndroid Build Coastguard Worker         // If the new scale is a constant in a register, and adding the constant
3754*9880d681SAndroid Build Coastguard Worker         // value to the immediate would produce a value closer to zero than the
3755*9880d681SAndroid Build Coastguard Worker         // immediate itself, then the formula isn't worthwhile.
3756*9880d681SAndroid Build Coastguard Worker         if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
3757*9880d681SAndroid Build Coastguard Worker           if (C->getValue()->isNegative() != (NewF.BaseOffset < 0) &&
3758*9880d681SAndroid Build Coastguard Worker               (C->getAPInt().abs() * APInt(BitWidth, F.Scale))
3759*9880d681SAndroid Build Coastguard Worker                   .ule(std::abs(NewF.BaseOffset)))
3760*9880d681SAndroid Build Coastguard Worker             continue;
3761*9880d681SAndroid Build Coastguard Worker 
3762*9880d681SAndroid Build Coastguard Worker         // OK, looks good.
3763*9880d681SAndroid Build Coastguard Worker         NewF.canonicalize();
3764*9880d681SAndroid Build Coastguard Worker         (void)InsertFormula(LU, LUIdx, NewF);
3765*9880d681SAndroid Build Coastguard Worker       } else {
3766*9880d681SAndroid Build Coastguard Worker         // Use the immediate in a base register.
3767*9880d681SAndroid Build Coastguard Worker         for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
3768*9880d681SAndroid Build Coastguard Worker           const SCEV *BaseReg = F.BaseRegs[N];
3769*9880d681SAndroid Build Coastguard Worker           if (BaseReg != OrigReg)
3770*9880d681SAndroid Build Coastguard Worker             continue;
3771*9880d681SAndroid Build Coastguard Worker           Formula NewF = F;
3772*9880d681SAndroid Build Coastguard Worker           NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
3773*9880d681SAndroid Build Coastguard Worker           if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
3774*9880d681SAndroid Build Coastguard Worker                           LU.Kind, LU.AccessTy, NewF)) {
3775*9880d681SAndroid Build Coastguard Worker             if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
3776*9880d681SAndroid Build Coastguard Worker               continue;
3777*9880d681SAndroid Build Coastguard Worker             NewF = F;
3778*9880d681SAndroid Build Coastguard Worker             NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
3779*9880d681SAndroid Build Coastguard Worker           }
3780*9880d681SAndroid Build Coastguard Worker           NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
3781*9880d681SAndroid Build Coastguard Worker 
3782*9880d681SAndroid Build Coastguard Worker           // If the new formula has a constant in a register, and adding the
3783*9880d681SAndroid Build Coastguard Worker           // constant value to the immediate would produce a value closer to
3784*9880d681SAndroid Build Coastguard Worker           // zero than the immediate itself, then the formula isn't worthwhile.
3785*9880d681SAndroid Build Coastguard Worker           for (const SCEV *NewReg : NewF.BaseRegs)
3786*9880d681SAndroid Build Coastguard Worker             if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg))
3787*9880d681SAndroid Build Coastguard Worker               if ((C->getAPInt() + NewF.BaseOffset)
3788*9880d681SAndroid Build Coastguard Worker                       .abs()
3789*9880d681SAndroid Build Coastguard Worker                       .slt(std::abs(NewF.BaseOffset)) &&
3790*9880d681SAndroid Build Coastguard Worker                   (C->getAPInt() + NewF.BaseOffset).countTrailingZeros() >=
3791*9880d681SAndroid Build Coastguard Worker                       countTrailingZeros<uint64_t>(NewF.BaseOffset))
3792*9880d681SAndroid Build Coastguard Worker                 goto skip_formula;
3793*9880d681SAndroid Build Coastguard Worker 
3794*9880d681SAndroid Build Coastguard Worker           // Ok, looks good.
3795*9880d681SAndroid Build Coastguard Worker           NewF.canonicalize();
3796*9880d681SAndroid Build Coastguard Worker           (void)InsertFormula(LU, LUIdx, NewF);
3797*9880d681SAndroid Build Coastguard Worker           break;
3798*9880d681SAndroid Build Coastguard Worker         skip_formula:;
3799*9880d681SAndroid Build Coastguard Worker         }
3800*9880d681SAndroid Build Coastguard Worker       }
3801*9880d681SAndroid Build Coastguard Worker     }
3802*9880d681SAndroid Build Coastguard Worker   }
3803*9880d681SAndroid Build Coastguard Worker }
3804*9880d681SAndroid Build Coastguard Worker 
3805*9880d681SAndroid Build Coastguard Worker /// Generate formulae for each use.
3806*9880d681SAndroid Build Coastguard Worker void
GenerateAllReuseFormulae()3807*9880d681SAndroid Build Coastguard Worker LSRInstance::GenerateAllReuseFormulae() {
3808*9880d681SAndroid Build Coastguard Worker   // This is split into multiple loops so that hasRegsUsedByUsesOtherThan
3809*9880d681SAndroid Build Coastguard Worker   // queries are more precise.
3810*9880d681SAndroid Build Coastguard Worker   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3811*9880d681SAndroid Build Coastguard Worker     LSRUse &LU = Uses[LUIdx];
3812*9880d681SAndroid Build Coastguard Worker     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3813*9880d681SAndroid Build Coastguard Worker       GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
3814*9880d681SAndroid Build Coastguard Worker     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3815*9880d681SAndroid Build Coastguard Worker       GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
3816*9880d681SAndroid Build Coastguard Worker   }
3817*9880d681SAndroid Build Coastguard Worker   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3818*9880d681SAndroid Build Coastguard Worker     LSRUse &LU = Uses[LUIdx];
3819*9880d681SAndroid Build Coastguard Worker     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3820*9880d681SAndroid Build Coastguard Worker       GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
3821*9880d681SAndroid Build Coastguard Worker     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3822*9880d681SAndroid Build Coastguard Worker       GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
3823*9880d681SAndroid Build Coastguard Worker     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3824*9880d681SAndroid Build Coastguard Worker       GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
3825*9880d681SAndroid Build Coastguard Worker     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3826*9880d681SAndroid Build Coastguard Worker       GenerateScales(LU, LUIdx, LU.Formulae[i]);
3827*9880d681SAndroid Build Coastguard Worker   }
3828*9880d681SAndroid Build Coastguard Worker   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3829*9880d681SAndroid Build Coastguard Worker     LSRUse &LU = Uses[LUIdx];
3830*9880d681SAndroid Build Coastguard Worker     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
3831*9880d681SAndroid Build Coastguard Worker       GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
3832*9880d681SAndroid Build Coastguard Worker   }
3833*9880d681SAndroid Build Coastguard Worker 
3834*9880d681SAndroid Build Coastguard Worker   GenerateCrossUseConstantOffsets();
3835*9880d681SAndroid Build Coastguard Worker 
3836*9880d681SAndroid Build Coastguard Worker   DEBUG(dbgs() << "\n"
3837*9880d681SAndroid Build Coastguard Worker                   "After generating reuse formulae:\n";
3838*9880d681SAndroid Build Coastguard Worker         print_uses(dbgs()));
3839*9880d681SAndroid Build Coastguard Worker }
3840*9880d681SAndroid Build Coastguard Worker 
3841*9880d681SAndroid Build Coastguard Worker /// If there are multiple formulae with the same set of registers used
3842*9880d681SAndroid Build Coastguard Worker /// by other uses, pick the best one and delete the others.
FilterOutUndesirableDedicatedRegisters()3843*9880d681SAndroid Build Coastguard Worker void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
3844*9880d681SAndroid Build Coastguard Worker   DenseSet<const SCEV *> VisitedRegs;
3845*9880d681SAndroid Build Coastguard Worker   SmallPtrSet<const SCEV *, 16> Regs;
3846*9880d681SAndroid Build Coastguard Worker   SmallPtrSet<const SCEV *, 16> LoserRegs;
3847*9880d681SAndroid Build Coastguard Worker #ifndef NDEBUG
3848*9880d681SAndroid Build Coastguard Worker   bool ChangedFormulae = false;
3849*9880d681SAndroid Build Coastguard Worker #endif
3850*9880d681SAndroid Build Coastguard Worker 
3851*9880d681SAndroid Build Coastguard Worker   // Collect the best formula for each unique set of shared registers. This
3852*9880d681SAndroid Build Coastguard Worker   // is reset for each use.
3853*9880d681SAndroid Build Coastguard Worker   typedef DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>
3854*9880d681SAndroid Build Coastguard Worker     BestFormulaeTy;
3855*9880d681SAndroid Build Coastguard Worker   BestFormulaeTy BestFormulae;
3856*9880d681SAndroid Build Coastguard Worker 
3857*9880d681SAndroid Build Coastguard Worker   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3858*9880d681SAndroid Build Coastguard Worker     LSRUse &LU = Uses[LUIdx];
3859*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
3860*9880d681SAndroid Build Coastguard Worker 
3861*9880d681SAndroid Build Coastguard Worker     bool Any = false;
3862*9880d681SAndroid Build Coastguard Worker     for (size_t FIdx = 0, NumForms = LU.Formulae.size();
3863*9880d681SAndroid Build Coastguard Worker          FIdx != NumForms; ++FIdx) {
3864*9880d681SAndroid Build Coastguard Worker       Formula &F = LU.Formulae[FIdx];
3865*9880d681SAndroid Build Coastguard Worker 
3866*9880d681SAndroid Build Coastguard Worker       // Some formulas are instant losers. For example, they may depend on
3867*9880d681SAndroid Build Coastguard Worker       // nonexistent AddRecs from other loops. These need to be filtered
3868*9880d681SAndroid Build Coastguard Worker       // immediately, otherwise heuristics could choose them over others leading
3869*9880d681SAndroid Build Coastguard Worker       // to an unsatisfactory solution. Passing LoserRegs into RateFormula here
3870*9880d681SAndroid Build Coastguard Worker       // avoids the need to recompute this information across formulae using the
3871*9880d681SAndroid Build Coastguard Worker       // same bad AddRec. Passing LoserRegs is also essential unless we remove
3872*9880d681SAndroid Build Coastguard Worker       // the corresponding bad register from the Regs set.
3873*9880d681SAndroid Build Coastguard Worker       Cost CostF;
3874*9880d681SAndroid Build Coastguard Worker       Regs.clear();
3875*9880d681SAndroid Build Coastguard Worker       CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, LU.Offsets, SE, DT, LU,
3876*9880d681SAndroid Build Coastguard Worker                         &LoserRegs);
3877*9880d681SAndroid Build Coastguard Worker       if (CostF.isLoser()) {
3878*9880d681SAndroid Build Coastguard Worker         // During initial formula generation, undesirable formulae are generated
3879*9880d681SAndroid Build Coastguard Worker         // by uses within other loops that have some non-trivial address mode or
3880*9880d681SAndroid Build Coastguard Worker         // use the postinc form of the IV. LSR needs to provide these formulae
3881*9880d681SAndroid Build Coastguard Worker         // as the basis of rediscovering the desired formula that uses an AddRec
3882*9880d681SAndroid Build Coastguard Worker         // corresponding to the existing phi. Once all formulae have been
3883*9880d681SAndroid Build Coastguard Worker         // generated, these initial losers may be pruned.
3884*9880d681SAndroid Build Coastguard Worker         DEBUG(dbgs() << "  Filtering loser "; F.print(dbgs());
3885*9880d681SAndroid Build Coastguard Worker               dbgs() << "\n");
3886*9880d681SAndroid Build Coastguard Worker       }
3887*9880d681SAndroid Build Coastguard Worker       else {
3888*9880d681SAndroid Build Coastguard Worker         SmallVector<const SCEV *, 4> Key;
3889*9880d681SAndroid Build Coastguard Worker         for (const SCEV *Reg : F.BaseRegs) {
3890*9880d681SAndroid Build Coastguard Worker           if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
3891*9880d681SAndroid Build Coastguard Worker             Key.push_back(Reg);
3892*9880d681SAndroid Build Coastguard Worker         }
3893*9880d681SAndroid Build Coastguard Worker         if (F.ScaledReg &&
3894*9880d681SAndroid Build Coastguard Worker             RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
3895*9880d681SAndroid Build Coastguard Worker           Key.push_back(F.ScaledReg);
3896*9880d681SAndroid Build Coastguard Worker         // Unstable sort by host order ok, because this is only used for
3897*9880d681SAndroid Build Coastguard Worker         // uniquifying.
3898*9880d681SAndroid Build Coastguard Worker         std::sort(Key.begin(), Key.end());
3899*9880d681SAndroid Build Coastguard Worker 
3900*9880d681SAndroid Build Coastguard Worker         std::pair<BestFormulaeTy::const_iterator, bool> P =
3901*9880d681SAndroid Build Coastguard Worker           BestFormulae.insert(std::make_pair(Key, FIdx));
3902*9880d681SAndroid Build Coastguard Worker         if (P.second)
3903*9880d681SAndroid Build Coastguard Worker           continue;
3904*9880d681SAndroid Build Coastguard Worker 
3905*9880d681SAndroid Build Coastguard Worker         Formula &Best = LU.Formulae[P.first->second];
3906*9880d681SAndroid Build Coastguard Worker 
3907*9880d681SAndroid Build Coastguard Worker         Cost CostBest;
3908*9880d681SAndroid Build Coastguard Worker         Regs.clear();
3909*9880d681SAndroid Build Coastguard Worker         CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, LU.Offsets, SE,
3910*9880d681SAndroid Build Coastguard Worker                              DT, LU);
3911*9880d681SAndroid Build Coastguard Worker         if (CostF < CostBest)
3912*9880d681SAndroid Build Coastguard Worker           std::swap(F, Best);
3913*9880d681SAndroid Build Coastguard Worker         DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
3914*9880d681SAndroid Build Coastguard Worker               dbgs() << "\n"
3915*9880d681SAndroid Build Coastguard Worker                         "    in favor of formula "; Best.print(dbgs());
3916*9880d681SAndroid Build Coastguard Worker               dbgs() << '\n');
3917*9880d681SAndroid Build Coastguard Worker       }
3918*9880d681SAndroid Build Coastguard Worker #ifndef NDEBUG
3919*9880d681SAndroid Build Coastguard Worker       ChangedFormulae = true;
3920*9880d681SAndroid Build Coastguard Worker #endif
3921*9880d681SAndroid Build Coastguard Worker       LU.DeleteFormula(F);
3922*9880d681SAndroid Build Coastguard Worker       --FIdx;
3923*9880d681SAndroid Build Coastguard Worker       --NumForms;
3924*9880d681SAndroid Build Coastguard Worker       Any = true;
3925*9880d681SAndroid Build Coastguard Worker     }
3926*9880d681SAndroid Build Coastguard Worker 
3927*9880d681SAndroid Build Coastguard Worker     // Now that we've filtered out some formulae, recompute the Regs set.
3928*9880d681SAndroid Build Coastguard Worker     if (Any)
3929*9880d681SAndroid Build Coastguard Worker       LU.RecomputeRegs(LUIdx, RegUses);
3930*9880d681SAndroid Build Coastguard Worker 
3931*9880d681SAndroid Build Coastguard Worker     // Reset this to prepare for the next use.
3932*9880d681SAndroid Build Coastguard Worker     BestFormulae.clear();
3933*9880d681SAndroid Build Coastguard Worker   }
3934*9880d681SAndroid Build Coastguard Worker 
3935*9880d681SAndroid Build Coastguard Worker   DEBUG(if (ChangedFormulae) {
3936*9880d681SAndroid Build Coastguard Worker           dbgs() << "\n"
3937*9880d681SAndroid Build Coastguard Worker                     "After filtering out undesirable candidates:\n";
3938*9880d681SAndroid Build Coastguard Worker           print_uses(dbgs());
3939*9880d681SAndroid Build Coastguard Worker         });
3940*9880d681SAndroid Build Coastguard Worker }
3941*9880d681SAndroid Build Coastguard Worker 
3942*9880d681SAndroid Build Coastguard Worker // This is a rough guess that seems to work fairly well.
3943*9880d681SAndroid Build Coastguard Worker static const size_t ComplexityLimit = UINT16_MAX;
3944*9880d681SAndroid Build Coastguard Worker 
3945*9880d681SAndroid Build Coastguard Worker /// Estimate the worst-case number of solutions the solver might have to
3946*9880d681SAndroid Build Coastguard Worker /// consider. It almost never considers this many solutions because it prune the
3947*9880d681SAndroid Build Coastguard Worker /// search space, but the pruning isn't always sufficient.
EstimateSearchSpaceComplexity() const3948*9880d681SAndroid Build Coastguard Worker size_t LSRInstance::EstimateSearchSpaceComplexity() const {
3949*9880d681SAndroid Build Coastguard Worker   size_t Power = 1;
3950*9880d681SAndroid Build Coastguard Worker   for (const LSRUse &LU : Uses) {
3951*9880d681SAndroid Build Coastguard Worker     size_t FSize = LU.Formulae.size();
3952*9880d681SAndroid Build Coastguard Worker     if (FSize >= ComplexityLimit) {
3953*9880d681SAndroid Build Coastguard Worker       Power = ComplexityLimit;
3954*9880d681SAndroid Build Coastguard Worker       break;
3955*9880d681SAndroid Build Coastguard Worker     }
3956*9880d681SAndroid Build Coastguard Worker     Power *= FSize;
3957*9880d681SAndroid Build Coastguard Worker     if (Power >= ComplexityLimit)
3958*9880d681SAndroid Build Coastguard Worker       break;
3959*9880d681SAndroid Build Coastguard Worker   }
3960*9880d681SAndroid Build Coastguard Worker   return Power;
3961*9880d681SAndroid Build Coastguard Worker }
3962*9880d681SAndroid Build Coastguard Worker 
3963*9880d681SAndroid Build Coastguard Worker /// When one formula uses a superset of the registers of another formula, it
3964*9880d681SAndroid Build Coastguard Worker /// won't help reduce register pressure (though it may not necessarily hurt
3965*9880d681SAndroid Build Coastguard Worker /// register pressure); remove it to simplify the system.
NarrowSearchSpaceByDetectingSupersets()3966*9880d681SAndroid Build Coastguard Worker void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
3967*9880d681SAndroid Build Coastguard Worker   if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
3968*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "The search space is too complex.\n");
3969*9880d681SAndroid Build Coastguard Worker 
3970*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
3971*9880d681SAndroid Build Coastguard Worker                     "which use a superset of registers used by other "
3972*9880d681SAndroid Build Coastguard Worker                     "formulae.\n");
3973*9880d681SAndroid Build Coastguard Worker 
3974*9880d681SAndroid Build Coastguard Worker     for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
3975*9880d681SAndroid Build Coastguard Worker       LSRUse &LU = Uses[LUIdx];
3976*9880d681SAndroid Build Coastguard Worker       bool Any = false;
3977*9880d681SAndroid Build Coastguard Worker       for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
3978*9880d681SAndroid Build Coastguard Worker         Formula &F = LU.Formulae[i];
3979*9880d681SAndroid Build Coastguard Worker         // Look for a formula with a constant or GV in a register. If the use
3980*9880d681SAndroid Build Coastguard Worker         // also has a formula with that same value in an immediate field,
3981*9880d681SAndroid Build Coastguard Worker         // delete the one that uses a register.
3982*9880d681SAndroid Build Coastguard Worker         for (SmallVectorImpl<const SCEV *>::const_iterator
3983*9880d681SAndroid Build Coastguard Worker              I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
3984*9880d681SAndroid Build Coastguard Worker           if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
3985*9880d681SAndroid Build Coastguard Worker             Formula NewF = F;
3986*9880d681SAndroid Build Coastguard Worker             NewF.BaseOffset += C->getValue()->getSExtValue();
3987*9880d681SAndroid Build Coastguard Worker             NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
3988*9880d681SAndroid Build Coastguard Worker                                 (I - F.BaseRegs.begin()));
3989*9880d681SAndroid Build Coastguard Worker             if (LU.HasFormulaWithSameRegs(NewF)) {
3990*9880d681SAndroid Build Coastguard Worker               DEBUG(dbgs() << "  Deleting "; F.print(dbgs()); dbgs() << '\n');
3991*9880d681SAndroid Build Coastguard Worker               LU.DeleteFormula(F);
3992*9880d681SAndroid Build Coastguard Worker               --i;
3993*9880d681SAndroid Build Coastguard Worker               --e;
3994*9880d681SAndroid Build Coastguard Worker               Any = true;
3995*9880d681SAndroid Build Coastguard Worker               break;
3996*9880d681SAndroid Build Coastguard Worker             }
3997*9880d681SAndroid Build Coastguard Worker           } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
3998*9880d681SAndroid Build Coastguard Worker             if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
3999*9880d681SAndroid Build Coastguard Worker               if (!F.BaseGV) {
4000*9880d681SAndroid Build Coastguard Worker                 Formula NewF = F;
4001*9880d681SAndroid Build Coastguard Worker                 NewF.BaseGV = GV;
4002*9880d681SAndroid Build Coastguard Worker                 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4003*9880d681SAndroid Build Coastguard Worker                                     (I - F.BaseRegs.begin()));
4004*9880d681SAndroid Build Coastguard Worker                 if (LU.HasFormulaWithSameRegs(NewF)) {
4005*9880d681SAndroid Build Coastguard Worker                   DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
4006*9880d681SAndroid Build Coastguard Worker                         dbgs() << '\n');
4007*9880d681SAndroid Build Coastguard Worker                   LU.DeleteFormula(F);
4008*9880d681SAndroid Build Coastguard Worker                   --i;
4009*9880d681SAndroid Build Coastguard Worker                   --e;
4010*9880d681SAndroid Build Coastguard Worker                   Any = true;
4011*9880d681SAndroid Build Coastguard Worker                   break;
4012*9880d681SAndroid Build Coastguard Worker                 }
4013*9880d681SAndroid Build Coastguard Worker               }
4014*9880d681SAndroid Build Coastguard Worker           }
4015*9880d681SAndroid Build Coastguard Worker         }
4016*9880d681SAndroid Build Coastguard Worker       }
4017*9880d681SAndroid Build Coastguard Worker       if (Any)
4018*9880d681SAndroid Build Coastguard Worker         LU.RecomputeRegs(LUIdx, RegUses);
4019*9880d681SAndroid Build Coastguard Worker     }
4020*9880d681SAndroid Build Coastguard Worker 
4021*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "After pre-selection:\n";
4022*9880d681SAndroid Build Coastguard Worker           print_uses(dbgs()));
4023*9880d681SAndroid Build Coastguard Worker   }
4024*9880d681SAndroid Build Coastguard Worker }
4025*9880d681SAndroid Build Coastguard Worker 
4026*9880d681SAndroid Build Coastguard Worker /// When there are many registers for expressions like A, A+1, A+2, etc.,
4027*9880d681SAndroid Build Coastguard Worker /// allocate a single register for them.
NarrowSearchSpaceByCollapsingUnrolledCode()4028*9880d681SAndroid Build Coastguard Worker void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4029*9880d681SAndroid Build Coastguard Worker   if (EstimateSearchSpaceComplexity() < ComplexityLimit)
4030*9880d681SAndroid Build Coastguard Worker     return;
4031*9880d681SAndroid Build Coastguard Worker 
4032*9880d681SAndroid Build Coastguard Worker   DEBUG(dbgs() << "The search space is too complex.\n"
4033*9880d681SAndroid Build Coastguard Worker                   "Narrowing the search space by assuming that uses separated "
4034*9880d681SAndroid Build Coastguard Worker                   "by a constant offset will use the same registers.\n");
4035*9880d681SAndroid Build Coastguard Worker 
4036*9880d681SAndroid Build Coastguard Worker   // This is especially useful for unrolled loops.
4037*9880d681SAndroid Build Coastguard Worker 
4038*9880d681SAndroid Build Coastguard Worker   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4039*9880d681SAndroid Build Coastguard Worker     LSRUse &LU = Uses[LUIdx];
4040*9880d681SAndroid Build Coastguard Worker     for (const Formula &F : LU.Formulae) {
4041*9880d681SAndroid Build Coastguard Worker       if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))
4042*9880d681SAndroid Build Coastguard Worker         continue;
4043*9880d681SAndroid Build Coastguard Worker 
4044*9880d681SAndroid Build Coastguard Worker       LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
4045*9880d681SAndroid Build Coastguard Worker       if (!LUThatHas)
4046*9880d681SAndroid Build Coastguard Worker         continue;
4047*9880d681SAndroid Build Coastguard Worker 
4048*9880d681SAndroid Build Coastguard Worker       if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false,
4049*9880d681SAndroid Build Coastguard Worker                               LU.Kind, LU.AccessTy))
4050*9880d681SAndroid Build Coastguard Worker         continue;
4051*9880d681SAndroid Build Coastguard Worker 
4052*9880d681SAndroid Build Coastguard Worker       DEBUG(dbgs() << "  Deleting use "; LU.print(dbgs()); dbgs() << '\n');
4053*9880d681SAndroid Build Coastguard Worker 
4054*9880d681SAndroid Build Coastguard Worker       LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4055*9880d681SAndroid Build Coastguard Worker 
4056*9880d681SAndroid Build Coastguard Worker       // Update the relocs to reference the new use.
4057*9880d681SAndroid Build Coastguard Worker       for (LSRFixup &Fixup : Fixups) {
4058*9880d681SAndroid Build Coastguard Worker         if (Fixup.LUIdx == LUIdx) {
4059*9880d681SAndroid Build Coastguard Worker           Fixup.LUIdx = LUThatHas - &Uses.front();
4060*9880d681SAndroid Build Coastguard Worker           Fixup.Offset += F.BaseOffset;
4061*9880d681SAndroid Build Coastguard Worker           // Add the new offset to LUThatHas' offset list.
4062*9880d681SAndroid Build Coastguard Worker           if (LUThatHas->Offsets.back() != Fixup.Offset) {
4063*9880d681SAndroid Build Coastguard Worker             LUThatHas->Offsets.push_back(Fixup.Offset);
4064*9880d681SAndroid Build Coastguard Worker             if (Fixup.Offset > LUThatHas->MaxOffset)
4065*9880d681SAndroid Build Coastguard Worker               LUThatHas->MaxOffset = Fixup.Offset;
4066*9880d681SAndroid Build Coastguard Worker             if (Fixup.Offset < LUThatHas->MinOffset)
4067*9880d681SAndroid Build Coastguard Worker               LUThatHas->MinOffset = Fixup.Offset;
4068*9880d681SAndroid Build Coastguard Worker           }
4069*9880d681SAndroid Build Coastguard Worker           DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
4070*9880d681SAndroid Build Coastguard Worker         }
4071*9880d681SAndroid Build Coastguard Worker         if (Fixup.LUIdx == NumUses-1)
4072*9880d681SAndroid Build Coastguard Worker           Fixup.LUIdx = LUIdx;
4073*9880d681SAndroid Build Coastguard Worker       }
4074*9880d681SAndroid Build Coastguard Worker 
4075*9880d681SAndroid Build Coastguard Worker       // Delete formulae from the new use which are no longer legal.
4076*9880d681SAndroid Build Coastguard Worker       bool Any = false;
4077*9880d681SAndroid Build Coastguard Worker       for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
4078*9880d681SAndroid Build Coastguard Worker         Formula &F = LUThatHas->Formulae[i];
4079*9880d681SAndroid Build Coastguard Worker         if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
4080*9880d681SAndroid Build Coastguard Worker                         LUThatHas->Kind, LUThatHas->AccessTy, F)) {
4081*9880d681SAndroid Build Coastguard Worker           DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
4082*9880d681SAndroid Build Coastguard Worker                 dbgs() << '\n');
4083*9880d681SAndroid Build Coastguard Worker           LUThatHas->DeleteFormula(F);
4084*9880d681SAndroid Build Coastguard Worker           --i;
4085*9880d681SAndroid Build Coastguard Worker           --e;
4086*9880d681SAndroid Build Coastguard Worker           Any = true;
4087*9880d681SAndroid Build Coastguard Worker         }
4088*9880d681SAndroid Build Coastguard Worker       }
4089*9880d681SAndroid Build Coastguard Worker 
4090*9880d681SAndroid Build Coastguard Worker       if (Any)
4091*9880d681SAndroid Build Coastguard Worker         LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
4092*9880d681SAndroid Build Coastguard Worker 
4093*9880d681SAndroid Build Coastguard Worker       // Delete the old use.
4094*9880d681SAndroid Build Coastguard Worker       DeleteUse(LU, LUIdx);
4095*9880d681SAndroid Build Coastguard Worker       --LUIdx;
4096*9880d681SAndroid Build Coastguard Worker       --NumUses;
4097*9880d681SAndroid Build Coastguard Worker       break;
4098*9880d681SAndroid Build Coastguard Worker     }
4099*9880d681SAndroid Build Coastguard Worker   }
4100*9880d681SAndroid Build Coastguard Worker 
4101*9880d681SAndroid Build Coastguard Worker   DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
4102*9880d681SAndroid Build Coastguard Worker }
4103*9880d681SAndroid Build Coastguard Worker 
4104*9880d681SAndroid Build Coastguard Worker /// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that
4105*9880d681SAndroid Build Coastguard Worker /// we've done more filtering, as it may be able to find more formulae to
4106*9880d681SAndroid Build Coastguard Worker /// eliminate.
NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters()4107*9880d681SAndroid Build Coastguard Worker void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
4108*9880d681SAndroid Build Coastguard Worker   if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
4109*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "The search space is too complex.\n");
4110*9880d681SAndroid Build Coastguard Worker 
4111*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
4112*9880d681SAndroid Build Coastguard Worker                     "undesirable dedicated registers.\n");
4113*9880d681SAndroid Build Coastguard Worker 
4114*9880d681SAndroid Build Coastguard Worker     FilterOutUndesirableDedicatedRegisters();
4115*9880d681SAndroid Build Coastguard Worker 
4116*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "After pre-selection:\n";
4117*9880d681SAndroid Build Coastguard Worker           print_uses(dbgs()));
4118*9880d681SAndroid Build Coastguard Worker   }
4119*9880d681SAndroid Build Coastguard Worker }
4120*9880d681SAndroid Build Coastguard Worker 
4121*9880d681SAndroid Build Coastguard Worker /// Pick a register which seems likely to be profitable, and then in any use
4122*9880d681SAndroid Build Coastguard Worker /// which has any reference to that register, delete all formulae which do not
4123*9880d681SAndroid Build Coastguard Worker /// reference that register.
NarrowSearchSpaceByPickingWinnerRegs()4124*9880d681SAndroid Build Coastguard Worker void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
4125*9880d681SAndroid Build Coastguard Worker   // With all other options exhausted, loop until the system is simple
4126*9880d681SAndroid Build Coastguard Worker   // enough to handle.
4127*9880d681SAndroid Build Coastguard Worker   SmallPtrSet<const SCEV *, 4> Taken;
4128*9880d681SAndroid Build Coastguard Worker   while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
4129*9880d681SAndroid Build Coastguard Worker     // Ok, we have too many of formulae on our hands to conveniently handle.
4130*9880d681SAndroid Build Coastguard Worker     // Use a rough heuristic to thin out the list.
4131*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "The search space is too complex.\n");
4132*9880d681SAndroid Build Coastguard Worker 
4133*9880d681SAndroid Build Coastguard Worker     // Pick the register which is used by the most LSRUses, which is likely
4134*9880d681SAndroid Build Coastguard Worker     // to be a good reuse register candidate.
4135*9880d681SAndroid Build Coastguard Worker     const SCEV *Best = nullptr;
4136*9880d681SAndroid Build Coastguard Worker     unsigned BestNum = 0;
4137*9880d681SAndroid Build Coastguard Worker     for (const SCEV *Reg : RegUses) {
4138*9880d681SAndroid Build Coastguard Worker       if (Taken.count(Reg))
4139*9880d681SAndroid Build Coastguard Worker         continue;
4140*9880d681SAndroid Build Coastguard Worker       if (!Best)
4141*9880d681SAndroid Build Coastguard Worker         Best = Reg;
4142*9880d681SAndroid Build Coastguard Worker       else {
4143*9880d681SAndroid Build Coastguard Worker         unsigned Count = RegUses.getUsedByIndices(Reg).count();
4144*9880d681SAndroid Build Coastguard Worker         if (Count > BestNum) {
4145*9880d681SAndroid Build Coastguard Worker           Best = Reg;
4146*9880d681SAndroid Build Coastguard Worker           BestNum = Count;
4147*9880d681SAndroid Build Coastguard Worker         }
4148*9880d681SAndroid Build Coastguard Worker       }
4149*9880d681SAndroid Build Coastguard Worker     }
4150*9880d681SAndroid Build Coastguard Worker 
4151*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
4152*9880d681SAndroid Build Coastguard Worker                  << " will yield profitable reuse.\n");
4153*9880d681SAndroid Build Coastguard Worker     Taken.insert(Best);
4154*9880d681SAndroid Build Coastguard Worker 
4155*9880d681SAndroid Build Coastguard Worker     // In any use with formulae which references this register, delete formulae
4156*9880d681SAndroid Build Coastguard Worker     // which don't reference it.
4157*9880d681SAndroid Build Coastguard Worker     for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4158*9880d681SAndroid Build Coastguard Worker       LSRUse &LU = Uses[LUIdx];
4159*9880d681SAndroid Build Coastguard Worker       if (!LU.Regs.count(Best)) continue;
4160*9880d681SAndroid Build Coastguard Worker 
4161*9880d681SAndroid Build Coastguard Worker       bool Any = false;
4162*9880d681SAndroid Build Coastguard Worker       for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4163*9880d681SAndroid Build Coastguard Worker         Formula &F = LU.Formulae[i];
4164*9880d681SAndroid Build Coastguard Worker         if (!F.referencesReg(Best)) {
4165*9880d681SAndroid Build Coastguard Worker           DEBUG(dbgs() << "  Deleting "; F.print(dbgs()); dbgs() << '\n');
4166*9880d681SAndroid Build Coastguard Worker           LU.DeleteFormula(F);
4167*9880d681SAndroid Build Coastguard Worker           --e;
4168*9880d681SAndroid Build Coastguard Worker           --i;
4169*9880d681SAndroid Build Coastguard Worker           Any = true;
4170*9880d681SAndroid Build Coastguard Worker           assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
4171*9880d681SAndroid Build Coastguard Worker           continue;
4172*9880d681SAndroid Build Coastguard Worker         }
4173*9880d681SAndroid Build Coastguard Worker       }
4174*9880d681SAndroid Build Coastguard Worker 
4175*9880d681SAndroid Build Coastguard Worker       if (Any)
4176*9880d681SAndroid Build Coastguard Worker         LU.RecomputeRegs(LUIdx, RegUses);
4177*9880d681SAndroid Build Coastguard Worker     }
4178*9880d681SAndroid Build Coastguard Worker 
4179*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "After pre-selection:\n";
4180*9880d681SAndroid Build Coastguard Worker           print_uses(dbgs()));
4181*9880d681SAndroid Build Coastguard Worker   }
4182*9880d681SAndroid Build Coastguard Worker }
4183*9880d681SAndroid Build Coastguard Worker 
4184*9880d681SAndroid Build Coastguard Worker /// If there are an extraordinary number of formulae to choose from, use some
4185*9880d681SAndroid Build Coastguard Worker /// rough heuristics to prune down the number of formulae. This keeps the main
4186*9880d681SAndroid Build Coastguard Worker /// solver from taking an extraordinary amount of time in some worst-case
4187*9880d681SAndroid Build Coastguard Worker /// scenarios.
NarrowSearchSpaceUsingHeuristics()4188*9880d681SAndroid Build Coastguard Worker void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
4189*9880d681SAndroid Build Coastguard Worker   NarrowSearchSpaceByDetectingSupersets();
4190*9880d681SAndroid Build Coastguard Worker   NarrowSearchSpaceByCollapsingUnrolledCode();
4191*9880d681SAndroid Build Coastguard Worker   NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
4192*9880d681SAndroid Build Coastguard Worker   NarrowSearchSpaceByPickingWinnerRegs();
4193*9880d681SAndroid Build Coastguard Worker }
4194*9880d681SAndroid Build Coastguard Worker 
4195*9880d681SAndroid Build Coastguard Worker /// This is the recursive solver.
SolveRecurse(SmallVectorImpl<const Formula * > & Solution,Cost & SolutionCost,SmallVectorImpl<const Formula * > & Workspace,const Cost & CurCost,const SmallPtrSet<const SCEV *,16> & CurRegs,DenseSet<const SCEV * > & VisitedRegs) const4196*9880d681SAndroid Build Coastguard Worker void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
4197*9880d681SAndroid Build Coastguard Worker                                Cost &SolutionCost,
4198*9880d681SAndroid Build Coastguard Worker                                SmallVectorImpl<const Formula *> &Workspace,
4199*9880d681SAndroid Build Coastguard Worker                                const Cost &CurCost,
4200*9880d681SAndroid Build Coastguard Worker                                const SmallPtrSet<const SCEV *, 16> &CurRegs,
4201*9880d681SAndroid Build Coastguard Worker                                DenseSet<const SCEV *> &VisitedRegs) const {
4202*9880d681SAndroid Build Coastguard Worker   // Some ideas:
4203*9880d681SAndroid Build Coastguard Worker   //  - prune more:
4204*9880d681SAndroid Build Coastguard Worker   //    - use more aggressive filtering
4205*9880d681SAndroid Build Coastguard Worker   //    - sort the formula so that the most profitable solutions are found first
4206*9880d681SAndroid Build Coastguard Worker   //    - sort the uses too
4207*9880d681SAndroid Build Coastguard Worker   //  - search faster:
4208*9880d681SAndroid Build Coastguard Worker   //    - don't compute a cost, and then compare. compare while computing a cost
4209*9880d681SAndroid Build Coastguard Worker   //      and bail early.
4210*9880d681SAndroid Build Coastguard Worker   //    - track register sets with SmallBitVector
4211*9880d681SAndroid Build Coastguard Worker 
4212*9880d681SAndroid Build Coastguard Worker   const LSRUse &LU = Uses[Workspace.size()];
4213*9880d681SAndroid Build Coastguard Worker 
4214*9880d681SAndroid Build Coastguard Worker   // If this use references any register that's already a part of the
4215*9880d681SAndroid Build Coastguard Worker   // in-progress solution, consider it a requirement that a formula must
4216*9880d681SAndroid Build Coastguard Worker   // reference that register in order to be considered. This prunes out
4217*9880d681SAndroid Build Coastguard Worker   // unprofitable searching.
4218*9880d681SAndroid Build Coastguard Worker   SmallSetVector<const SCEV *, 4> ReqRegs;
4219*9880d681SAndroid Build Coastguard Worker   for (const SCEV *S : CurRegs)
4220*9880d681SAndroid Build Coastguard Worker     if (LU.Regs.count(S))
4221*9880d681SAndroid Build Coastguard Worker       ReqRegs.insert(S);
4222*9880d681SAndroid Build Coastguard Worker 
4223*9880d681SAndroid Build Coastguard Worker   SmallPtrSet<const SCEV *, 16> NewRegs;
4224*9880d681SAndroid Build Coastguard Worker   Cost NewCost;
4225*9880d681SAndroid Build Coastguard Worker   for (const Formula &F : LU.Formulae) {
4226*9880d681SAndroid Build Coastguard Worker     // Ignore formulae which may not be ideal in terms of register reuse of
4227*9880d681SAndroid Build Coastguard Worker     // ReqRegs.  The formula should use all required registers before
4228*9880d681SAndroid Build Coastguard Worker     // introducing new ones.
4229*9880d681SAndroid Build Coastguard Worker     int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
4230*9880d681SAndroid Build Coastguard Worker     for (const SCEV *Reg : ReqRegs) {
4231*9880d681SAndroid Build Coastguard Worker       if ((F.ScaledReg && F.ScaledReg == Reg) ||
4232*9880d681SAndroid Build Coastguard Worker           std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) !=
4233*9880d681SAndroid Build Coastguard Worker           F.BaseRegs.end()) {
4234*9880d681SAndroid Build Coastguard Worker         --NumReqRegsToFind;
4235*9880d681SAndroid Build Coastguard Worker         if (NumReqRegsToFind == 0)
4236*9880d681SAndroid Build Coastguard Worker           break;
4237*9880d681SAndroid Build Coastguard Worker       }
4238*9880d681SAndroid Build Coastguard Worker     }
4239*9880d681SAndroid Build Coastguard Worker     if (NumReqRegsToFind != 0) {
4240*9880d681SAndroid Build Coastguard Worker       // If none of the formulae satisfied the required registers, then we could
4241*9880d681SAndroid Build Coastguard Worker       // clear ReqRegs and try again. Currently, we simply give up in this case.
4242*9880d681SAndroid Build Coastguard Worker       continue;
4243*9880d681SAndroid Build Coastguard Worker     }
4244*9880d681SAndroid Build Coastguard Worker 
4245*9880d681SAndroid Build Coastguard Worker     // Evaluate the cost of the current formula. If it's already worse than
4246*9880d681SAndroid Build Coastguard Worker     // the current best, prune the search at that point.
4247*9880d681SAndroid Build Coastguard Worker     NewCost = CurCost;
4248*9880d681SAndroid Build Coastguard Worker     NewRegs = CurRegs;
4249*9880d681SAndroid Build Coastguard Worker     NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT,
4250*9880d681SAndroid Build Coastguard Worker                         LU);
4251*9880d681SAndroid Build Coastguard Worker     if (NewCost < SolutionCost) {
4252*9880d681SAndroid Build Coastguard Worker       Workspace.push_back(&F);
4253*9880d681SAndroid Build Coastguard Worker       if (Workspace.size() != Uses.size()) {
4254*9880d681SAndroid Build Coastguard Worker         SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
4255*9880d681SAndroid Build Coastguard Worker                      NewRegs, VisitedRegs);
4256*9880d681SAndroid Build Coastguard Worker         if (F.getNumRegs() == 1 && Workspace.size() == 1)
4257*9880d681SAndroid Build Coastguard Worker           VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
4258*9880d681SAndroid Build Coastguard Worker       } else {
4259*9880d681SAndroid Build Coastguard Worker         DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
4260*9880d681SAndroid Build Coastguard Worker               dbgs() << ".\n Regs:";
4261*9880d681SAndroid Build Coastguard Worker               for (const SCEV *S : NewRegs)
4262*9880d681SAndroid Build Coastguard Worker                 dbgs() << ' ' << *S;
4263*9880d681SAndroid Build Coastguard Worker               dbgs() << '\n');
4264*9880d681SAndroid Build Coastguard Worker 
4265*9880d681SAndroid Build Coastguard Worker         SolutionCost = NewCost;
4266*9880d681SAndroid Build Coastguard Worker         Solution = Workspace;
4267*9880d681SAndroid Build Coastguard Worker       }
4268*9880d681SAndroid Build Coastguard Worker       Workspace.pop_back();
4269*9880d681SAndroid Build Coastguard Worker     }
4270*9880d681SAndroid Build Coastguard Worker   }
4271*9880d681SAndroid Build Coastguard Worker }
4272*9880d681SAndroid Build Coastguard Worker 
4273*9880d681SAndroid Build Coastguard Worker /// Choose one formula from each use. Return the results in the given Solution
4274*9880d681SAndroid Build Coastguard Worker /// vector.
Solve(SmallVectorImpl<const Formula * > & Solution) const4275*9880d681SAndroid Build Coastguard Worker void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
4276*9880d681SAndroid Build Coastguard Worker   SmallVector<const Formula *, 8> Workspace;
4277*9880d681SAndroid Build Coastguard Worker   Cost SolutionCost;
4278*9880d681SAndroid Build Coastguard Worker   SolutionCost.Lose();
4279*9880d681SAndroid Build Coastguard Worker   Cost CurCost;
4280*9880d681SAndroid Build Coastguard Worker   SmallPtrSet<const SCEV *, 16> CurRegs;
4281*9880d681SAndroid Build Coastguard Worker   DenseSet<const SCEV *> VisitedRegs;
4282*9880d681SAndroid Build Coastguard Worker   Workspace.reserve(Uses.size());
4283*9880d681SAndroid Build Coastguard Worker 
4284*9880d681SAndroid Build Coastguard Worker   // SolveRecurse does all the work.
4285*9880d681SAndroid Build Coastguard Worker   SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
4286*9880d681SAndroid Build Coastguard Worker                CurRegs, VisitedRegs);
4287*9880d681SAndroid Build Coastguard Worker   if (Solution.empty()) {
4288*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
4289*9880d681SAndroid Build Coastguard Worker     return;
4290*9880d681SAndroid Build Coastguard Worker   }
4291*9880d681SAndroid Build Coastguard Worker 
4292*9880d681SAndroid Build Coastguard Worker   // Ok, we've now made all our decisions.
4293*9880d681SAndroid Build Coastguard Worker   DEBUG(dbgs() << "\n"
4294*9880d681SAndroid Build Coastguard Worker                   "The chosen solution requires "; SolutionCost.print(dbgs());
4295*9880d681SAndroid Build Coastguard Worker         dbgs() << ":\n";
4296*9880d681SAndroid Build Coastguard Worker         for (size_t i = 0, e = Uses.size(); i != e; ++i) {
4297*9880d681SAndroid Build Coastguard Worker           dbgs() << "  ";
4298*9880d681SAndroid Build Coastguard Worker           Uses[i].print(dbgs());
4299*9880d681SAndroid Build Coastguard Worker           dbgs() << "\n"
4300*9880d681SAndroid Build Coastguard Worker                     "    ";
4301*9880d681SAndroid Build Coastguard Worker           Solution[i]->print(dbgs());
4302*9880d681SAndroid Build Coastguard Worker           dbgs() << '\n';
4303*9880d681SAndroid Build Coastguard Worker         });
4304*9880d681SAndroid Build Coastguard Worker 
4305*9880d681SAndroid Build Coastguard Worker   assert(Solution.size() == Uses.size() && "Malformed solution!");
4306*9880d681SAndroid Build Coastguard Worker }
4307*9880d681SAndroid Build Coastguard Worker 
4308*9880d681SAndroid Build Coastguard Worker /// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree far as
4309*9880d681SAndroid Build Coastguard Worker /// we can go while still being dominated by the input positions. This helps
4310*9880d681SAndroid Build Coastguard Worker /// canonicalize the insert position, which encourages sharing.
4311*9880d681SAndroid Build Coastguard Worker BasicBlock::iterator
HoistInsertPosition(BasicBlock::iterator IP,const SmallVectorImpl<Instruction * > & Inputs) const4312*9880d681SAndroid Build Coastguard Worker LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
4313*9880d681SAndroid Build Coastguard Worker                                  const SmallVectorImpl<Instruction *> &Inputs)
4314*9880d681SAndroid Build Coastguard Worker                                                                          const {
4315*9880d681SAndroid Build Coastguard Worker   Instruction *Tentative = &*IP;
4316*9880d681SAndroid Build Coastguard Worker   for (;;) {
4317*9880d681SAndroid Build Coastguard Worker     bool AllDominate = true;
4318*9880d681SAndroid Build Coastguard Worker     Instruction *BetterPos = nullptr;
4319*9880d681SAndroid Build Coastguard Worker     // Don't bother attempting to insert before a catchswitch, their basic block
4320*9880d681SAndroid Build Coastguard Worker     // cannot have other non-PHI instructions.
4321*9880d681SAndroid Build Coastguard Worker     if (isa<CatchSwitchInst>(Tentative))
4322*9880d681SAndroid Build Coastguard Worker       return IP;
4323*9880d681SAndroid Build Coastguard Worker 
4324*9880d681SAndroid Build Coastguard Worker     for (Instruction *Inst : Inputs) {
4325*9880d681SAndroid Build Coastguard Worker       if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
4326*9880d681SAndroid Build Coastguard Worker         AllDominate = false;
4327*9880d681SAndroid Build Coastguard Worker         break;
4328*9880d681SAndroid Build Coastguard Worker       }
4329*9880d681SAndroid Build Coastguard Worker       // Attempt to find an insert position in the middle of the block,
4330*9880d681SAndroid Build Coastguard Worker       // instead of at the end, so that it can be used for other expansions.
4331*9880d681SAndroid Build Coastguard Worker       if (Tentative->getParent() == Inst->getParent() &&
4332*9880d681SAndroid Build Coastguard Worker           (!BetterPos || !DT.dominates(Inst, BetterPos)))
4333*9880d681SAndroid Build Coastguard Worker         BetterPos = &*std::next(BasicBlock::iterator(Inst));
4334*9880d681SAndroid Build Coastguard Worker     }
4335*9880d681SAndroid Build Coastguard Worker     if (!AllDominate)
4336*9880d681SAndroid Build Coastguard Worker       break;
4337*9880d681SAndroid Build Coastguard Worker     if (BetterPos)
4338*9880d681SAndroid Build Coastguard Worker       IP = BetterPos->getIterator();
4339*9880d681SAndroid Build Coastguard Worker     else
4340*9880d681SAndroid Build Coastguard Worker       IP = Tentative->getIterator();
4341*9880d681SAndroid Build Coastguard Worker 
4342*9880d681SAndroid Build Coastguard Worker     const Loop *IPLoop = LI.getLoopFor(IP->getParent());
4343*9880d681SAndroid Build Coastguard Worker     unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
4344*9880d681SAndroid Build Coastguard Worker 
4345*9880d681SAndroid Build Coastguard Worker     BasicBlock *IDom;
4346*9880d681SAndroid Build Coastguard Worker     for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
4347*9880d681SAndroid Build Coastguard Worker       if (!Rung) return IP;
4348*9880d681SAndroid Build Coastguard Worker       Rung = Rung->getIDom();
4349*9880d681SAndroid Build Coastguard Worker       if (!Rung) return IP;
4350*9880d681SAndroid Build Coastguard Worker       IDom = Rung->getBlock();
4351*9880d681SAndroid Build Coastguard Worker 
4352*9880d681SAndroid Build Coastguard Worker       // Don't climb into a loop though.
4353*9880d681SAndroid Build Coastguard Worker       const Loop *IDomLoop = LI.getLoopFor(IDom);
4354*9880d681SAndroid Build Coastguard Worker       unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
4355*9880d681SAndroid Build Coastguard Worker       if (IDomDepth <= IPLoopDepth &&
4356*9880d681SAndroid Build Coastguard Worker           (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
4357*9880d681SAndroid Build Coastguard Worker         break;
4358*9880d681SAndroid Build Coastguard Worker     }
4359*9880d681SAndroid Build Coastguard Worker 
4360*9880d681SAndroid Build Coastguard Worker     Tentative = IDom->getTerminator();
4361*9880d681SAndroid Build Coastguard Worker   }
4362*9880d681SAndroid Build Coastguard Worker 
4363*9880d681SAndroid Build Coastguard Worker   return IP;
4364*9880d681SAndroid Build Coastguard Worker }
4365*9880d681SAndroid Build Coastguard Worker 
4366*9880d681SAndroid Build Coastguard Worker /// Determine an input position which will be dominated by the operands and
4367*9880d681SAndroid Build Coastguard Worker /// which will dominate the result.
4368*9880d681SAndroid Build Coastguard Worker BasicBlock::iterator
AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,const LSRFixup & LF,const LSRUse & LU,SCEVExpander & Rewriter) const4369*9880d681SAndroid Build Coastguard Worker LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
4370*9880d681SAndroid Build Coastguard Worker                                            const LSRFixup &LF,
4371*9880d681SAndroid Build Coastguard Worker                                            const LSRUse &LU,
4372*9880d681SAndroid Build Coastguard Worker                                            SCEVExpander &Rewriter) const {
4373*9880d681SAndroid Build Coastguard Worker   // Collect some instructions which must be dominated by the
4374*9880d681SAndroid Build Coastguard Worker   // expanding replacement. These must be dominated by any operands that
4375*9880d681SAndroid Build Coastguard Worker   // will be required in the expansion.
4376*9880d681SAndroid Build Coastguard Worker   SmallVector<Instruction *, 4> Inputs;
4377*9880d681SAndroid Build Coastguard Worker   if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
4378*9880d681SAndroid Build Coastguard Worker     Inputs.push_back(I);
4379*9880d681SAndroid Build Coastguard Worker   if (LU.Kind == LSRUse::ICmpZero)
4380*9880d681SAndroid Build Coastguard Worker     if (Instruction *I =
4381*9880d681SAndroid Build Coastguard Worker           dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
4382*9880d681SAndroid Build Coastguard Worker       Inputs.push_back(I);
4383*9880d681SAndroid Build Coastguard Worker   if (LF.PostIncLoops.count(L)) {
4384*9880d681SAndroid Build Coastguard Worker     if (LF.isUseFullyOutsideLoop(L))
4385*9880d681SAndroid Build Coastguard Worker       Inputs.push_back(L->getLoopLatch()->getTerminator());
4386*9880d681SAndroid Build Coastguard Worker     else
4387*9880d681SAndroid Build Coastguard Worker       Inputs.push_back(IVIncInsertPos);
4388*9880d681SAndroid Build Coastguard Worker   }
4389*9880d681SAndroid Build Coastguard Worker   // The expansion must also be dominated by the increment positions of any
4390*9880d681SAndroid Build Coastguard Worker   // loops it for which it is using post-inc mode.
4391*9880d681SAndroid Build Coastguard Worker   for (const Loop *PIL : LF.PostIncLoops) {
4392*9880d681SAndroid Build Coastguard Worker     if (PIL == L) continue;
4393*9880d681SAndroid Build Coastguard Worker 
4394*9880d681SAndroid Build Coastguard Worker     // Be dominated by the loop exit.
4395*9880d681SAndroid Build Coastguard Worker     SmallVector<BasicBlock *, 4> ExitingBlocks;
4396*9880d681SAndroid Build Coastguard Worker     PIL->getExitingBlocks(ExitingBlocks);
4397*9880d681SAndroid Build Coastguard Worker     if (!ExitingBlocks.empty()) {
4398*9880d681SAndroid Build Coastguard Worker       BasicBlock *BB = ExitingBlocks[0];
4399*9880d681SAndroid Build Coastguard Worker       for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
4400*9880d681SAndroid Build Coastguard Worker         BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
4401*9880d681SAndroid Build Coastguard Worker       Inputs.push_back(BB->getTerminator());
4402*9880d681SAndroid Build Coastguard Worker     }
4403*9880d681SAndroid Build Coastguard Worker   }
4404*9880d681SAndroid Build Coastguard Worker 
4405*9880d681SAndroid Build Coastguard Worker   assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()
4406*9880d681SAndroid Build Coastguard Worker          && !isa<DbgInfoIntrinsic>(LowestIP) &&
4407*9880d681SAndroid Build Coastguard Worker          "Insertion point must be a normal instruction");
4408*9880d681SAndroid Build Coastguard Worker 
4409*9880d681SAndroid Build Coastguard Worker   // Then, climb up the immediate dominator tree as far as we can go while
4410*9880d681SAndroid Build Coastguard Worker   // still being dominated by the input positions.
4411*9880d681SAndroid Build Coastguard Worker   BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);
4412*9880d681SAndroid Build Coastguard Worker 
4413*9880d681SAndroid Build Coastguard Worker   // Don't insert instructions before PHI nodes.
4414*9880d681SAndroid Build Coastguard Worker   while (isa<PHINode>(IP)) ++IP;
4415*9880d681SAndroid Build Coastguard Worker 
4416*9880d681SAndroid Build Coastguard Worker   // Ignore landingpad instructions.
4417*9880d681SAndroid Build Coastguard Worker   while (IP->isEHPad()) ++IP;
4418*9880d681SAndroid Build Coastguard Worker 
4419*9880d681SAndroid Build Coastguard Worker   // Ignore debug intrinsics.
4420*9880d681SAndroid Build Coastguard Worker   while (isa<DbgInfoIntrinsic>(IP)) ++IP;
4421*9880d681SAndroid Build Coastguard Worker 
4422*9880d681SAndroid Build Coastguard Worker   // Set IP below instructions recently inserted by SCEVExpander. This keeps the
4423*9880d681SAndroid Build Coastguard Worker   // IP consistent across expansions and allows the previously inserted
4424*9880d681SAndroid Build Coastguard Worker   // instructions to be reused by subsequent expansion.
4425*9880d681SAndroid Build Coastguard Worker   while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
4426*9880d681SAndroid Build Coastguard Worker     ++IP;
4427*9880d681SAndroid Build Coastguard Worker 
4428*9880d681SAndroid Build Coastguard Worker   return IP;
4429*9880d681SAndroid Build Coastguard Worker }
4430*9880d681SAndroid Build Coastguard Worker 
4431*9880d681SAndroid Build Coastguard Worker /// Emit instructions for the leading candidate expression for this LSRUse (this
4432*9880d681SAndroid Build Coastguard Worker /// is called "expanding").
Expand(const LSRFixup & LF,const Formula & F,BasicBlock::iterator IP,SCEVExpander & Rewriter,SmallVectorImpl<WeakVH> & DeadInsts) const4433*9880d681SAndroid Build Coastguard Worker Value *LSRInstance::Expand(const LSRFixup &LF,
4434*9880d681SAndroid Build Coastguard Worker                            const Formula &F,
4435*9880d681SAndroid Build Coastguard Worker                            BasicBlock::iterator IP,
4436*9880d681SAndroid Build Coastguard Worker                            SCEVExpander &Rewriter,
4437*9880d681SAndroid Build Coastguard Worker                            SmallVectorImpl<WeakVH> &DeadInsts) const {
4438*9880d681SAndroid Build Coastguard Worker   const LSRUse &LU = Uses[LF.LUIdx];
4439*9880d681SAndroid Build Coastguard Worker   if (LU.RigidFormula)
4440*9880d681SAndroid Build Coastguard Worker     return LF.OperandValToReplace;
4441*9880d681SAndroid Build Coastguard Worker 
4442*9880d681SAndroid Build Coastguard Worker   // Determine an input position which will be dominated by the operands and
4443*9880d681SAndroid Build Coastguard Worker   // which will dominate the result.
4444*9880d681SAndroid Build Coastguard Worker   IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
4445*9880d681SAndroid Build Coastguard Worker 
4446*9880d681SAndroid Build Coastguard Worker   // Inform the Rewriter if we have a post-increment use, so that it can
4447*9880d681SAndroid Build Coastguard Worker   // perform an advantageous expansion.
4448*9880d681SAndroid Build Coastguard Worker   Rewriter.setPostInc(LF.PostIncLoops);
4449*9880d681SAndroid Build Coastguard Worker 
4450*9880d681SAndroid Build Coastguard Worker   // This is the type that the user actually needs.
4451*9880d681SAndroid Build Coastguard Worker   Type *OpTy = LF.OperandValToReplace->getType();
4452*9880d681SAndroid Build Coastguard Worker   // This will be the type that we'll initially expand to.
4453*9880d681SAndroid Build Coastguard Worker   Type *Ty = F.getType();
4454*9880d681SAndroid Build Coastguard Worker   if (!Ty)
4455*9880d681SAndroid Build Coastguard Worker     // No type known; just expand directly to the ultimate type.
4456*9880d681SAndroid Build Coastguard Worker     Ty = OpTy;
4457*9880d681SAndroid Build Coastguard Worker   else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
4458*9880d681SAndroid Build Coastguard Worker     // Expand directly to the ultimate type if it's the right size.
4459*9880d681SAndroid Build Coastguard Worker     Ty = OpTy;
4460*9880d681SAndroid Build Coastguard Worker   // This is the type to do integer arithmetic in.
4461*9880d681SAndroid Build Coastguard Worker   Type *IntTy = SE.getEffectiveSCEVType(Ty);
4462*9880d681SAndroid Build Coastguard Worker 
4463*9880d681SAndroid Build Coastguard Worker   // Build up a list of operands to add together to form the full base.
4464*9880d681SAndroid Build Coastguard Worker   SmallVector<const SCEV *, 8> Ops;
4465*9880d681SAndroid Build Coastguard Worker 
4466*9880d681SAndroid Build Coastguard Worker   // Expand the BaseRegs portion.
4467*9880d681SAndroid Build Coastguard Worker   for (const SCEV *Reg : F.BaseRegs) {
4468*9880d681SAndroid Build Coastguard Worker     assert(!Reg->isZero() && "Zero allocated in a base register!");
4469*9880d681SAndroid Build Coastguard Worker 
4470*9880d681SAndroid Build Coastguard Worker     // If we're expanding for a post-inc user, make the post-inc adjustment.
4471*9880d681SAndroid Build Coastguard Worker     PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
4472*9880d681SAndroid Build Coastguard Worker     Reg = TransformForPostIncUse(Denormalize, Reg,
4473*9880d681SAndroid Build Coastguard Worker                                  LF.UserInst, LF.OperandValToReplace,
4474*9880d681SAndroid Build Coastguard Worker                                  Loops, SE, DT);
4475*9880d681SAndroid Build Coastguard Worker 
4476*9880d681SAndroid Build Coastguard Worker     Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, &*IP)));
4477*9880d681SAndroid Build Coastguard Worker   }
4478*9880d681SAndroid Build Coastguard Worker 
4479*9880d681SAndroid Build Coastguard Worker   // Expand the ScaledReg portion.
4480*9880d681SAndroid Build Coastguard Worker   Value *ICmpScaledV = nullptr;
4481*9880d681SAndroid Build Coastguard Worker   if (F.Scale != 0) {
4482*9880d681SAndroid Build Coastguard Worker     const SCEV *ScaledS = F.ScaledReg;
4483*9880d681SAndroid Build Coastguard Worker 
4484*9880d681SAndroid Build Coastguard Worker     // If we're expanding for a post-inc user, make the post-inc adjustment.
4485*9880d681SAndroid Build Coastguard Worker     PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
4486*9880d681SAndroid Build Coastguard Worker     ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
4487*9880d681SAndroid Build Coastguard Worker                                      LF.UserInst, LF.OperandValToReplace,
4488*9880d681SAndroid Build Coastguard Worker                                      Loops, SE, DT);
4489*9880d681SAndroid Build Coastguard Worker 
4490*9880d681SAndroid Build Coastguard Worker     if (LU.Kind == LSRUse::ICmpZero) {
4491*9880d681SAndroid Build Coastguard Worker       // Expand ScaleReg as if it was part of the base regs.
4492*9880d681SAndroid Build Coastguard Worker       if (F.Scale == 1)
4493*9880d681SAndroid Build Coastguard Worker         Ops.push_back(
4494*9880d681SAndroid Build Coastguard Worker             SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP)));
4495*9880d681SAndroid Build Coastguard Worker       else {
4496*9880d681SAndroid Build Coastguard Worker         // An interesting way of "folding" with an icmp is to use a negated
4497*9880d681SAndroid Build Coastguard Worker         // scale, which we'll implement by inserting it into the other operand
4498*9880d681SAndroid Build Coastguard Worker         // of the icmp.
4499*9880d681SAndroid Build Coastguard Worker         assert(F.Scale == -1 &&
4500*9880d681SAndroid Build Coastguard Worker                "The only scale supported by ICmpZero uses is -1!");
4501*9880d681SAndroid Build Coastguard Worker         ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, &*IP);
4502*9880d681SAndroid Build Coastguard Worker       }
4503*9880d681SAndroid Build Coastguard Worker     } else {
4504*9880d681SAndroid Build Coastguard Worker       // Otherwise just expand the scaled register and an explicit scale,
4505*9880d681SAndroid Build Coastguard Worker       // which is expected to be matched as part of the address.
4506*9880d681SAndroid Build Coastguard Worker 
4507*9880d681SAndroid Build Coastguard Worker       // Flush the operand list to suppress SCEVExpander hoisting address modes.
4508*9880d681SAndroid Build Coastguard Worker       // Unless the addressing mode will not be folded.
4509*9880d681SAndroid Build Coastguard Worker       if (!Ops.empty() && LU.Kind == LSRUse::Address &&
4510*9880d681SAndroid Build Coastguard Worker           isAMCompletelyFolded(TTI, LU, F)) {
4511*9880d681SAndroid Build Coastguard Worker         Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
4512*9880d681SAndroid Build Coastguard Worker         Ops.clear();
4513*9880d681SAndroid Build Coastguard Worker         Ops.push_back(SE.getUnknown(FullV));
4514*9880d681SAndroid Build Coastguard Worker       }
4515*9880d681SAndroid Build Coastguard Worker       ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP));
4516*9880d681SAndroid Build Coastguard Worker       if (F.Scale != 1)
4517*9880d681SAndroid Build Coastguard Worker         ScaledS =
4518*9880d681SAndroid Build Coastguard Worker             SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
4519*9880d681SAndroid Build Coastguard Worker       Ops.push_back(ScaledS);
4520*9880d681SAndroid Build Coastguard Worker     }
4521*9880d681SAndroid Build Coastguard Worker   }
4522*9880d681SAndroid Build Coastguard Worker 
4523*9880d681SAndroid Build Coastguard Worker   // Expand the GV portion.
4524*9880d681SAndroid Build Coastguard Worker   if (F.BaseGV) {
4525*9880d681SAndroid Build Coastguard Worker     // Flush the operand list to suppress SCEVExpander hoisting.
4526*9880d681SAndroid Build Coastguard Worker     if (!Ops.empty()) {
4527*9880d681SAndroid Build Coastguard Worker       Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
4528*9880d681SAndroid Build Coastguard Worker       Ops.clear();
4529*9880d681SAndroid Build Coastguard Worker       Ops.push_back(SE.getUnknown(FullV));
4530*9880d681SAndroid Build Coastguard Worker     }
4531*9880d681SAndroid Build Coastguard Worker     Ops.push_back(SE.getUnknown(F.BaseGV));
4532*9880d681SAndroid Build Coastguard Worker   }
4533*9880d681SAndroid Build Coastguard Worker 
4534*9880d681SAndroid Build Coastguard Worker   // Flush the operand list to suppress SCEVExpander hoisting of both folded and
4535*9880d681SAndroid Build Coastguard Worker   // unfolded offsets. LSR assumes they both live next to their uses.
4536*9880d681SAndroid Build Coastguard Worker   if (!Ops.empty()) {
4537*9880d681SAndroid Build Coastguard Worker     Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
4538*9880d681SAndroid Build Coastguard Worker     Ops.clear();
4539*9880d681SAndroid Build Coastguard Worker     Ops.push_back(SE.getUnknown(FullV));
4540*9880d681SAndroid Build Coastguard Worker   }
4541*9880d681SAndroid Build Coastguard Worker 
4542*9880d681SAndroid Build Coastguard Worker   // Expand the immediate portion.
4543*9880d681SAndroid Build Coastguard Worker   int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;
4544*9880d681SAndroid Build Coastguard Worker   if (Offset != 0) {
4545*9880d681SAndroid Build Coastguard Worker     if (LU.Kind == LSRUse::ICmpZero) {
4546*9880d681SAndroid Build Coastguard Worker       // The other interesting way of "folding" with an ICmpZero is to use a
4547*9880d681SAndroid Build Coastguard Worker       // negated immediate.
4548*9880d681SAndroid Build Coastguard Worker       if (!ICmpScaledV)
4549*9880d681SAndroid Build Coastguard Worker         ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset);
4550*9880d681SAndroid Build Coastguard Worker       else {
4551*9880d681SAndroid Build Coastguard Worker         Ops.push_back(SE.getUnknown(ICmpScaledV));
4552*9880d681SAndroid Build Coastguard Worker         ICmpScaledV = ConstantInt::get(IntTy, Offset);
4553*9880d681SAndroid Build Coastguard Worker       }
4554*9880d681SAndroid Build Coastguard Worker     } else {
4555*9880d681SAndroid Build Coastguard Worker       // Just add the immediate values. These again are expected to be matched
4556*9880d681SAndroid Build Coastguard Worker       // as part of the address.
4557*9880d681SAndroid Build Coastguard Worker       Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset)));
4558*9880d681SAndroid Build Coastguard Worker     }
4559*9880d681SAndroid Build Coastguard Worker   }
4560*9880d681SAndroid Build Coastguard Worker 
4561*9880d681SAndroid Build Coastguard Worker   // Expand the unfolded offset portion.
4562*9880d681SAndroid Build Coastguard Worker   int64_t UnfoldedOffset = F.UnfoldedOffset;
4563*9880d681SAndroid Build Coastguard Worker   if (UnfoldedOffset != 0) {
4564*9880d681SAndroid Build Coastguard Worker     // Just add the immediate values.
4565*9880d681SAndroid Build Coastguard Worker     Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy,
4566*9880d681SAndroid Build Coastguard Worker                                                        UnfoldedOffset)));
4567*9880d681SAndroid Build Coastguard Worker   }
4568*9880d681SAndroid Build Coastguard Worker 
4569*9880d681SAndroid Build Coastguard Worker   // Emit instructions summing all the operands.
4570*9880d681SAndroid Build Coastguard Worker   const SCEV *FullS = Ops.empty() ?
4571*9880d681SAndroid Build Coastguard Worker                       SE.getConstant(IntTy, 0) :
4572*9880d681SAndroid Build Coastguard Worker                       SE.getAddExpr(Ops);
4573*9880d681SAndroid Build Coastguard Worker   Value *FullV = Rewriter.expandCodeFor(FullS, Ty, &*IP);
4574*9880d681SAndroid Build Coastguard Worker 
4575*9880d681SAndroid Build Coastguard Worker   // We're done expanding now, so reset the rewriter.
4576*9880d681SAndroid Build Coastguard Worker   Rewriter.clearPostInc();
4577*9880d681SAndroid Build Coastguard Worker 
4578*9880d681SAndroid Build Coastguard Worker   // An ICmpZero Formula represents an ICmp which we're handling as a
4579*9880d681SAndroid Build Coastguard Worker   // comparison against zero. Now that we've expanded an expression for that
4580*9880d681SAndroid Build Coastguard Worker   // form, update the ICmp's other operand.
4581*9880d681SAndroid Build Coastguard Worker   if (LU.Kind == LSRUse::ICmpZero) {
4582*9880d681SAndroid Build Coastguard Worker     ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
4583*9880d681SAndroid Build Coastguard Worker     DeadInsts.emplace_back(CI->getOperand(1));
4584*9880d681SAndroid Build Coastguard Worker     assert(!F.BaseGV && "ICmp does not support folding a global value and "
4585*9880d681SAndroid Build Coastguard Worker                            "a scale at the same time!");
4586*9880d681SAndroid Build Coastguard Worker     if (F.Scale == -1) {
4587*9880d681SAndroid Build Coastguard Worker       if (ICmpScaledV->getType() != OpTy) {
4588*9880d681SAndroid Build Coastguard Worker         Instruction *Cast =
4589*9880d681SAndroid Build Coastguard Worker           CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
4590*9880d681SAndroid Build Coastguard Worker                                                    OpTy, false),
4591*9880d681SAndroid Build Coastguard Worker                            ICmpScaledV, OpTy, "tmp", CI);
4592*9880d681SAndroid Build Coastguard Worker         ICmpScaledV = Cast;
4593*9880d681SAndroid Build Coastguard Worker       }
4594*9880d681SAndroid Build Coastguard Worker       CI->setOperand(1, ICmpScaledV);
4595*9880d681SAndroid Build Coastguard Worker     } else {
4596*9880d681SAndroid Build Coastguard Worker       // A scale of 1 means that the scale has been expanded as part of the
4597*9880d681SAndroid Build Coastguard Worker       // base regs.
4598*9880d681SAndroid Build Coastguard Worker       assert((F.Scale == 0 || F.Scale == 1) &&
4599*9880d681SAndroid Build Coastguard Worker              "ICmp does not support folding a global value and "
4600*9880d681SAndroid Build Coastguard Worker              "a scale at the same time!");
4601*9880d681SAndroid Build Coastguard Worker       Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
4602*9880d681SAndroid Build Coastguard Worker                                            -(uint64_t)Offset);
4603*9880d681SAndroid Build Coastguard Worker       if (C->getType() != OpTy)
4604*9880d681SAndroid Build Coastguard Worker         C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
4605*9880d681SAndroid Build Coastguard Worker                                                           OpTy, false),
4606*9880d681SAndroid Build Coastguard Worker                                   C, OpTy);
4607*9880d681SAndroid Build Coastguard Worker 
4608*9880d681SAndroid Build Coastguard Worker       CI->setOperand(1, C);
4609*9880d681SAndroid Build Coastguard Worker     }
4610*9880d681SAndroid Build Coastguard Worker   }
4611*9880d681SAndroid Build Coastguard Worker 
4612*9880d681SAndroid Build Coastguard Worker   return FullV;
4613*9880d681SAndroid Build Coastguard Worker }
4614*9880d681SAndroid Build Coastguard Worker 
4615*9880d681SAndroid Build Coastguard Worker /// Helper for Rewrite. PHI nodes are special because the use of their operands
4616*9880d681SAndroid Build Coastguard Worker /// effectively happens in their predecessor blocks, so the expression may need
4617*9880d681SAndroid Build Coastguard Worker /// to be expanded in multiple places.
RewriteForPHI(PHINode * PN,const LSRFixup & LF,const Formula & F,SCEVExpander & Rewriter,SmallVectorImpl<WeakVH> & DeadInsts) const4618*9880d681SAndroid Build Coastguard Worker void LSRInstance::RewriteForPHI(PHINode *PN,
4619*9880d681SAndroid Build Coastguard Worker                                 const LSRFixup &LF,
4620*9880d681SAndroid Build Coastguard Worker                                 const Formula &F,
4621*9880d681SAndroid Build Coastguard Worker                                 SCEVExpander &Rewriter,
4622*9880d681SAndroid Build Coastguard Worker                                 SmallVectorImpl<WeakVH> &DeadInsts) const {
4623*9880d681SAndroid Build Coastguard Worker   DenseMap<BasicBlock *, Value *> Inserted;
4624*9880d681SAndroid Build Coastguard Worker   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
4625*9880d681SAndroid Build Coastguard Worker     if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
4626*9880d681SAndroid Build Coastguard Worker       BasicBlock *BB = PN->getIncomingBlock(i);
4627*9880d681SAndroid Build Coastguard Worker 
4628*9880d681SAndroid Build Coastguard Worker       // If this is a critical edge, split the edge so that we do not insert
4629*9880d681SAndroid Build Coastguard Worker       // the code on all predecessor/successor paths.  We do this unless this
4630*9880d681SAndroid Build Coastguard Worker       // is the canonical backedge for this loop, which complicates post-inc
4631*9880d681SAndroid Build Coastguard Worker       // users.
4632*9880d681SAndroid Build Coastguard Worker       if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
4633*9880d681SAndroid Build Coastguard Worker           !isa<IndirectBrInst>(BB->getTerminator())) {
4634*9880d681SAndroid Build Coastguard Worker         BasicBlock *Parent = PN->getParent();
4635*9880d681SAndroid Build Coastguard Worker         Loop *PNLoop = LI.getLoopFor(Parent);
4636*9880d681SAndroid Build Coastguard Worker         if (!PNLoop || Parent != PNLoop->getHeader()) {
4637*9880d681SAndroid Build Coastguard Worker           // Split the critical edge.
4638*9880d681SAndroid Build Coastguard Worker           BasicBlock *NewBB = nullptr;
4639*9880d681SAndroid Build Coastguard Worker           if (!Parent->isLandingPad()) {
4640*9880d681SAndroid Build Coastguard Worker             NewBB = SplitCriticalEdge(BB, Parent,
4641*9880d681SAndroid Build Coastguard Worker                                       CriticalEdgeSplittingOptions(&DT, &LI)
4642*9880d681SAndroid Build Coastguard Worker                                           .setMergeIdenticalEdges()
4643*9880d681SAndroid Build Coastguard Worker                                           .setDontDeleteUselessPHIs());
4644*9880d681SAndroid Build Coastguard Worker           } else {
4645*9880d681SAndroid Build Coastguard Worker             SmallVector<BasicBlock*, 2> NewBBs;
4646*9880d681SAndroid Build Coastguard Worker             SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DT, &LI);
4647*9880d681SAndroid Build Coastguard Worker             NewBB = NewBBs[0];
4648*9880d681SAndroid Build Coastguard Worker           }
4649*9880d681SAndroid Build Coastguard Worker           // If NewBB==NULL, then SplitCriticalEdge refused to split because all
4650*9880d681SAndroid Build Coastguard Worker           // phi predecessors are identical. The simple thing to do is skip
4651*9880d681SAndroid Build Coastguard Worker           // splitting in this case rather than complicate the API.
4652*9880d681SAndroid Build Coastguard Worker           if (NewBB) {
4653*9880d681SAndroid Build Coastguard Worker             // If PN is outside of the loop and BB is in the loop, we want to
4654*9880d681SAndroid Build Coastguard Worker             // move the block to be immediately before the PHI block, not
4655*9880d681SAndroid Build Coastguard Worker             // immediately after BB.
4656*9880d681SAndroid Build Coastguard Worker             if (L->contains(BB) && !L->contains(PN))
4657*9880d681SAndroid Build Coastguard Worker               NewBB->moveBefore(PN->getParent());
4658*9880d681SAndroid Build Coastguard Worker 
4659*9880d681SAndroid Build Coastguard Worker             // Splitting the edge can reduce the number of PHI entries we have.
4660*9880d681SAndroid Build Coastguard Worker             e = PN->getNumIncomingValues();
4661*9880d681SAndroid Build Coastguard Worker             BB = NewBB;
4662*9880d681SAndroid Build Coastguard Worker             i = PN->getBasicBlockIndex(BB);
4663*9880d681SAndroid Build Coastguard Worker           }
4664*9880d681SAndroid Build Coastguard Worker         }
4665*9880d681SAndroid Build Coastguard Worker       }
4666*9880d681SAndroid Build Coastguard Worker 
4667*9880d681SAndroid Build Coastguard Worker       std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
4668*9880d681SAndroid Build Coastguard Worker         Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr)));
4669*9880d681SAndroid Build Coastguard Worker       if (!Pair.second)
4670*9880d681SAndroid Build Coastguard Worker         PN->setIncomingValue(i, Pair.first->second);
4671*9880d681SAndroid Build Coastguard Worker       else {
4672*9880d681SAndroid Build Coastguard Worker         Value *FullV = Expand(LF, F, BB->getTerminator()->getIterator(),
4673*9880d681SAndroid Build Coastguard Worker                               Rewriter, DeadInsts);
4674*9880d681SAndroid Build Coastguard Worker 
4675*9880d681SAndroid Build Coastguard Worker         // If this is reuse-by-noop-cast, insert the noop cast.
4676*9880d681SAndroid Build Coastguard Worker         Type *OpTy = LF.OperandValToReplace->getType();
4677*9880d681SAndroid Build Coastguard Worker         if (FullV->getType() != OpTy)
4678*9880d681SAndroid Build Coastguard Worker           FullV =
4679*9880d681SAndroid Build Coastguard Worker             CastInst::Create(CastInst::getCastOpcode(FullV, false,
4680*9880d681SAndroid Build Coastguard Worker                                                      OpTy, false),
4681*9880d681SAndroid Build Coastguard Worker                              FullV, LF.OperandValToReplace->getType(),
4682*9880d681SAndroid Build Coastguard Worker                              "tmp", BB->getTerminator());
4683*9880d681SAndroid Build Coastguard Worker 
4684*9880d681SAndroid Build Coastguard Worker         PN->setIncomingValue(i, FullV);
4685*9880d681SAndroid Build Coastguard Worker         Pair.first->second = FullV;
4686*9880d681SAndroid Build Coastguard Worker       }
4687*9880d681SAndroid Build Coastguard Worker     }
4688*9880d681SAndroid Build Coastguard Worker }
4689*9880d681SAndroid Build Coastguard Worker 
4690*9880d681SAndroid Build Coastguard Worker /// Emit instructions for the leading candidate expression for this LSRUse (this
4691*9880d681SAndroid Build Coastguard Worker /// is called "expanding"), and update the UserInst to reference the newly
4692*9880d681SAndroid Build Coastguard Worker /// expanded value.
Rewrite(const LSRFixup & LF,const Formula & F,SCEVExpander & Rewriter,SmallVectorImpl<WeakVH> & DeadInsts) const4693*9880d681SAndroid Build Coastguard Worker void LSRInstance::Rewrite(const LSRFixup &LF,
4694*9880d681SAndroid Build Coastguard Worker                           const Formula &F,
4695*9880d681SAndroid Build Coastguard Worker                           SCEVExpander &Rewriter,
4696*9880d681SAndroid Build Coastguard Worker                           SmallVectorImpl<WeakVH> &DeadInsts) const {
4697*9880d681SAndroid Build Coastguard Worker   // First, find an insertion point that dominates UserInst. For PHI nodes,
4698*9880d681SAndroid Build Coastguard Worker   // find the nearest block which dominates all the relevant uses.
4699*9880d681SAndroid Build Coastguard Worker   if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
4700*9880d681SAndroid Build Coastguard Worker     RewriteForPHI(PN, LF, F, Rewriter, DeadInsts);
4701*9880d681SAndroid Build Coastguard Worker   } else {
4702*9880d681SAndroid Build Coastguard Worker     Value *FullV =
4703*9880d681SAndroid Build Coastguard Worker         Expand(LF, F, LF.UserInst->getIterator(), Rewriter, DeadInsts);
4704*9880d681SAndroid Build Coastguard Worker 
4705*9880d681SAndroid Build Coastguard Worker     // If this is reuse-by-noop-cast, insert the noop cast.
4706*9880d681SAndroid Build Coastguard Worker     Type *OpTy = LF.OperandValToReplace->getType();
4707*9880d681SAndroid Build Coastguard Worker     if (FullV->getType() != OpTy) {
4708*9880d681SAndroid Build Coastguard Worker       Instruction *Cast =
4709*9880d681SAndroid Build Coastguard Worker         CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
4710*9880d681SAndroid Build Coastguard Worker                          FullV, OpTy, "tmp", LF.UserInst);
4711*9880d681SAndroid Build Coastguard Worker       FullV = Cast;
4712*9880d681SAndroid Build Coastguard Worker     }
4713*9880d681SAndroid Build Coastguard Worker 
4714*9880d681SAndroid Build Coastguard Worker     // Update the user. ICmpZero is handled specially here (for now) because
4715*9880d681SAndroid Build Coastguard Worker     // Expand may have updated one of the operands of the icmp already, and
4716*9880d681SAndroid Build Coastguard Worker     // its new value may happen to be equal to LF.OperandValToReplace, in
4717*9880d681SAndroid Build Coastguard Worker     // which case doing replaceUsesOfWith leads to replacing both operands
4718*9880d681SAndroid Build Coastguard Worker     // with the same value. TODO: Reorganize this.
4719*9880d681SAndroid Build Coastguard Worker     if (Uses[LF.LUIdx].Kind == LSRUse::ICmpZero)
4720*9880d681SAndroid Build Coastguard Worker       LF.UserInst->setOperand(0, FullV);
4721*9880d681SAndroid Build Coastguard Worker     else
4722*9880d681SAndroid Build Coastguard Worker       LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
4723*9880d681SAndroid Build Coastguard Worker   }
4724*9880d681SAndroid Build Coastguard Worker 
4725*9880d681SAndroid Build Coastguard Worker   DeadInsts.emplace_back(LF.OperandValToReplace);
4726*9880d681SAndroid Build Coastguard Worker }
4727*9880d681SAndroid Build Coastguard Worker 
4728*9880d681SAndroid Build Coastguard Worker /// Rewrite all the fixup locations with new values, following the chosen
4729*9880d681SAndroid Build Coastguard Worker /// solution.
ImplementSolution(const SmallVectorImpl<const Formula * > & Solution)4730*9880d681SAndroid Build Coastguard Worker void LSRInstance::ImplementSolution(
4731*9880d681SAndroid Build Coastguard Worker     const SmallVectorImpl<const Formula *> &Solution) {
4732*9880d681SAndroid Build Coastguard Worker   // Keep track of instructions we may have made dead, so that
4733*9880d681SAndroid Build Coastguard Worker   // we can remove them after we are done working.
4734*9880d681SAndroid Build Coastguard Worker   SmallVector<WeakVH, 16> DeadInsts;
4735*9880d681SAndroid Build Coastguard Worker 
4736*9880d681SAndroid Build Coastguard Worker   SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(),
4737*9880d681SAndroid Build Coastguard Worker                         "lsr");
4738*9880d681SAndroid Build Coastguard Worker #ifndef NDEBUG
4739*9880d681SAndroid Build Coastguard Worker   Rewriter.setDebugType(DEBUG_TYPE);
4740*9880d681SAndroid Build Coastguard Worker #endif
4741*9880d681SAndroid Build Coastguard Worker   Rewriter.disableCanonicalMode();
4742*9880d681SAndroid Build Coastguard Worker   Rewriter.enableLSRMode();
4743*9880d681SAndroid Build Coastguard Worker   Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
4744*9880d681SAndroid Build Coastguard Worker 
4745*9880d681SAndroid Build Coastguard Worker   // Mark phi nodes that terminate chains so the expander tries to reuse them.
4746*9880d681SAndroid Build Coastguard Worker   for (const IVChain &Chain : IVChainVec) {
4747*9880d681SAndroid Build Coastguard Worker     if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
4748*9880d681SAndroid Build Coastguard Worker       Rewriter.setChainedPhi(PN);
4749*9880d681SAndroid Build Coastguard Worker   }
4750*9880d681SAndroid Build Coastguard Worker 
4751*9880d681SAndroid Build Coastguard Worker   // Expand the new value definitions and update the users.
4752*9880d681SAndroid Build Coastguard Worker   for (const LSRFixup &Fixup : Fixups) {
4753*9880d681SAndroid Build Coastguard Worker     Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts);
4754*9880d681SAndroid Build Coastguard Worker 
4755*9880d681SAndroid Build Coastguard Worker     Changed = true;
4756*9880d681SAndroid Build Coastguard Worker   }
4757*9880d681SAndroid Build Coastguard Worker 
4758*9880d681SAndroid Build Coastguard Worker   for (const IVChain &Chain : IVChainVec) {
4759*9880d681SAndroid Build Coastguard Worker     GenerateIVChain(Chain, Rewriter, DeadInsts);
4760*9880d681SAndroid Build Coastguard Worker     Changed = true;
4761*9880d681SAndroid Build Coastguard Worker   }
4762*9880d681SAndroid Build Coastguard Worker   // Clean up after ourselves. This must be done before deleting any
4763*9880d681SAndroid Build Coastguard Worker   // instructions.
4764*9880d681SAndroid Build Coastguard Worker   Rewriter.clear();
4765*9880d681SAndroid Build Coastguard Worker 
4766*9880d681SAndroid Build Coastguard Worker   Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
4767*9880d681SAndroid Build Coastguard Worker }
4768*9880d681SAndroid Build Coastguard Worker 
LSRInstance(Loop * L,IVUsers & IU,ScalarEvolution & SE,DominatorTree & DT,LoopInfo & LI,const TargetTransformInfo & TTI)4769*9880d681SAndroid Build Coastguard Worker LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
4770*9880d681SAndroid Build Coastguard Worker                          DominatorTree &DT, LoopInfo &LI,
4771*9880d681SAndroid Build Coastguard Worker                          const TargetTransformInfo &TTI)
4772*9880d681SAndroid Build Coastguard Worker     : IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L), Changed(false),
4773*9880d681SAndroid Build Coastguard Worker       IVIncInsertPos(nullptr) {
4774*9880d681SAndroid Build Coastguard Worker   // If LoopSimplify form is not available, stay out of trouble.
4775*9880d681SAndroid Build Coastguard Worker   if (!L->isLoopSimplifyForm())
4776*9880d681SAndroid Build Coastguard Worker     return;
4777*9880d681SAndroid Build Coastguard Worker 
4778*9880d681SAndroid Build Coastguard Worker   // If there's no interesting work to be done, bail early.
4779*9880d681SAndroid Build Coastguard Worker   if (IU.empty()) return;
4780*9880d681SAndroid Build Coastguard Worker 
4781*9880d681SAndroid Build Coastguard Worker   // If there's too much analysis to be done, bail early. We won't be able to
4782*9880d681SAndroid Build Coastguard Worker   // model the problem anyway.
4783*9880d681SAndroid Build Coastguard Worker   unsigned NumUsers = 0;
4784*9880d681SAndroid Build Coastguard Worker   for (const IVStrideUse &U : IU) {
4785*9880d681SAndroid Build Coastguard Worker     if (++NumUsers > MaxIVUsers) {
4786*9880d681SAndroid Build Coastguard Worker       (void)U;
4787*9880d681SAndroid Build Coastguard Worker       DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U << "\n");
4788*9880d681SAndroid Build Coastguard Worker       return;
4789*9880d681SAndroid Build Coastguard Worker     }
4790*9880d681SAndroid Build Coastguard Worker     // Bail out if we have a PHI on an EHPad that gets a value from a
4791*9880d681SAndroid Build Coastguard Worker     // CatchSwitchInst.  Because the CatchSwitchInst cannot be split, there is
4792*9880d681SAndroid Build Coastguard Worker     // no good place to stick any instructions.
4793*9880d681SAndroid Build Coastguard Worker     if (auto *PN = dyn_cast<PHINode>(U.getUser())) {
4794*9880d681SAndroid Build Coastguard Worker        auto *FirstNonPHI = PN->getParent()->getFirstNonPHI();
4795*9880d681SAndroid Build Coastguard Worker        if (isa<FuncletPadInst>(FirstNonPHI) ||
4796*9880d681SAndroid Build Coastguard Worker            isa<CatchSwitchInst>(FirstNonPHI))
4797*9880d681SAndroid Build Coastguard Worker          for (BasicBlock *PredBB : PN->blocks())
4798*9880d681SAndroid Build Coastguard Worker            if (isa<CatchSwitchInst>(PredBB->getFirstNonPHI()))
4799*9880d681SAndroid Build Coastguard Worker              return;
4800*9880d681SAndroid Build Coastguard Worker     }
4801*9880d681SAndroid Build Coastguard Worker   }
4802*9880d681SAndroid Build Coastguard Worker 
4803*9880d681SAndroid Build Coastguard Worker #ifndef NDEBUG
4804*9880d681SAndroid Build Coastguard Worker   // All dominating loops must have preheaders, or SCEVExpander may not be able
4805*9880d681SAndroid Build Coastguard Worker   // to materialize an AddRecExpr whose Start is an outer AddRecExpr.
4806*9880d681SAndroid Build Coastguard Worker   //
4807*9880d681SAndroid Build Coastguard Worker   // IVUsers analysis should only create users that are dominated by simple loop
4808*9880d681SAndroid Build Coastguard Worker   // headers. Since this loop should dominate all of its users, its user list
4809*9880d681SAndroid Build Coastguard Worker   // should be empty if this loop itself is not within a simple loop nest.
4810*9880d681SAndroid Build Coastguard Worker   for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader());
4811*9880d681SAndroid Build Coastguard Worker        Rung; Rung = Rung->getIDom()) {
4812*9880d681SAndroid Build Coastguard Worker     BasicBlock *BB = Rung->getBlock();
4813*9880d681SAndroid Build Coastguard Worker     const Loop *DomLoop = LI.getLoopFor(BB);
4814*9880d681SAndroid Build Coastguard Worker     if (DomLoop && DomLoop->getHeader() == BB) {
4815*9880d681SAndroid Build Coastguard Worker       assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest");
4816*9880d681SAndroid Build Coastguard Worker     }
4817*9880d681SAndroid Build Coastguard Worker   }
4818*9880d681SAndroid Build Coastguard Worker #endif // DEBUG
4819*9880d681SAndroid Build Coastguard Worker 
4820*9880d681SAndroid Build Coastguard Worker   DEBUG(dbgs() << "\nLSR on loop ";
4821*9880d681SAndroid Build Coastguard Worker         L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
4822*9880d681SAndroid Build Coastguard Worker         dbgs() << ":\n");
4823*9880d681SAndroid Build Coastguard Worker 
4824*9880d681SAndroid Build Coastguard Worker   // First, perform some low-level loop optimizations.
4825*9880d681SAndroid Build Coastguard Worker   OptimizeShadowIV();
4826*9880d681SAndroid Build Coastguard Worker   OptimizeLoopTermCond();
4827*9880d681SAndroid Build Coastguard Worker 
4828*9880d681SAndroid Build Coastguard Worker   // If loop preparation eliminates all interesting IV users, bail.
4829*9880d681SAndroid Build Coastguard Worker   if (IU.empty()) return;
4830*9880d681SAndroid Build Coastguard Worker 
4831*9880d681SAndroid Build Coastguard Worker   // Skip nested loops until we can model them better with formulae.
4832*9880d681SAndroid Build Coastguard Worker   if (!L->empty()) {
4833*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
4834*9880d681SAndroid Build Coastguard Worker     return;
4835*9880d681SAndroid Build Coastguard Worker   }
4836*9880d681SAndroid Build Coastguard Worker 
4837*9880d681SAndroid Build Coastguard Worker   // Start collecting data and preparing for the solver.
4838*9880d681SAndroid Build Coastguard Worker   CollectChains();
4839*9880d681SAndroid Build Coastguard Worker   CollectInterestingTypesAndFactors();
4840*9880d681SAndroid Build Coastguard Worker   CollectFixupsAndInitialFormulae();
4841*9880d681SAndroid Build Coastguard Worker   CollectLoopInvariantFixupsAndFormulae();
4842*9880d681SAndroid Build Coastguard Worker 
4843*9880d681SAndroid Build Coastguard Worker   assert(!Uses.empty() && "IVUsers reported at least one use");
4844*9880d681SAndroid Build Coastguard Worker   DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
4845*9880d681SAndroid Build Coastguard Worker         print_uses(dbgs()));
4846*9880d681SAndroid Build Coastguard Worker 
4847*9880d681SAndroid Build Coastguard Worker   // Now use the reuse data to generate a bunch of interesting ways
4848*9880d681SAndroid Build Coastguard Worker   // to formulate the values needed for the uses.
4849*9880d681SAndroid Build Coastguard Worker   GenerateAllReuseFormulae();
4850*9880d681SAndroid Build Coastguard Worker 
4851*9880d681SAndroid Build Coastguard Worker   FilterOutUndesirableDedicatedRegisters();
4852*9880d681SAndroid Build Coastguard Worker   NarrowSearchSpaceUsingHeuristics();
4853*9880d681SAndroid Build Coastguard Worker 
4854*9880d681SAndroid Build Coastguard Worker   SmallVector<const Formula *, 8> Solution;
4855*9880d681SAndroid Build Coastguard Worker   Solve(Solution);
4856*9880d681SAndroid Build Coastguard Worker 
4857*9880d681SAndroid Build Coastguard Worker   // Release memory that is no longer needed.
4858*9880d681SAndroid Build Coastguard Worker   Factors.clear();
4859*9880d681SAndroid Build Coastguard Worker   Types.clear();
4860*9880d681SAndroid Build Coastguard Worker   RegUses.clear();
4861*9880d681SAndroid Build Coastguard Worker 
4862*9880d681SAndroid Build Coastguard Worker   if (Solution.empty())
4863*9880d681SAndroid Build Coastguard Worker     return;
4864*9880d681SAndroid Build Coastguard Worker 
4865*9880d681SAndroid Build Coastguard Worker #ifndef NDEBUG
4866*9880d681SAndroid Build Coastguard Worker   // Formulae should be legal.
4867*9880d681SAndroid Build Coastguard Worker   for (const LSRUse &LU : Uses) {
4868*9880d681SAndroid Build Coastguard Worker     for (const Formula &F : LU.Formulae)
4869*9880d681SAndroid Build Coastguard Worker       assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4870*9880d681SAndroid Build Coastguard Worker                         F) && "Illegal formula generated!");
4871*9880d681SAndroid Build Coastguard Worker   };
4872*9880d681SAndroid Build Coastguard Worker #endif
4873*9880d681SAndroid Build Coastguard Worker 
4874*9880d681SAndroid Build Coastguard Worker   // Now that we've decided what we want, make it so.
4875*9880d681SAndroid Build Coastguard Worker   ImplementSolution(Solution);
4876*9880d681SAndroid Build Coastguard Worker }
4877*9880d681SAndroid Build Coastguard Worker 
print_factors_and_types(raw_ostream & OS) const4878*9880d681SAndroid Build Coastguard Worker void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
4879*9880d681SAndroid Build Coastguard Worker   if (Factors.empty() && Types.empty()) return;
4880*9880d681SAndroid Build Coastguard Worker 
4881*9880d681SAndroid Build Coastguard Worker   OS << "LSR has identified the following interesting factors and types: ";
4882*9880d681SAndroid Build Coastguard Worker   bool First = true;
4883*9880d681SAndroid Build Coastguard Worker 
4884*9880d681SAndroid Build Coastguard Worker   for (int64_t Factor : Factors) {
4885*9880d681SAndroid Build Coastguard Worker     if (!First) OS << ", ";
4886*9880d681SAndroid Build Coastguard Worker     First = false;
4887*9880d681SAndroid Build Coastguard Worker     OS << '*' << Factor;
4888*9880d681SAndroid Build Coastguard Worker   }
4889*9880d681SAndroid Build Coastguard Worker 
4890*9880d681SAndroid Build Coastguard Worker   for (Type *Ty : Types) {
4891*9880d681SAndroid Build Coastguard Worker     if (!First) OS << ", ";
4892*9880d681SAndroid Build Coastguard Worker     First = false;
4893*9880d681SAndroid Build Coastguard Worker     OS << '(' << *Ty << ')';
4894*9880d681SAndroid Build Coastguard Worker   }
4895*9880d681SAndroid Build Coastguard Worker   OS << '\n';
4896*9880d681SAndroid Build Coastguard Worker }
4897*9880d681SAndroid Build Coastguard Worker 
print_fixups(raw_ostream & OS) const4898*9880d681SAndroid Build Coastguard Worker void LSRInstance::print_fixups(raw_ostream &OS) const {
4899*9880d681SAndroid Build Coastguard Worker   OS << "LSR is examining the following fixup sites:\n";
4900*9880d681SAndroid Build Coastguard Worker   for (const LSRFixup &LF : Fixups) {
4901*9880d681SAndroid Build Coastguard Worker     dbgs() << "  ";
4902*9880d681SAndroid Build Coastguard Worker     LF.print(OS);
4903*9880d681SAndroid Build Coastguard Worker     OS << '\n';
4904*9880d681SAndroid Build Coastguard Worker   }
4905*9880d681SAndroid Build Coastguard Worker }
4906*9880d681SAndroid Build Coastguard Worker 
print_uses(raw_ostream & OS) const4907*9880d681SAndroid Build Coastguard Worker void LSRInstance::print_uses(raw_ostream &OS) const {
4908*9880d681SAndroid Build Coastguard Worker   OS << "LSR is examining the following uses:\n";
4909*9880d681SAndroid Build Coastguard Worker   for (const LSRUse &LU : Uses) {
4910*9880d681SAndroid Build Coastguard Worker     dbgs() << "  ";
4911*9880d681SAndroid Build Coastguard Worker     LU.print(OS);
4912*9880d681SAndroid Build Coastguard Worker     OS << '\n';
4913*9880d681SAndroid Build Coastguard Worker     for (const Formula &F : LU.Formulae) {
4914*9880d681SAndroid Build Coastguard Worker       OS << "    ";
4915*9880d681SAndroid Build Coastguard Worker       F.print(OS);
4916*9880d681SAndroid Build Coastguard Worker       OS << '\n';
4917*9880d681SAndroid Build Coastguard Worker     }
4918*9880d681SAndroid Build Coastguard Worker   }
4919*9880d681SAndroid Build Coastguard Worker }
4920*9880d681SAndroid Build Coastguard Worker 
print(raw_ostream & OS) const4921*9880d681SAndroid Build Coastguard Worker void LSRInstance::print(raw_ostream &OS) const {
4922*9880d681SAndroid Build Coastguard Worker   print_factors_and_types(OS);
4923*9880d681SAndroid Build Coastguard Worker   print_fixups(OS);
4924*9880d681SAndroid Build Coastguard Worker   print_uses(OS);
4925*9880d681SAndroid Build Coastguard Worker }
4926*9880d681SAndroid Build Coastguard Worker 
4927*9880d681SAndroid Build Coastguard Worker LLVM_DUMP_METHOD
dump() const4928*9880d681SAndroid Build Coastguard Worker void LSRInstance::dump() const {
4929*9880d681SAndroid Build Coastguard Worker   print(errs()); errs() << '\n';
4930*9880d681SAndroid Build Coastguard Worker }
4931*9880d681SAndroid Build Coastguard Worker 
4932*9880d681SAndroid Build Coastguard Worker namespace {
4933*9880d681SAndroid Build Coastguard Worker 
4934*9880d681SAndroid Build Coastguard Worker class LoopStrengthReduce : public LoopPass {
4935*9880d681SAndroid Build Coastguard Worker public:
4936*9880d681SAndroid Build Coastguard Worker   static char ID; // Pass ID, replacement for typeid
4937*9880d681SAndroid Build Coastguard Worker   LoopStrengthReduce();
4938*9880d681SAndroid Build Coastguard Worker 
4939*9880d681SAndroid Build Coastguard Worker private:
4940*9880d681SAndroid Build Coastguard Worker   bool runOnLoop(Loop *L, LPPassManager &LPM) override;
4941*9880d681SAndroid Build Coastguard Worker   void getAnalysisUsage(AnalysisUsage &AU) const override;
4942*9880d681SAndroid Build Coastguard Worker };
4943*9880d681SAndroid Build Coastguard Worker 
4944*9880d681SAndroid Build Coastguard Worker }
4945*9880d681SAndroid Build Coastguard Worker 
4946*9880d681SAndroid Build Coastguard Worker char LoopStrengthReduce::ID = 0;
4947*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
4948*9880d681SAndroid Build Coastguard Worker                 "Loop Strength Reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)4949*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
4950*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
4951*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
4952*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_DEPENDENCY(IVUsers)
4953*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
4954*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
4955*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
4956*9880d681SAndroid Build Coastguard Worker                 "Loop Strength Reduction", false, false)
4957*9880d681SAndroid Build Coastguard Worker 
4958*9880d681SAndroid Build Coastguard Worker 
4959*9880d681SAndroid Build Coastguard Worker Pass *llvm::createLoopStrengthReducePass() {
4960*9880d681SAndroid Build Coastguard Worker   return new LoopStrengthReduce();
4961*9880d681SAndroid Build Coastguard Worker }
4962*9880d681SAndroid Build Coastguard Worker 
LoopStrengthReduce()4963*9880d681SAndroid Build Coastguard Worker LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
4964*9880d681SAndroid Build Coastguard Worker   initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
4965*9880d681SAndroid Build Coastguard Worker }
4966*9880d681SAndroid Build Coastguard Worker 
getAnalysisUsage(AnalysisUsage & AU) const4967*9880d681SAndroid Build Coastguard Worker void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
4968*9880d681SAndroid Build Coastguard Worker   // We split critical edges, so we change the CFG.  However, we do update
4969*9880d681SAndroid Build Coastguard Worker   // many analyses if they are around.
4970*9880d681SAndroid Build Coastguard Worker   AU.addPreservedID(LoopSimplifyID);
4971*9880d681SAndroid Build Coastguard Worker 
4972*9880d681SAndroid Build Coastguard Worker   AU.addRequired<LoopInfoWrapperPass>();
4973*9880d681SAndroid Build Coastguard Worker   AU.addPreserved<LoopInfoWrapperPass>();
4974*9880d681SAndroid Build Coastguard Worker   AU.addRequiredID(LoopSimplifyID);
4975*9880d681SAndroid Build Coastguard Worker   AU.addRequired<DominatorTreeWrapperPass>();
4976*9880d681SAndroid Build Coastguard Worker   AU.addPreserved<DominatorTreeWrapperPass>();
4977*9880d681SAndroid Build Coastguard Worker   AU.addRequired<ScalarEvolutionWrapperPass>();
4978*9880d681SAndroid Build Coastguard Worker   AU.addPreserved<ScalarEvolutionWrapperPass>();
4979*9880d681SAndroid Build Coastguard Worker   // Requiring LoopSimplify a second time here prevents IVUsers from running
4980*9880d681SAndroid Build Coastguard Worker   // twice, since LoopSimplify was invalidated by running ScalarEvolution.
4981*9880d681SAndroid Build Coastguard Worker   AU.addRequiredID(LoopSimplifyID);
4982*9880d681SAndroid Build Coastguard Worker   AU.addRequired<IVUsers>();
4983*9880d681SAndroid Build Coastguard Worker   AU.addPreserved<IVUsers>();
4984*9880d681SAndroid Build Coastguard Worker   AU.addRequired<TargetTransformInfoWrapperPass>();
4985*9880d681SAndroid Build Coastguard Worker }
4986*9880d681SAndroid Build Coastguard Worker 
runOnLoop(Loop * L,LPPassManager &)4987*9880d681SAndroid Build Coastguard Worker bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
4988*9880d681SAndroid Build Coastguard Worker   if (skipLoop(L))
4989*9880d681SAndroid Build Coastguard Worker     return false;
4990*9880d681SAndroid Build Coastguard Worker 
4991*9880d681SAndroid Build Coastguard Worker   auto &IU = getAnalysis<IVUsers>();
4992*9880d681SAndroid Build Coastguard Worker   auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
4993*9880d681SAndroid Build Coastguard Worker   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
4994*9880d681SAndroid Build Coastguard Worker   auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
4995*9880d681SAndroid Build Coastguard Worker   const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
4996*9880d681SAndroid Build Coastguard Worker       *L->getHeader()->getParent());
4997*9880d681SAndroid Build Coastguard Worker   bool Changed = false;
4998*9880d681SAndroid Build Coastguard Worker 
4999*9880d681SAndroid Build Coastguard Worker   // Run the main LSR transformation.
5000*9880d681SAndroid Build Coastguard Worker   Changed |= LSRInstance(L, IU, SE, DT, LI, TTI).getChanged();
5001*9880d681SAndroid Build Coastguard Worker 
5002*9880d681SAndroid Build Coastguard Worker   // Remove any extra phis created by processing inner loops.
5003*9880d681SAndroid Build Coastguard Worker   Changed |= DeleteDeadPHIs(L->getHeader());
5004*9880d681SAndroid Build Coastguard Worker   if (EnablePhiElim && L->isLoopSimplifyForm()) {
5005*9880d681SAndroid Build Coastguard Worker     SmallVector<WeakVH, 16> DeadInsts;
5006*9880d681SAndroid Build Coastguard Worker     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
5007*9880d681SAndroid Build Coastguard Worker     SCEVExpander Rewriter(getAnalysis<ScalarEvolutionWrapperPass>().getSE(), DL,
5008*9880d681SAndroid Build Coastguard Worker                           "lsr");
5009*9880d681SAndroid Build Coastguard Worker #ifndef NDEBUG
5010*9880d681SAndroid Build Coastguard Worker     Rewriter.setDebugType(DEBUG_TYPE);
5011*9880d681SAndroid Build Coastguard Worker #endif
5012*9880d681SAndroid Build Coastguard Worker     unsigned numFolded = Rewriter.replaceCongruentIVs(
5013*9880d681SAndroid Build Coastguard Worker         L, &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), DeadInsts,
5014*9880d681SAndroid Build Coastguard Worker         &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
5015*9880d681SAndroid Build Coastguard Worker             *L->getHeader()->getParent()));
5016*9880d681SAndroid Build Coastguard Worker     if (numFolded) {
5017*9880d681SAndroid Build Coastguard Worker       Changed = true;
5018*9880d681SAndroid Build Coastguard Worker       DeleteTriviallyDeadInstructions(DeadInsts);
5019*9880d681SAndroid Build Coastguard Worker       DeleteDeadPHIs(L->getHeader());
5020*9880d681SAndroid Build Coastguard Worker     }
5021*9880d681SAndroid Build Coastguard Worker   }
5022*9880d681SAndroid Build Coastguard Worker   return Changed;
5023*9880d681SAndroid Build Coastguard Worker }
5024