1*9880d681SAndroid Build Coastguard Worker //===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
2*9880d681SAndroid Build Coastguard Worker //
3*9880d681SAndroid Build Coastguard Worker // The LLVM Compiler Infrastructure
4*9880d681SAndroid Build Coastguard Worker //
5*9880d681SAndroid Build Coastguard Worker // This file is distributed under the University of Illinois Open Source
6*9880d681SAndroid Build Coastguard Worker // License. See LICENSE.TXT for details.
7*9880d681SAndroid Build Coastguard Worker //
8*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
9*9880d681SAndroid Build Coastguard Worker //
10*9880d681SAndroid Build Coastguard Worker // This file implements some loop unrolling utilities for loops with run-time
11*9880d681SAndroid Build Coastguard Worker // trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
12*9880d681SAndroid Build Coastguard Worker // trip counts.
13*9880d681SAndroid Build Coastguard Worker //
14*9880d681SAndroid Build Coastguard Worker // The functions in this file are used to generate extra code when the
15*9880d681SAndroid Build Coastguard Worker // run-time trip count modulo the unroll factor is not 0. When this is the
16*9880d681SAndroid Build Coastguard Worker // case, we need to generate code to execute these 'left over' iterations.
17*9880d681SAndroid Build Coastguard Worker //
18*9880d681SAndroid Build Coastguard Worker // The current strategy generates an if-then-else sequence prior to the
19*9880d681SAndroid Build Coastguard Worker // unrolled loop to execute the 'left over' iterations before or after the
20*9880d681SAndroid Build Coastguard Worker // unrolled loop.
21*9880d681SAndroid Build Coastguard Worker //
22*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
23*9880d681SAndroid Build Coastguard Worker
24*9880d681SAndroid Build Coastguard Worker #include "llvm/Transforms/Utils/UnrollLoop.h"
25*9880d681SAndroid Build Coastguard Worker #include "llvm/ADT/Statistic.h"
26*9880d681SAndroid Build Coastguard Worker #include "llvm/Analysis/AliasAnalysis.h"
27*9880d681SAndroid Build Coastguard Worker #include "llvm/Analysis/LoopIterator.h"
28*9880d681SAndroid Build Coastguard Worker #include "llvm/Analysis/LoopPass.h"
29*9880d681SAndroid Build Coastguard Worker #include "llvm/Analysis/ScalarEvolution.h"
30*9880d681SAndroid Build Coastguard Worker #include "llvm/Analysis/ScalarEvolutionExpander.h"
31*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/BasicBlock.h"
32*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/Dominators.h"
33*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/Metadata.h"
34*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/Module.h"
35*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/Debug.h"
36*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/raw_ostream.h"
37*9880d681SAndroid Build Coastguard Worker #include "llvm/Transforms/Scalar.h"
38*9880d681SAndroid Build Coastguard Worker #include "llvm/Transforms/Utils/BasicBlockUtils.h"
39*9880d681SAndroid Build Coastguard Worker #include "llvm/Transforms/Utils/Cloning.h"
40*9880d681SAndroid Build Coastguard Worker #include <algorithm>
41*9880d681SAndroid Build Coastguard Worker
42*9880d681SAndroid Build Coastguard Worker using namespace llvm;
43*9880d681SAndroid Build Coastguard Worker
44*9880d681SAndroid Build Coastguard Worker #define DEBUG_TYPE "loop-unroll"
45*9880d681SAndroid Build Coastguard Worker
46*9880d681SAndroid Build Coastguard Worker STATISTIC(NumRuntimeUnrolled,
47*9880d681SAndroid Build Coastguard Worker "Number of loops unrolled with run-time trip counts");
48*9880d681SAndroid Build Coastguard Worker
49*9880d681SAndroid Build Coastguard Worker /// Connect the unrolling prolog code to the original loop.
50*9880d681SAndroid Build Coastguard Worker /// The unrolling prolog code contains code to execute the
51*9880d681SAndroid Build Coastguard Worker /// 'extra' iterations if the run-time trip count modulo the
52*9880d681SAndroid Build Coastguard Worker /// unroll count is non-zero.
53*9880d681SAndroid Build Coastguard Worker ///
54*9880d681SAndroid Build Coastguard Worker /// This function performs the following:
55*9880d681SAndroid Build Coastguard Worker /// - Create PHI nodes at prolog end block to combine values
56*9880d681SAndroid Build Coastguard Worker /// that exit the prolog code and jump around the prolog.
57*9880d681SAndroid Build Coastguard Worker /// - Add a PHI operand to a PHI node at the loop exit block
58*9880d681SAndroid Build Coastguard Worker /// for values that exit the prolog and go around the loop.
59*9880d681SAndroid Build Coastguard Worker /// - Branch around the original loop if the trip count is less
60*9880d681SAndroid Build Coastguard Worker /// than the unroll factor.
61*9880d681SAndroid Build Coastguard Worker ///
ConnectProlog(Loop * L,Value * BECount,unsigned Count,BasicBlock * PrologExit,BasicBlock * PreHeader,BasicBlock * NewPreHeader,ValueToValueMapTy & VMap,DominatorTree * DT,LoopInfo * LI,bool PreserveLCSSA)62*9880d681SAndroid Build Coastguard Worker static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
63*9880d681SAndroid Build Coastguard Worker BasicBlock *PrologExit, BasicBlock *PreHeader,
64*9880d681SAndroid Build Coastguard Worker BasicBlock *NewPreHeader, ValueToValueMapTy &VMap,
65*9880d681SAndroid Build Coastguard Worker DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) {
66*9880d681SAndroid Build Coastguard Worker BasicBlock *Latch = L->getLoopLatch();
67*9880d681SAndroid Build Coastguard Worker assert(Latch && "Loop must have a latch");
68*9880d681SAndroid Build Coastguard Worker BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
69*9880d681SAndroid Build Coastguard Worker
70*9880d681SAndroid Build Coastguard Worker // Create a PHI node for each outgoing value from the original loop
71*9880d681SAndroid Build Coastguard Worker // (which means it is an outgoing value from the prolog code too).
72*9880d681SAndroid Build Coastguard Worker // The new PHI node is inserted in the prolog end basic block.
73*9880d681SAndroid Build Coastguard Worker // The new PHI node value is added as an operand of a PHI node in either
74*9880d681SAndroid Build Coastguard Worker // the loop header or the loop exit block.
75*9880d681SAndroid Build Coastguard Worker for (BasicBlock *Succ : successors(Latch)) {
76*9880d681SAndroid Build Coastguard Worker for (Instruction &BBI : *Succ) {
77*9880d681SAndroid Build Coastguard Worker PHINode *PN = dyn_cast<PHINode>(&BBI);
78*9880d681SAndroid Build Coastguard Worker // Exit when we passed all PHI nodes.
79*9880d681SAndroid Build Coastguard Worker if (!PN)
80*9880d681SAndroid Build Coastguard Worker break;
81*9880d681SAndroid Build Coastguard Worker // Add a new PHI node to the prolog end block and add the
82*9880d681SAndroid Build Coastguard Worker // appropriate incoming values.
83*9880d681SAndroid Build Coastguard Worker PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",
84*9880d681SAndroid Build Coastguard Worker PrologExit->getFirstNonPHI());
85*9880d681SAndroid Build Coastguard Worker // Adding a value to the new PHI node from the original loop preheader.
86*9880d681SAndroid Build Coastguard Worker // This is the value that skips all the prolog code.
87*9880d681SAndroid Build Coastguard Worker if (L->contains(PN)) {
88*9880d681SAndroid Build Coastguard Worker NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader),
89*9880d681SAndroid Build Coastguard Worker PreHeader);
90*9880d681SAndroid Build Coastguard Worker } else {
91*9880d681SAndroid Build Coastguard Worker NewPN->addIncoming(UndefValue::get(PN->getType()), PreHeader);
92*9880d681SAndroid Build Coastguard Worker }
93*9880d681SAndroid Build Coastguard Worker
94*9880d681SAndroid Build Coastguard Worker Value *V = PN->getIncomingValueForBlock(Latch);
95*9880d681SAndroid Build Coastguard Worker if (Instruction *I = dyn_cast<Instruction>(V)) {
96*9880d681SAndroid Build Coastguard Worker if (L->contains(I)) {
97*9880d681SAndroid Build Coastguard Worker V = VMap.lookup(I);
98*9880d681SAndroid Build Coastguard Worker }
99*9880d681SAndroid Build Coastguard Worker }
100*9880d681SAndroid Build Coastguard Worker // Adding a value to the new PHI node from the last prolog block
101*9880d681SAndroid Build Coastguard Worker // that was created.
102*9880d681SAndroid Build Coastguard Worker NewPN->addIncoming(V, PrologLatch);
103*9880d681SAndroid Build Coastguard Worker
104*9880d681SAndroid Build Coastguard Worker // Update the existing PHI node operand with the value from the
105*9880d681SAndroid Build Coastguard Worker // new PHI node. How this is done depends on if the existing
106*9880d681SAndroid Build Coastguard Worker // PHI node is in the original loop block, or the exit block.
107*9880d681SAndroid Build Coastguard Worker if (L->contains(PN)) {
108*9880d681SAndroid Build Coastguard Worker PN->setIncomingValue(PN->getBasicBlockIndex(NewPreHeader), NewPN);
109*9880d681SAndroid Build Coastguard Worker } else {
110*9880d681SAndroid Build Coastguard Worker PN->addIncoming(NewPN, PrologExit);
111*9880d681SAndroid Build Coastguard Worker }
112*9880d681SAndroid Build Coastguard Worker }
113*9880d681SAndroid Build Coastguard Worker }
114*9880d681SAndroid Build Coastguard Worker
115*9880d681SAndroid Build Coastguard Worker // Create a branch around the original loop, which is taken if there are no
116*9880d681SAndroid Build Coastguard Worker // iterations remaining to be executed after running the prologue.
117*9880d681SAndroid Build Coastguard Worker Instruction *InsertPt = PrologExit->getTerminator();
118*9880d681SAndroid Build Coastguard Worker IRBuilder<> B(InsertPt);
119*9880d681SAndroid Build Coastguard Worker
120*9880d681SAndroid Build Coastguard Worker assert(Count != 0 && "nonsensical Count!");
121*9880d681SAndroid Build Coastguard Worker
122*9880d681SAndroid Build Coastguard Worker // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
123*9880d681SAndroid Build Coastguard Worker // This means %xtraiter is (BECount + 1) and all of the iterations of this
124*9880d681SAndroid Build Coastguard Worker // loop were executed by the prologue. Note that if BECount <u (Count - 1)
125*9880d681SAndroid Build Coastguard Worker // then (BECount + 1) cannot unsigned-overflow.
126*9880d681SAndroid Build Coastguard Worker Value *BrLoopExit =
127*9880d681SAndroid Build Coastguard Worker B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
128*9880d681SAndroid Build Coastguard Worker BasicBlock *Exit = L->getUniqueExitBlock();
129*9880d681SAndroid Build Coastguard Worker assert(Exit && "Loop must have a single exit block only");
130*9880d681SAndroid Build Coastguard Worker // Split the exit to maintain loop canonicalization guarantees
131*9880d681SAndroid Build Coastguard Worker SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
132*9880d681SAndroid Build Coastguard Worker SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI,
133*9880d681SAndroid Build Coastguard Worker PreserveLCSSA);
134*9880d681SAndroid Build Coastguard Worker // Add the branch to the exit block (around the unrolled loop)
135*9880d681SAndroid Build Coastguard Worker B.CreateCondBr(BrLoopExit, Exit, NewPreHeader);
136*9880d681SAndroid Build Coastguard Worker InsertPt->eraseFromParent();
137*9880d681SAndroid Build Coastguard Worker }
138*9880d681SAndroid Build Coastguard Worker
139*9880d681SAndroid Build Coastguard Worker /// Connect the unrolling epilog code to the original loop.
140*9880d681SAndroid Build Coastguard Worker /// The unrolling epilog code contains code to execute the
141*9880d681SAndroid Build Coastguard Worker /// 'extra' iterations if the run-time trip count modulo the
142*9880d681SAndroid Build Coastguard Worker /// unroll count is non-zero.
143*9880d681SAndroid Build Coastguard Worker ///
144*9880d681SAndroid Build Coastguard Worker /// This function performs the following:
145*9880d681SAndroid Build Coastguard Worker /// - Update PHI nodes at the unrolling loop exit and epilog loop exit
146*9880d681SAndroid Build Coastguard Worker /// - Create PHI nodes at the unrolling loop exit to combine
147*9880d681SAndroid Build Coastguard Worker /// values that exit the unrolling loop code and jump around it.
148*9880d681SAndroid Build Coastguard Worker /// - Update PHI operands in the epilog loop by the new PHI nodes
149*9880d681SAndroid Build Coastguard Worker /// - Branch around the epilog loop if extra iters (ModVal) is zero.
150*9880d681SAndroid Build Coastguard Worker ///
ConnectEpilog(Loop * L,Value * ModVal,BasicBlock * NewExit,BasicBlock * Exit,BasicBlock * PreHeader,BasicBlock * EpilogPreHeader,BasicBlock * NewPreHeader,ValueToValueMapTy & VMap,DominatorTree * DT,LoopInfo * LI,bool PreserveLCSSA)151*9880d681SAndroid Build Coastguard Worker static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
152*9880d681SAndroid Build Coastguard Worker BasicBlock *Exit, BasicBlock *PreHeader,
153*9880d681SAndroid Build Coastguard Worker BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
154*9880d681SAndroid Build Coastguard Worker ValueToValueMapTy &VMap, DominatorTree *DT,
155*9880d681SAndroid Build Coastguard Worker LoopInfo *LI, bool PreserveLCSSA) {
156*9880d681SAndroid Build Coastguard Worker BasicBlock *Latch = L->getLoopLatch();
157*9880d681SAndroid Build Coastguard Worker assert(Latch && "Loop must have a latch");
158*9880d681SAndroid Build Coastguard Worker BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
159*9880d681SAndroid Build Coastguard Worker
160*9880d681SAndroid Build Coastguard Worker // Loop structure should be the following:
161*9880d681SAndroid Build Coastguard Worker //
162*9880d681SAndroid Build Coastguard Worker // PreHeader
163*9880d681SAndroid Build Coastguard Worker // NewPreHeader
164*9880d681SAndroid Build Coastguard Worker // Header
165*9880d681SAndroid Build Coastguard Worker // ...
166*9880d681SAndroid Build Coastguard Worker // Latch
167*9880d681SAndroid Build Coastguard Worker // NewExit (PN)
168*9880d681SAndroid Build Coastguard Worker // EpilogPreHeader
169*9880d681SAndroid Build Coastguard Worker // EpilogHeader
170*9880d681SAndroid Build Coastguard Worker // ...
171*9880d681SAndroid Build Coastguard Worker // EpilogLatch
172*9880d681SAndroid Build Coastguard Worker // Exit (EpilogPN)
173*9880d681SAndroid Build Coastguard Worker
174*9880d681SAndroid Build Coastguard Worker // Update PHI nodes at NewExit and Exit.
175*9880d681SAndroid Build Coastguard Worker for (Instruction &BBI : *NewExit) {
176*9880d681SAndroid Build Coastguard Worker PHINode *PN = dyn_cast<PHINode>(&BBI);
177*9880d681SAndroid Build Coastguard Worker // Exit when we passed all PHI nodes.
178*9880d681SAndroid Build Coastguard Worker if (!PN)
179*9880d681SAndroid Build Coastguard Worker break;
180*9880d681SAndroid Build Coastguard Worker // PN should be used in another PHI located in Exit block as
181*9880d681SAndroid Build Coastguard Worker // Exit was split by SplitBlockPredecessors into Exit and NewExit
182*9880d681SAndroid Build Coastguard Worker // Basicaly it should look like:
183*9880d681SAndroid Build Coastguard Worker // NewExit:
184*9880d681SAndroid Build Coastguard Worker // PN = PHI [I, Latch]
185*9880d681SAndroid Build Coastguard Worker // ...
186*9880d681SAndroid Build Coastguard Worker // Exit:
187*9880d681SAndroid Build Coastguard Worker // EpilogPN = PHI [PN, EpilogPreHeader]
188*9880d681SAndroid Build Coastguard Worker //
189*9880d681SAndroid Build Coastguard Worker // There is EpilogPreHeader incoming block instead of NewExit as
190*9880d681SAndroid Build Coastguard Worker // NewExit was spilt 1 more time to get EpilogPreHeader.
191*9880d681SAndroid Build Coastguard Worker assert(PN->hasOneUse() && "The phi should have 1 use");
192*9880d681SAndroid Build Coastguard Worker PHINode *EpilogPN = cast<PHINode> (PN->use_begin()->getUser());
193*9880d681SAndroid Build Coastguard Worker assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
194*9880d681SAndroid Build Coastguard Worker
195*9880d681SAndroid Build Coastguard Worker // Add incoming PreHeader from branch around the Loop
196*9880d681SAndroid Build Coastguard Worker PN->addIncoming(UndefValue::get(PN->getType()), PreHeader);
197*9880d681SAndroid Build Coastguard Worker
198*9880d681SAndroid Build Coastguard Worker Value *V = PN->getIncomingValueForBlock(Latch);
199*9880d681SAndroid Build Coastguard Worker Instruction *I = dyn_cast<Instruction>(V);
200*9880d681SAndroid Build Coastguard Worker if (I && L->contains(I))
201*9880d681SAndroid Build Coastguard Worker // If value comes from an instruction in the loop add VMap value.
202*9880d681SAndroid Build Coastguard Worker V = VMap.lookup(I);
203*9880d681SAndroid Build Coastguard Worker // For the instruction out of the loop, constant or undefined value
204*9880d681SAndroid Build Coastguard Worker // insert value itself.
205*9880d681SAndroid Build Coastguard Worker EpilogPN->addIncoming(V, EpilogLatch);
206*9880d681SAndroid Build Coastguard Worker
207*9880d681SAndroid Build Coastguard Worker assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
208*9880d681SAndroid Build Coastguard Worker "EpilogPN should have EpilogPreHeader incoming block");
209*9880d681SAndroid Build Coastguard Worker // Change EpilogPreHeader incoming block to NewExit.
210*9880d681SAndroid Build Coastguard Worker EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
211*9880d681SAndroid Build Coastguard Worker NewExit);
212*9880d681SAndroid Build Coastguard Worker // Now PHIs should look like:
213*9880d681SAndroid Build Coastguard Worker // NewExit:
214*9880d681SAndroid Build Coastguard Worker // PN = PHI [I, Latch], [undef, PreHeader]
215*9880d681SAndroid Build Coastguard Worker // ...
216*9880d681SAndroid Build Coastguard Worker // Exit:
217*9880d681SAndroid Build Coastguard Worker // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
218*9880d681SAndroid Build Coastguard Worker }
219*9880d681SAndroid Build Coastguard Worker
220*9880d681SAndroid Build Coastguard Worker // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
221*9880d681SAndroid Build Coastguard Worker // Update corresponding PHI nodes in epilog loop.
222*9880d681SAndroid Build Coastguard Worker for (BasicBlock *Succ : successors(Latch)) {
223*9880d681SAndroid Build Coastguard Worker // Skip this as we already updated phis in exit blocks.
224*9880d681SAndroid Build Coastguard Worker if (!L->contains(Succ))
225*9880d681SAndroid Build Coastguard Worker continue;
226*9880d681SAndroid Build Coastguard Worker for (Instruction &BBI : *Succ) {
227*9880d681SAndroid Build Coastguard Worker PHINode *PN = dyn_cast<PHINode>(&BBI);
228*9880d681SAndroid Build Coastguard Worker // Exit when we passed all PHI nodes.
229*9880d681SAndroid Build Coastguard Worker if (!PN)
230*9880d681SAndroid Build Coastguard Worker break;
231*9880d681SAndroid Build Coastguard Worker // Add new PHI nodes to the loop exit block and update epilog
232*9880d681SAndroid Build Coastguard Worker // PHIs with the new PHI values.
233*9880d681SAndroid Build Coastguard Worker PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",
234*9880d681SAndroid Build Coastguard Worker NewExit->getFirstNonPHI());
235*9880d681SAndroid Build Coastguard Worker // Adding a value to the new PHI node from the unrolling loop preheader.
236*9880d681SAndroid Build Coastguard Worker NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader);
237*9880d681SAndroid Build Coastguard Worker // Adding a value to the new PHI node from the unrolling loop latch.
238*9880d681SAndroid Build Coastguard Worker NewPN->addIncoming(PN->getIncomingValueForBlock(Latch), Latch);
239*9880d681SAndroid Build Coastguard Worker
240*9880d681SAndroid Build Coastguard Worker // Update the existing PHI node operand with the value from the new PHI
241*9880d681SAndroid Build Coastguard Worker // node. Corresponding instruction in epilog loop should be PHI.
242*9880d681SAndroid Build Coastguard Worker PHINode *VPN = cast<PHINode>(VMap[&BBI]);
243*9880d681SAndroid Build Coastguard Worker VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN);
244*9880d681SAndroid Build Coastguard Worker }
245*9880d681SAndroid Build Coastguard Worker }
246*9880d681SAndroid Build Coastguard Worker
247*9880d681SAndroid Build Coastguard Worker Instruction *InsertPt = NewExit->getTerminator();
248*9880d681SAndroid Build Coastguard Worker IRBuilder<> B(InsertPt);
249*9880d681SAndroid Build Coastguard Worker Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
250*9880d681SAndroid Build Coastguard Worker assert(Exit && "Loop must have a single exit block only");
251*9880d681SAndroid Build Coastguard Worker // Split the exit to maintain loop canonicalization guarantees
252*9880d681SAndroid Build Coastguard Worker SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
253*9880d681SAndroid Build Coastguard Worker SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI,
254*9880d681SAndroid Build Coastguard Worker PreserveLCSSA);
255*9880d681SAndroid Build Coastguard Worker // Add the branch to the exit block (around the unrolling loop)
256*9880d681SAndroid Build Coastguard Worker B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
257*9880d681SAndroid Build Coastguard Worker InsertPt->eraseFromParent();
258*9880d681SAndroid Build Coastguard Worker }
259*9880d681SAndroid Build Coastguard Worker
260*9880d681SAndroid Build Coastguard Worker /// Create a clone of the blocks in a loop and connect them together.
261*9880d681SAndroid Build Coastguard Worker /// If CreateRemainderLoop is false, loop structure will not be cloned,
262*9880d681SAndroid Build Coastguard Worker /// otherwise a new loop will be created including all cloned blocks, and the
263*9880d681SAndroid Build Coastguard Worker /// iterator of it switches to count NewIter down to 0.
264*9880d681SAndroid Build Coastguard Worker /// The cloned blocks should be inserted between InsertTop and InsertBot.
265*9880d681SAndroid Build Coastguard Worker /// If loop structure is cloned InsertTop should be new preheader, InsertBot
266*9880d681SAndroid Build Coastguard Worker /// new loop exit.
267*9880d681SAndroid Build Coastguard Worker ///
CloneLoopBlocks(Loop * L,Value * NewIter,const bool CreateRemainderLoop,const bool UseEpilogRemainder,BasicBlock * InsertTop,BasicBlock * InsertBot,BasicBlock * Preheader,std::vector<BasicBlock * > & NewBlocks,LoopBlocksDFS & LoopBlocks,ValueToValueMapTy & VMap,LoopInfo * LI)268*9880d681SAndroid Build Coastguard Worker static void CloneLoopBlocks(Loop *L, Value *NewIter,
269*9880d681SAndroid Build Coastguard Worker const bool CreateRemainderLoop,
270*9880d681SAndroid Build Coastguard Worker const bool UseEpilogRemainder,
271*9880d681SAndroid Build Coastguard Worker BasicBlock *InsertTop, BasicBlock *InsertBot,
272*9880d681SAndroid Build Coastguard Worker BasicBlock *Preheader,
273*9880d681SAndroid Build Coastguard Worker std::vector<BasicBlock *> &NewBlocks,
274*9880d681SAndroid Build Coastguard Worker LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
275*9880d681SAndroid Build Coastguard Worker LoopInfo *LI) {
276*9880d681SAndroid Build Coastguard Worker StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
277*9880d681SAndroid Build Coastguard Worker BasicBlock *Header = L->getHeader();
278*9880d681SAndroid Build Coastguard Worker BasicBlock *Latch = L->getLoopLatch();
279*9880d681SAndroid Build Coastguard Worker Function *F = Header->getParent();
280*9880d681SAndroid Build Coastguard Worker LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
281*9880d681SAndroid Build Coastguard Worker LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
282*9880d681SAndroid Build Coastguard Worker Loop *NewLoop = nullptr;
283*9880d681SAndroid Build Coastguard Worker Loop *ParentLoop = L->getParentLoop();
284*9880d681SAndroid Build Coastguard Worker if (CreateRemainderLoop) {
285*9880d681SAndroid Build Coastguard Worker NewLoop = new Loop();
286*9880d681SAndroid Build Coastguard Worker if (ParentLoop)
287*9880d681SAndroid Build Coastguard Worker ParentLoop->addChildLoop(NewLoop);
288*9880d681SAndroid Build Coastguard Worker else
289*9880d681SAndroid Build Coastguard Worker LI->addTopLevelLoop(NewLoop);
290*9880d681SAndroid Build Coastguard Worker }
291*9880d681SAndroid Build Coastguard Worker
292*9880d681SAndroid Build Coastguard Worker // For each block in the original loop, create a new copy,
293*9880d681SAndroid Build Coastguard Worker // and update the value map with the newly created values.
294*9880d681SAndroid Build Coastguard Worker for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
295*9880d681SAndroid Build Coastguard Worker BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
296*9880d681SAndroid Build Coastguard Worker NewBlocks.push_back(NewBB);
297*9880d681SAndroid Build Coastguard Worker
298*9880d681SAndroid Build Coastguard Worker if (NewLoop)
299*9880d681SAndroid Build Coastguard Worker NewLoop->addBasicBlockToLoop(NewBB, *LI);
300*9880d681SAndroid Build Coastguard Worker else if (ParentLoop)
301*9880d681SAndroid Build Coastguard Worker ParentLoop->addBasicBlockToLoop(NewBB, *LI);
302*9880d681SAndroid Build Coastguard Worker
303*9880d681SAndroid Build Coastguard Worker VMap[*BB] = NewBB;
304*9880d681SAndroid Build Coastguard Worker if (Header == *BB) {
305*9880d681SAndroid Build Coastguard Worker // For the first block, add a CFG connection to this newly
306*9880d681SAndroid Build Coastguard Worker // created block.
307*9880d681SAndroid Build Coastguard Worker InsertTop->getTerminator()->setSuccessor(0, NewBB);
308*9880d681SAndroid Build Coastguard Worker }
309*9880d681SAndroid Build Coastguard Worker
310*9880d681SAndroid Build Coastguard Worker if (Latch == *BB) {
311*9880d681SAndroid Build Coastguard Worker // For the last block, if CreateRemainderLoop is false, create a direct
312*9880d681SAndroid Build Coastguard Worker // jump to InsertBot. If not, create a loop back to cloned head.
313*9880d681SAndroid Build Coastguard Worker VMap.erase((*BB)->getTerminator());
314*9880d681SAndroid Build Coastguard Worker BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
315*9880d681SAndroid Build Coastguard Worker BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
316*9880d681SAndroid Build Coastguard Worker IRBuilder<> Builder(LatchBR);
317*9880d681SAndroid Build Coastguard Worker if (!CreateRemainderLoop) {
318*9880d681SAndroid Build Coastguard Worker Builder.CreateBr(InsertBot);
319*9880d681SAndroid Build Coastguard Worker } else {
320*9880d681SAndroid Build Coastguard Worker PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
321*9880d681SAndroid Build Coastguard Worker suffix + ".iter",
322*9880d681SAndroid Build Coastguard Worker FirstLoopBB->getFirstNonPHI());
323*9880d681SAndroid Build Coastguard Worker Value *IdxSub =
324*9880d681SAndroid Build Coastguard Worker Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
325*9880d681SAndroid Build Coastguard Worker NewIdx->getName() + ".sub");
326*9880d681SAndroid Build Coastguard Worker Value *IdxCmp =
327*9880d681SAndroid Build Coastguard Worker Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
328*9880d681SAndroid Build Coastguard Worker Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
329*9880d681SAndroid Build Coastguard Worker NewIdx->addIncoming(NewIter, InsertTop);
330*9880d681SAndroid Build Coastguard Worker NewIdx->addIncoming(IdxSub, NewBB);
331*9880d681SAndroid Build Coastguard Worker }
332*9880d681SAndroid Build Coastguard Worker LatchBR->eraseFromParent();
333*9880d681SAndroid Build Coastguard Worker }
334*9880d681SAndroid Build Coastguard Worker }
335*9880d681SAndroid Build Coastguard Worker
336*9880d681SAndroid Build Coastguard Worker // Change the incoming values to the ones defined in the preheader or
337*9880d681SAndroid Build Coastguard Worker // cloned loop.
338*9880d681SAndroid Build Coastguard Worker for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
339*9880d681SAndroid Build Coastguard Worker PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
340*9880d681SAndroid Build Coastguard Worker if (!CreateRemainderLoop) {
341*9880d681SAndroid Build Coastguard Worker if (UseEpilogRemainder) {
342*9880d681SAndroid Build Coastguard Worker unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
343*9880d681SAndroid Build Coastguard Worker NewPHI->setIncomingBlock(idx, InsertTop);
344*9880d681SAndroid Build Coastguard Worker NewPHI->removeIncomingValue(Latch, false);
345*9880d681SAndroid Build Coastguard Worker } else {
346*9880d681SAndroid Build Coastguard Worker VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
347*9880d681SAndroid Build Coastguard Worker cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
348*9880d681SAndroid Build Coastguard Worker }
349*9880d681SAndroid Build Coastguard Worker } else {
350*9880d681SAndroid Build Coastguard Worker unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
351*9880d681SAndroid Build Coastguard Worker NewPHI->setIncomingBlock(idx, InsertTop);
352*9880d681SAndroid Build Coastguard Worker BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
353*9880d681SAndroid Build Coastguard Worker idx = NewPHI->getBasicBlockIndex(Latch);
354*9880d681SAndroid Build Coastguard Worker Value *InVal = NewPHI->getIncomingValue(idx);
355*9880d681SAndroid Build Coastguard Worker NewPHI->setIncomingBlock(idx, NewLatch);
356*9880d681SAndroid Build Coastguard Worker if (Value *V = VMap.lookup(InVal))
357*9880d681SAndroid Build Coastguard Worker NewPHI->setIncomingValue(idx, V);
358*9880d681SAndroid Build Coastguard Worker }
359*9880d681SAndroid Build Coastguard Worker }
360*9880d681SAndroid Build Coastguard Worker if (NewLoop) {
361*9880d681SAndroid Build Coastguard Worker // Add unroll disable metadata to disable future unrolling for this loop.
362*9880d681SAndroid Build Coastguard Worker SmallVector<Metadata *, 4> MDs;
363*9880d681SAndroid Build Coastguard Worker // Reserve first location for self reference to the LoopID metadata node.
364*9880d681SAndroid Build Coastguard Worker MDs.push_back(nullptr);
365*9880d681SAndroid Build Coastguard Worker MDNode *LoopID = NewLoop->getLoopID();
366*9880d681SAndroid Build Coastguard Worker if (LoopID) {
367*9880d681SAndroid Build Coastguard Worker // First remove any existing loop unrolling metadata.
368*9880d681SAndroid Build Coastguard Worker for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
369*9880d681SAndroid Build Coastguard Worker bool IsUnrollMetadata = false;
370*9880d681SAndroid Build Coastguard Worker MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
371*9880d681SAndroid Build Coastguard Worker if (MD) {
372*9880d681SAndroid Build Coastguard Worker const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
373*9880d681SAndroid Build Coastguard Worker IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
374*9880d681SAndroid Build Coastguard Worker }
375*9880d681SAndroid Build Coastguard Worker if (!IsUnrollMetadata)
376*9880d681SAndroid Build Coastguard Worker MDs.push_back(LoopID->getOperand(i));
377*9880d681SAndroid Build Coastguard Worker }
378*9880d681SAndroid Build Coastguard Worker }
379*9880d681SAndroid Build Coastguard Worker
380*9880d681SAndroid Build Coastguard Worker LLVMContext &Context = NewLoop->getHeader()->getContext();
381*9880d681SAndroid Build Coastguard Worker SmallVector<Metadata *, 1> DisableOperands;
382*9880d681SAndroid Build Coastguard Worker DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable"));
383*9880d681SAndroid Build Coastguard Worker MDNode *DisableNode = MDNode::get(Context, DisableOperands);
384*9880d681SAndroid Build Coastguard Worker MDs.push_back(DisableNode);
385*9880d681SAndroid Build Coastguard Worker
386*9880d681SAndroid Build Coastguard Worker MDNode *NewLoopID = MDNode::get(Context, MDs);
387*9880d681SAndroid Build Coastguard Worker // Set operand 0 to refer to the loop id itself.
388*9880d681SAndroid Build Coastguard Worker NewLoopID->replaceOperandWith(0, NewLoopID);
389*9880d681SAndroid Build Coastguard Worker NewLoop->setLoopID(NewLoopID);
390*9880d681SAndroid Build Coastguard Worker }
391*9880d681SAndroid Build Coastguard Worker }
392*9880d681SAndroid Build Coastguard Worker
393*9880d681SAndroid Build Coastguard Worker /// Insert code in the prolog/epilog code when unrolling a loop with a
394*9880d681SAndroid Build Coastguard Worker /// run-time trip-count.
395*9880d681SAndroid Build Coastguard Worker ///
396*9880d681SAndroid Build Coastguard Worker /// This method assumes that the loop unroll factor is total number
397*9880d681SAndroid Build Coastguard Worker /// of loop bodies in the loop after unrolling. (Some folks refer
398*9880d681SAndroid Build Coastguard Worker /// to the unroll factor as the number of *extra* copies added).
399*9880d681SAndroid Build Coastguard Worker /// We assume also that the loop unroll factor is a power-of-two. So, after
400*9880d681SAndroid Build Coastguard Worker /// unrolling the loop, the number of loop bodies executed is 2,
401*9880d681SAndroid Build Coastguard Worker /// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch
402*9880d681SAndroid Build Coastguard Worker /// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
403*9880d681SAndroid Build Coastguard Worker /// the switch instruction is generated.
404*9880d681SAndroid Build Coastguard Worker ///
405*9880d681SAndroid Build Coastguard Worker /// ***Prolog case***
406*9880d681SAndroid Build Coastguard Worker /// extraiters = tripcount % loopfactor
407*9880d681SAndroid Build Coastguard Worker /// if (extraiters == 0) jump Loop:
408*9880d681SAndroid Build Coastguard Worker /// else jump Prol:
409*9880d681SAndroid Build Coastguard Worker /// Prol: LoopBody;
410*9880d681SAndroid Build Coastguard Worker /// extraiters -= 1 // Omitted if unroll factor is 2.
411*9880d681SAndroid Build Coastguard Worker /// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.
412*9880d681SAndroid Build Coastguard Worker /// if (tripcount < loopfactor) jump End:
413*9880d681SAndroid Build Coastguard Worker /// Loop:
414*9880d681SAndroid Build Coastguard Worker /// ...
415*9880d681SAndroid Build Coastguard Worker /// End:
416*9880d681SAndroid Build Coastguard Worker ///
417*9880d681SAndroid Build Coastguard Worker /// ***Epilog case***
418*9880d681SAndroid Build Coastguard Worker /// extraiters = tripcount % loopfactor
419*9880d681SAndroid Build Coastguard Worker /// if (tripcount < loopfactor) jump LoopExit:
420*9880d681SAndroid Build Coastguard Worker /// unroll_iters = tripcount - extraiters
421*9880d681SAndroid Build Coastguard Worker /// Loop: LoopBody; (executes unroll_iter times);
422*9880d681SAndroid Build Coastguard Worker /// unroll_iter -= 1
423*9880d681SAndroid Build Coastguard Worker /// if (unroll_iter != 0) jump Loop:
424*9880d681SAndroid Build Coastguard Worker /// LoopExit:
425*9880d681SAndroid Build Coastguard Worker /// if (extraiters == 0) jump EpilExit:
426*9880d681SAndroid Build Coastguard Worker /// Epil: LoopBody; (executes extraiters times)
427*9880d681SAndroid Build Coastguard Worker /// extraiters -= 1 // Omitted if unroll factor is 2.
428*9880d681SAndroid Build Coastguard Worker /// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
429*9880d681SAndroid Build Coastguard Worker /// EpilExit:
430*9880d681SAndroid Build Coastguard Worker
UnrollRuntimeLoopRemainder(Loop * L,unsigned Count,bool AllowExpensiveTripCount,bool UseEpilogRemainder,LoopInfo * LI,ScalarEvolution * SE,DominatorTree * DT,bool PreserveLCSSA)431*9880d681SAndroid Build Coastguard Worker bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
432*9880d681SAndroid Build Coastguard Worker bool AllowExpensiveTripCount,
433*9880d681SAndroid Build Coastguard Worker bool UseEpilogRemainder,
434*9880d681SAndroid Build Coastguard Worker LoopInfo *LI, ScalarEvolution *SE,
435*9880d681SAndroid Build Coastguard Worker DominatorTree *DT, bool PreserveLCSSA) {
436*9880d681SAndroid Build Coastguard Worker // for now, only unroll loops that contain a single exit
437*9880d681SAndroid Build Coastguard Worker if (!L->getExitingBlock())
438*9880d681SAndroid Build Coastguard Worker return false;
439*9880d681SAndroid Build Coastguard Worker
440*9880d681SAndroid Build Coastguard Worker // Make sure the loop is in canonical form, and there is a single
441*9880d681SAndroid Build Coastguard Worker // exit block only.
442*9880d681SAndroid Build Coastguard Worker if (!L->isLoopSimplifyForm())
443*9880d681SAndroid Build Coastguard Worker return false;
444*9880d681SAndroid Build Coastguard Worker BasicBlock *Exit = L->getUniqueExitBlock(); // successor out of loop
445*9880d681SAndroid Build Coastguard Worker if (!Exit)
446*9880d681SAndroid Build Coastguard Worker return false;
447*9880d681SAndroid Build Coastguard Worker
448*9880d681SAndroid Build Coastguard Worker // Use Scalar Evolution to compute the trip count. This allows more loops to
449*9880d681SAndroid Build Coastguard Worker // be unrolled than relying on induction var simplification.
450*9880d681SAndroid Build Coastguard Worker if (!SE)
451*9880d681SAndroid Build Coastguard Worker return false;
452*9880d681SAndroid Build Coastguard Worker
453*9880d681SAndroid Build Coastguard Worker // Only unroll loops with a computable trip count, and the trip count needs
454*9880d681SAndroid Build Coastguard Worker // to be an int value (allowing a pointer type is a TODO item).
455*9880d681SAndroid Build Coastguard Worker const SCEV *BECountSC = SE->getBackedgeTakenCount(L);
456*9880d681SAndroid Build Coastguard Worker if (isa<SCEVCouldNotCompute>(BECountSC) ||
457*9880d681SAndroid Build Coastguard Worker !BECountSC->getType()->isIntegerTy())
458*9880d681SAndroid Build Coastguard Worker return false;
459*9880d681SAndroid Build Coastguard Worker
460*9880d681SAndroid Build Coastguard Worker unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
461*9880d681SAndroid Build Coastguard Worker
462*9880d681SAndroid Build Coastguard Worker // Add 1 since the backedge count doesn't include the first loop iteration.
463*9880d681SAndroid Build Coastguard Worker const SCEV *TripCountSC =
464*9880d681SAndroid Build Coastguard Worker SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
465*9880d681SAndroid Build Coastguard Worker if (isa<SCEVCouldNotCompute>(TripCountSC))
466*9880d681SAndroid Build Coastguard Worker return false;
467*9880d681SAndroid Build Coastguard Worker
468*9880d681SAndroid Build Coastguard Worker BasicBlock *Header = L->getHeader();
469*9880d681SAndroid Build Coastguard Worker BasicBlock *PreHeader = L->getLoopPreheader();
470*9880d681SAndroid Build Coastguard Worker BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
471*9880d681SAndroid Build Coastguard Worker const DataLayout &DL = Header->getModule()->getDataLayout();
472*9880d681SAndroid Build Coastguard Worker SCEVExpander Expander(*SE, DL, "loop-unroll");
473*9880d681SAndroid Build Coastguard Worker if (!AllowExpensiveTripCount &&
474*9880d681SAndroid Build Coastguard Worker Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR))
475*9880d681SAndroid Build Coastguard Worker return false;
476*9880d681SAndroid Build Coastguard Worker
477*9880d681SAndroid Build Coastguard Worker // This constraint lets us deal with an overflowing trip count easily; see the
478*9880d681SAndroid Build Coastguard Worker // comment on ModVal below.
479*9880d681SAndroid Build Coastguard Worker if (Log2_32(Count) > BEWidth)
480*9880d681SAndroid Build Coastguard Worker return false;
481*9880d681SAndroid Build Coastguard Worker
482*9880d681SAndroid Build Coastguard Worker // If this loop is nested, then the loop unroller changes the code in the
483*9880d681SAndroid Build Coastguard Worker // parent loop, so the Scalar Evolution pass needs to be run again.
484*9880d681SAndroid Build Coastguard Worker if (Loop *ParentLoop = L->getParentLoop())
485*9880d681SAndroid Build Coastguard Worker SE->forgetLoop(ParentLoop);
486*9880d681SAndroid Build Coastguard Worker
487*9880d681SAndroid Build Coastguard Worker BasicBlock *Latch = L->getLoopLatch();
488*9880d681SAndroid Build Coastguard Worker
489*9880d681SAndroid Build Coastguard Worker // Loop structure is the following:
490*9880d681SAndroid Build Coastguard Worker //
491*9880d681SAndroid Build Coastguard Worker // PreHeader
492*9880d681SAndroid Build Coastguard Worker // Header
493*9880d681SAndroid Build Coastguard Worker // ...
494*9880d681SAndroid Build Coastguard Worker // Latch
495*9880d681SAndroid Build Coastguard Worker // Exit
496*9880d681SAndroid Build Coastguard Worker
497*9880d681SAndroid Build Coastguard Worker BasicBlock *NewPreHeader;
498*9880d681SAndroid Build Coastguard Worker BasicBlock *NewExit = nullptr;
499*9880d681SAndroid Build Coastguard Worker BasicBlock *PrologExit = nullptr;
500*9880d681SAndroid Build Coastguard Worker BasicBlock *EpilogPreHeader = nullptr;
501*9880d681SAndroid Build Coastguard Worker BasicBlock *PrologPreHeader = nullptr;
502*9880d681SAndroid Build Coastguard Worker
503*9880d681SAndroid Build Coastguard Worker if (UseEpilogRemainder) {
504*9880d681SAndroid Build Coastguard Worker // If epilog remainder
505*9880d681SAndroid Build Coastguard Worker // Split PreHeader to insert a branch around loop for unrolling.
506*9880d681SAndroid Build Coastguard Worker NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
507*9880d681SAndroid Build Coastguard Worker NewPreHeader->setName(PreHeader->getName() + ".new");
508*9880d681SAndroid Build Coastguard Worker // Split Exit to create phi nodes from branch above.
509*9880d681SAndroid Build Coastguard Worker SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
510*9880d681SAndroid Build Coastguard Worker NewExit = SplitBlockPredecessors(Exit, Preds, ".unr-lcssa",
511*9880d681SAndroid Build Coastguard Worker DT, LI, PreserveLCSSA);
512*9880d681SAndroid Build Coastguard Worker // Split NewExit to insert epilog remainder loop.
513*9880d681SAndroid Build Coastguard Worker EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);
514*9880d681SAndroid Build Coastguard Worker EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
515*9880d681SAndroid Build Coastguard Worker } else {
516*9880d681SAndroid Build Coastguard Worker // If prolog remainder
517*9880d681SAndroid Build Coastguard Worker // Split the original preheader twice to insert prolog remainder loop
518*9880d681SAndroid Build Coastguard Worker PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
519*9880d681SAndroid Build Coastguard Worker PrologPreHeader->setName(Header->getName() + ".prol.preheader");
520*9880d681SAndroid Build Coastguard Worker PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
521*9880d681SAndroid Build Coastguard Worker DT, LI);
522*9880d681SAndroid Build Coastguard Worker PrologExit->setName(Header->getName() + ".prol.loopexit");
523*9880d681SAndroid Build Coastguard Worker // Split PrologExit to get NewPreHeader.
524*9880d681SAndroid Build Coastguard Worker NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
525*9880d681SAndroid Build Coastguard Worker NewPreHeader->setName(PreHeader->getName() + ".new");
526*9880d681SAndroid Build Coastguard Worker }
527*9880d681SAndroid Build Coastguard Worker // Loop structure should be the following:
528*9880d681SAndroid Build Coastguard Worker // Epilog Prolog
529*9880d681SAndroid Build Coastguard Worker //
530*9880d681SAndroid Build Coastguard Worker // PreHeader PreHeader
531*9880d681SAndroid Build Coastguard Worker // *NewPreHeader *PrologPreHeader
532*9880d681SAndroid Build Coastguard Worker // Header *PrologExit
533*9880d681SAndroid Build Coastguard Worker // ... *NewPreHeader
534*9880d681SAndroid Build Coastguard Worker // Latch Header
535*9880d681SAndroid Build Coastguard Worker // *NewExit ...
536*9880d681SAndroid Build Coastguard Worker // *EpilogPreHeader Latch
537*9880d681SAndroid Build Coastguard Worker // Exit Exit
538*9880d681SAndroid Build Coastguard Worker
539*9880d681SAndroid Build Coastguard Worker // Calculate conditions for branch around loop for unrolling
540*9880d681SAndroid Build Coastguard Worker // in epilog case and around prolog remainder loop in prolog case.
541*9880d681SAndroid Build Coastguard Worker // Compute the number of extra iterations required, which is:
542*9880d681SAndroid Build Coastguard Worker // extra iterations = run-time trip count % loop unroll factor
543*9880d681SAndroid Build Coastguard Worker PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
544*9880d681SAndroid Build Coastguard Worker Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
545*9880d681SAndroid Build Coastguard Worker PreHeaderBR);
546*9880d681SAndroid Build Coastguard Worker Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
547*9880d681SAndroid Build Coastguard Worker PreHeaderBR);
548*9880d681SAndroid Build Coastguard Worker IRBuilder<> B(PreHeaderBR);
549*9880d681SAndroid Build Coastguard Worker Value *ModVal;
550*9880d681SAndroid Build Coastguard Worker // Calculate ModVal = (BECount + 1) % Count.
551*9880d681SAndroid Build Coastguard Worker // Note that TripCount is BECount + 1.
552*9880d681SAndroid Build Coastguard Worker if (isPowerOf2_32(Count)) {
553*9880d681SAndroid Build Coastguard Worker // When Count is power of 2 we don't BECount for epilog case, however we'll
554*9880d681SAndroid Build Coastguard Worker // need it for a branch around unrolling loop for prolog case.
555*9880d681SAndroid Build Coastguard Worker ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
556*9880d681SAndroid Build Coastguard Worker // 1. There are no iterations to be run in the prolog/epilog loop.
557*9880d681SAndroid Build Coastguard Worker // OR
558*9880d681SAndroid Build Coastguard Worker // 2. The addition computing TripCount overflowed.
559*9880d681SAndroid Build Coastguard Worker //
560*9880d681SAndroid Build Coastguard Worker // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
561*9880d681SAndroid Build Coastguard Worker // the number of iterations that remain to be run in the original loop is a
562*9880d681SAndroid Build Coastguard Worker // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
563*9880d681SAndroid Build Coastguard Worker // explicitly check this above).
564*9880d681SAndroid Build Coastguard Worker } else {
565*9880d681SAndroid Build Coastguard Worker // As (BECount + 1) can potentially unsigned overflow we count
566*9880d681SAndroid Build Coastguard Worker // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
567*9880d681SAndroid Build Coastguard Worker Value *ModValTmp = B.CreateURem(BECount,
568*9880d681SAndroid Build Coastguard Worker ConstantInt::get(BECount->getType(),
569*9880d681SAndroid Build Coastguard Worker Count));
570*9880d681SAndroid Build Coastguard Worker Value *ModValAdd = B.CreateAdd(ModValTmp,
571*9880d681SAndroid Build Coastguard Worker ConstantInt::get(ModValTmp->getType(), 1));
572*9880d681SAndroid Build Coastguard Worker // At that point (BECount % Count) + 1 could be equal to Count.
573*9880d681SAndroid Build Coastguard Worker // To handle this case we need to take mod by Count one more time.
574*9880d681SAndroid Build Coastguard Worker ModVal = B.CreateURem(ModValAdd,
575*9880d681SAndroid Build Coastguard Worker ConstantInt::get(BECount->getType(), Count),
576*9880d681SAndroid Build Coastguard Worker "xtraiter");
577*9880d681SAndroid Build Coastguard Worker }
578*9880d681SAndroid Build Coastguard Worker Value *BranchVal =
579*9880d681SAndroid Build Coastguard Worker UseEpilogRemainder ? B.CreateICmpULT(BECount,
580*9880d681SAndroid Build Coastguard Worker ConstantInt::get(BECount->getType(),
581*9880d681SAndroid Build Coastguard Worker Count - 1)) :
582*9880d681SAndroid Build Coastguard Worker B.CreateIsNotNull(ModVal, "lcmp.mod");
583*9880d681SAndroid Build Coastguard Worker BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
584*9880d681SAndroid Build Coastguard Worker BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
585*9880d681SAndroid Build Coastguard Worker // Branch to either remainder (extra iterations) loop or unrolling loop.
586*9880d681SAndroid Build Coastguard Worker B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
587*9880d681SAndroid Build Coastguard Worker PreHeaderBR->eraseFromParent();
588*9880d681SAndroid Build Coastguard Worker Function *F = Header->getParent();
589*9880d681SAndroid Build Coastguard Worker // Get an ordered list of blocks in the loop to help with the ordering of the
590*9880d681SAndroid Build Coastguard Worker // cloned blocks in the prolog/epilog code
591*9880d681SAndroid Build Coastguard Worker LoopBlocksDFS LoopBlocks(L);
592*9880d681SAndroid Build Coastguard Worker LoopBlocks.perform(LI);
593*9880d681SAndroid Build Coastguard Worker
594*9880d681SAndroid Build Coastguard Worker //
595*9880d681SAndroid Build Coastguard Worker // For each extra loop iteration, create a copy of the loop's basic blocks
596*9880d681SAndroid Build Coastguard Worker // and generate a condition that branches to the copy depending on the
597*9880d681SAndroid Build Coastguard Worker // number of 'left over' iterations.
598*9880d681SAndroid Build Coastguard Worker //
599*9880d681SAndroid Build Coastguard Worker std::vector<BasicBlock *> NewBlocks;
600*9880d681SAndroid Build Coastguard Worker ValueToValueMapTy VMap;
601*9880d681SAndroid Build Coastguard Worker
602*9880d681SAndroid Build Coastguard Worker // For unroll factor 2 remainder loop will have 1 iterations.
603*9880d681SAndroid Build Coastguard Worker // Do not create 1 iteration loop.
604*9880d681SAndroid Build Coastguard Worker bool CreateRemainderLoop = (Count != 2);
605*9880d681SAndroid Build Coastguard Worker
606*9880d681SAndroid Build Coastguard Worker // Clone all the basic blocks in the loop. If Count is 2, we don't clone
607*9880d681SAndroid Build Coastguard Worker // the loop, otherwise we create a cloned loop to execute the extra
608*9880d681SAndroid Build Coastguard Worker // iterations. This function adds the appropriate CFG connections.
609*9880d681SAndroid Build Coastguard Worker BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit;
610*9880d681SAndroid Build Coastguard Worker BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
611*9880d681SAndroid Build Coastguard Worker CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop,
612*9880d681SAndroid Build Coastguard Worker InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI);
613*9880d681SAndroid Build Coastguard Worker
614*9880d681SAndroid Build Coastguard Worker // Insert the cloned blocks into the function.
615*9880d681SAndroid Build Coastguard Worker F->getBasicBlockList().splice(InsertBot->getIterator(),
616*9880d681SAndroid Build Coastguard Worker F->getBasicBlockList(),
617*9880d681SAndroid Build Coastguard Worker NewBlocks[0]->getIterator(),
618*9880d681SAndroid Build Coastguard Worker F->end());
619*9880d681SAndroid Build Coastguard Worker
620*9880d681SAndroid Build Coastguard Worker // Loop structure should be the following:
621*9880d681SAndroid Build Coastguard Worker // Epilog Prolog
622*9880d681SAndroid Build Coastguard Worker //
623*9880d681SAndroid Build Coastguard Worker // PreHeader PreHeader
624*9880d681SAndroid Build Coastguard Worker // NewPreHeader PrologPreHeader
625*9880d681SAndroid Build Coastguard Worker // Header PrologHeader
626*9880d681SAndroid Build Coastguard Worker // ... ...
627*9880d681SAndroid Build Coastguard Worker // Latch PrologLatch
628*9880d681SAndroid Build Coastguard Worker // NewExit PrologExit
629*9880d681SAndroid Build Coastguard Worker // EpilogPreHeader NewPreHeader
630*9880d681SAndroid Build Coastguard Worker // EpilogHeader Header
631*9880d681SAndroid Build Coastguard Worker // ... ...
632*9880d681SAndroid Build Coastguard Worker // EpilogLatch Latch
633*9880d681SAndroid Build Coastguard Worker // Exit Exit
634*9880d681SAndroid Build Coastguard Worker
635*9880d681SAndroid Build Coastguard Worker // Rewrite the cloned instruction operands to use the values created when the
636*9880d681SAndroid Build Coastguard Worker // clone is created.
637*9880d681SAndroid Build Coastguard Worker for (BasicBlock *BB : NewBlocks) {
638*9880d681SAndroid Build Coastguard Worker for (Instruction &I : *BB) {
639*9880d681SAndroid Build Coastguard Worker RemapInstruction(&I, VMap,
640*9880d681SAndroid Build Coastguard Worker RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
641*9880d681SAndroid Build Coastguard Worker }
642*9880d681SAndroid Build Coastguard Worker }
643*9880d681SAndroid Build Coastguard Worker
644*9880d681SAndroid Build Coastguard Worker if (UseEpilogRemainder) {
645*9880d681SAndroid Build Coastguard Worker // Connect the epilog code to the original loop and update the
646*9880d681SAndroid Build Coastguard Worker // PHI functions.
647*9880d681SAndroid Build Coastguard Worker ConnectEpilog(L, ModVal, NewExit, Exit, PreHeader,
648*9880d681SAndroid Build Coastguard Worker EpilogPreHeader, NewPreHeader, VMap, DT, LI,
649*9880d681SAndroid Build Coastguard Worker PreserveLCSSA);
650*9880d681SAndroid Build Coastguard Worker
651*9880d681SAndroid Build Coastguard Worker // Update counter in loop for unrolling.
652*9880d681SAndroid Build Coastguard Worker // I should be multiply of Count.
653*9880d681SAndroid Build Coastguard Worker IRBuilder<> B2(NewPreHeader->getTerminator());
654*9880d681SAndroid Build Coastguard Worker Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
655*9880d681SAndroid Build Coastguard Worker BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
656*9880d681SAndroid Build Coastguard Worker B2.SetInsertPoint(LatchBR);
657*9880d681SAndroid Build Coastguard Worker PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
658*9880d681SAndroid Build Coastguard Worker Header->getFirstNonPHI());
659*9880d681SAndroid Build Coastguard Worker Value *IdxSub =
660*9880d681SAndroid Build Coastguard Worker B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
661*9880d681SAndroid Build Coastguard Worker NewIdx->getName() + ".nsub");
662*9880d681SAndroid Build Coastguard Worker Value *IdxCmp;
663*9880d681SAndroid Build Coastguard Worker if (LatchBR->getSuccessor(0) == Header)
664*9880d681SAndroid Build Coastguard Worker IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
665*9880d681SAndroid Build Coastguard Worker else
666*9880d681SAndroid Build Coastguard Worker IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
667*9880d681SAndroid Build Coastguard Worker NewIdx->addIncoming(TestVal, NewPreHeader);
668*9880d681SAndroid Build Coastguard Worker NewIdx->addIncoming(IdxSub, Latch);
669*9880d681SAndroid Build Coastguard Worker LatchBR->setCondition(IdxCmp);
670*9880d681SAndroid Build Coastguard Worker } else {
671*9880d681SAndroid Build Coastguard Worker // Connect the prolog code to the original loop and update the
672*9880d681SAndroid Build Coastguard Worker // PHI functions.
673*9880d681SAndroid Build Coastguard Worker ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader,
674*9880d681SAndroid Build Coastguard Worker VMap, DT, LI, PreserveLCSSA);
675*9880d681SAndroid Build Coastguard Worker }
676*9880d681SAndroid Build Coastguard Worker NumRuntimeUnrolled++;
677*9880d681SAndroid Build Coastguard Worker return true;
678*9880d681SAndroid Build Coastguard Worker }
679