// Copyright (c) 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <memory>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "source/cfa.h"
#include "source/opt/cfg.h"
#include "source/opt/ir_builder.h"
#include "source/opt/ir_context.h"
#include "source/opt/loop_descriptor.h"
#include "source/opt/loop_utils.h"

namespace spvtools {
namespace opt {
namespace {
// Return true if |bb| dominates at least one block in |exits|.
inline bool DominatesAnExit(BasicBlock* bb,
                            const std::unordered_set<BasicBlock*>& exits,
                            const DominatorTree& dom_tree) {
  for (BasicBlock* e_bb : exits)
    if (dom_tree.Dominates(bb, e_bb)) return true;
  return false;
}

// Utility class to rewrite out-of-loop uses of an in-loop definition in terms
// of phi instructions to achieve LCSSA form.
// For a given definition, the class user registers phi instructions using
// that definition in all loop exit blocks through which the definition
// escapes. Then, when rewriting a use of the definition, the rewriter walks
// the paths from the use to the loop exits. At each step, it will insert a
// phi instruction to merge the incoming values according to the exit blocks'
// definitions.
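//
// Illustrative sketch (hypothetical ids, not taken from a real module): given
// a loop defining %x, two exit blocks %e1 and %e2 reached from in-loop blocks
// %b1 and %b2, and a use of %x in a block %m where the exits merge, the
// rewriter produces:
//   %e1: %p1 = OpPhi %int %x %b1            ; phi in each exit block
//   %e2: %p2 = OpPhi %int %x %b2
//   %m:  %pm = OpPhi %int %p1 %e1 %p2 %e2   ; merge along the path to the use
// and the use of %x in %m is replaced by %pm.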
class LCSSARewriter {
 public:
  LCSSARewriter(IRContext* context, const DominatorTree& dom_tree,
                const std::unordered_set<BasicBlock*>& exit_bb,
                BasicBlock* merge_block)
      : context_(context),
        cfg_(context_->cfg()),
        dom_tree_(dom_tree),
        exit_bb_(exit_bb),
        merge_block_id_(merge_block ? merge_block->id() : 0) {}

  struct UseRewriter {
    explicit UseRewriter(LCSSARewriter* base, const Instruction& def_insn)
        : base_(base), def_insn_(def_insn) {}
    // Rewrites the use of |def_insn_| by the instruction |user| at the index
    // |operand_index| in terms of phi instructions. This recursively builds
    // new phi instructions from |user| to the loop exit blocks' phis. The use
    // of |def_insn_| in |user| is replaced by the relevant phi instruction at
    // the end of the operation.
    // It is assumed that |user| does not dominate any of the loop exit basic
    // blocks. This operation does not update the def/use manager; instead it
    // records what needs to be updated. The actual update is performed by
    // UpdateManagers.
    void RewriteUse(BasicBlock* bb, Instruction* user,
                    uint32_t operand_index) {
      assert(
          (user->opcode() != spv::Op::OpPhi || bb != GetParent(user)) &&
          "The root basic block must be the incoming edge if |user| is a phi "
          "instruction");
      assert((user->opcode() == spv::Op::OpPhi || bb == GetParent(user)) &&
             "The root basic block must be the instruction parent if |user| "
             "is not a phi instruction");

      Instruction* new_def = GetOrBuildIncoming(bb->id());

      user->SetOperand(operand_index, {new_def->result_id()});
      rewritten_.insert(user);
    }

    // In-place update of some managers (avoid full invalidation).
    inline void UpdateManagers() {
      analysis::DefUseManager* def_use_mgr =
          base_->context_->get_def_use_mgr();
      // Register all new definitions.
      for (Instruction* insn : rewritten_) {
        def_use_mgr->AnalyzeInstDef(insn);
      }
      // Register all new uses.
      for (Instruction* insn : rewritten_) {
        def_use_mgr->AnalyzeInstUse(insn);
      }
    }

   private:
    // Return the basic block that |instr| belongs to.
    BasicBlock* GetParent(Instruction* instr) {
      return base_->context_->get_instr_block(instr);
    }

    // Builds a phi instruction for the basic block |bb|. The function assumes
    // that |defining_blocks| contains the list of basic blocks that define
    // the usable value for each predecessor of |bb|.
    inline Instruction* CreatePhiInstruction(
        BasicBlock* bb, const std::vector<uint32_t>& defining_blocks) {
      std::vector<uint32_t> incomings;
      const std::vector<uint32_t>& bb_preds = base_->cfg_->preds(bb->id());
      assert(bb_preds.size() == defining_blocks.size());
      for (size_t i = 0; i < bb_preds.size(); i++) {
        incomings.push_back(
            GetOrBuildIncoming(defining_blocks[i])->result_id());
        incomings.push_back(bb_preds[i]);
      }
      InstructionBuilder builder(base_->context_, &*bb->begin(),
                                 IRContext::kAnalysisInstrToBlockMapping);
      Instruction* incoming_phi =
          builder.AddPhi(def_insn_.type_id(), incomings);

      rewritten_.insert(incoming_phi);
      return incoming_phi;
    }

    // Builds a phi instruction for the basic block |bb|; all incoming values
    // will be |value|.
    inline Instruction* CreatePhiInstruction(BasicBlock* bb,
                                             const Instruction& value) {
      std::vector<uint32_t> incomings;
      const std::vector<uint32_t>& bb_preds = base_->cfg_->preds(bb->id());
      for (size_t i = 0; i < bb_preds.size(); i++) {
        incomings.push_back(value.result_id());
        incomings.push_back(bb_preds[i]);
      }
      InstructionBuilder builder(base_->context_, &*bb->begin(),
                                 IRContext::kAnalysisInstrToBlockMapping);
      Instruction* incoming_phi =
          builder.AddPhi(def_insn_.type_id(), incomings);

      rewritten_.insert(incoming_phi);
      return incoming_phi;
    }

    // Return the new def to use for the basic block |bb_id|.
    // If |bb_id| does not have a suitable def to use then we:
    //   - return the common def used by all predecessors;
    //   - if there is no common def, then we build a new phi instruction at
    //     the beginning of |bb_id| and return this new instruction.
    Instruction* GetOrBuildIncoming(uint32_t bb_id) {
      assert(base_->cfg_->block(bb_id) != nullptr && "Unknown basic block");

      Instruction*& incoming_phi = bb_to_phi_[bb_id];
      if (incoming_phi) {
        return incoming_phi;
      }

      BasicBlock* bb = &*base_->cfg_->block(bb_id);
      // If this is an exit basic block, check whether there already is an
      // eligible phi instruction. An eligible phi has |def_insn_| as all its
      // incoming values.
      if (base_->exit_bb_.count(bb)) {
        // Look for an eligible phi in this block.
        if (!bb->WhileEachPhiInst([&incoming_phi, this](Instruction* phi) {
              for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
                if (phi->GetSingleWordInOperand(i) != def_insn_.result_id())
                  return true;
              }
              incoming_phi = phi;
              rewritten_.insert(incoming_phi);
              return false;
            })) {
          return incoming_phi;
        }
        incoming_phi = CreatePhiInstruction(bb, def_insn_);
        return incoming_phi;
      }

      // Get the block that defines the value to use for each predecessor.
      // If the vector has one value, then this block does not need to build
      // a phi instruction unless |bb_id| is the loop merge block.
      const std::vector<uint32_t>& defining_blocks =
          base_->GetDefiningBlocks(bb_id);

      // Special case for structured loops: the merge block might not be in
      // the exit block set. To maintain the structured properties, it eases
      // later transformations if the merge block also holds a phi
      // instruction like the exit blocks do.
      if (defining_blocks.size() > 1 || bb_id == base_->merge_block_id_) {
        if (defining_blocks.size() > 1) {
          incoming_phi = CreatePhiInstruction(bb, defining_blocks);
        } else {
          assert(bb_id == base_->merge_block_id_);
          incoming_phi =
              CreatePhiInstruction(bb, *GetOrBuildIncoming(defining_blocks[0]));
        }
      } else {
        incoming_phi = GetOrBuildIncoming(defining_blocks[0]);
      }

      return incoming_phi;
    }

    LCSSARewriter* base_;
    const Instruction& def_insn_;
    std::unordered_map<uint32_t, Instruction*> bb_to_phi_;
    std::unordered_set<Instruction*> rewritten_;
  };

 private:
  // Returns the list of blocks that define the value to use for each
  // predecessor of |bb_id|. If all predecessors share a common defining
  // block, the list is reduced to that single block (no phi is needed);
  // otherwise it holds one defining block per predecessor, in predecessor
  // order (see |bb_to_defining_blocks_| below).
  const std::vector<uint32_t>& GetDefiningBlocks(uint32_t bb_id) {
    assert(cfg_->block(bb_id) != nullptr && "Unknown basic block");
    std::vector<uint32_t>& defining_blocks = bb_to_defining_blocks_[bb_id];

    if (defining_blocks.size()) return defining_blocks;

    // Check if one of the loop exit basic blocks dominates |bb_id|.
    for (const BasicBlock* e_bb : exit_bb_) {
      if (dom_tree_.Dominates(e_bb->id(), bb_id)) {
        defining_blocks.push_back(e_bb->id());
        return defining_blocks;
      }
    }

    // Process the predecessors; each will return its suitable blocks.
    // If they are all the same, then this basic block is dominated by a
    // common block and we won't need to build a phi instruction.
    for (uint32_t pred_id : cfg_->preds(bb_id)) {
      const std::vector<uint32_t>& pred_blocks = GetDefiningBlocks(pred_id);
      if (pred_blocks.size() == 1)
        defining_blocks.push_back(pred_blocks[0]);
      else
        defining_blocks.push_back(pred_id);
    }
    assert(defining_blocks.size());
    if (std::all_of(defining_blocks.begin(), defining_blocks.end(),
                    [&defining_blocks](uint32_t id) {
                      return id == defining_blocks[0];
                    })) {
      // No need for a phi.
      defining_blocks.resize(1);
    }

    return defining_blocks;
  }

  IRContext* context_;
  CFG* cfg_;
  const DominatorTree& dom_tree_;
  const std::unordered_set<BasicBlock*>& exit_bb_;
  uint32_t merge_block_id_;
  // This map represents the set of known paths. For each key, the vector
  // represents the set of blocks holding the definitions to be used to build
  // the phi instruction.
  // If the vector is empty, then the path is not known yet and must be built.
  // If the vector has one value, then the value defined by that basic block
  // should be used.
  // If the vector has more than one value, then a phi node must be created;
  // the basic block ordering is the same as the predecessor ordering.
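  // Illustrative example (hypothetical blocks): with exits {e1, e2} and a
  // block m whose predecessors p1 and p2 are dominated by e1 and e2
  // respectively, the map records m -> {e1, e2}: a phi merging the
  // definitions coming from e1 and e2 must be created at the beginning of m.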
  std::unordered_map<uint32_t, std::vector<uint32_t>> bb_to_defining_blocks_;
};

// Make the set |blocks| closed SSA. The set is closed SSA if all the uses
// outside the set are phi instructions in the exit basic block set (held by
// |lcssa_rewriter|).
inline void MakeSetClosedSSA(IRContext* context, Function* function,
                             const std::unordered_set<uint32_t>& blocks,
                             const std::unordered_set<BasicBlock*>& exit_bb,
                             LCSSARewriter* lcssa_rewriter) {
  CFG& cfg = *context->cfg();
  DominatorTree& dom_tree =
      context->GetDominatorAnalysis(function)->GetDomTree();
  analysis::DefUseManager* def_use_manager = context->get_def_use_mgr();

  for (uint32_t bb_id : blocks) {
    BasicBlock* bb = cfg.block(bb_id);
    // If bb does not dominate an exit block, then it cannot have escaping
    // defs.
    if (!DominatesAnExit(bb, exit_bb, dom_tree)) continue;
    for (Instruction& inst : *bb) {
      LCSSARewriter::UseRewriter rewriter(lcssa_rewriter, inst);
      def_use_manager->ForEachUse(
          &inst, [&blocks, &rewriter, &exit_bb, context](
                     Instruction* use, uint32_t operand_index) {
            BasicBlock* use_parent = context->get_instr_block(use);
            assert(use_parent);
            if (blocks.count(use_parent->id())) return;

            if (use->opcode() == spv::Op::OpPhi) {
              // If the use is a phi instruction and the incoming block is
              // coming from the loop, then that's consistent with LCSSA form.
              if (exit_bb.count(use_parent)) {
                return;
              } else {
                // That's not an exit block, but the user is a phi
                // instruction. Consider the incoming branch only.
                use_parent = context->get_instr_block(
                    use->GetSingleWordOperand(operand_index + 1));
              }
            }
            // Rewrite the use. Note that this call does not invalidate the
            // def/use manager. So this operation is safe.
            rewriter.RewriteUse(use_parent, use, operand_index);
          });
      rewriter.UpdateManagers();
    }
  }
}

}  // namespace

void LoopUtils::CreateLoopDedicatedExits() {
  Function* function = loop_->GetHeaderBlock()->GetParent();
  LoopDescriptor& loop_desc = *context_->GetLoopDescriptor(function);
  CFG& cfg = *context_->cfg();
  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();

  const IRContext::Analysis PreservedAnalyses =
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping;

  // Gather the set of basic blocks that are not in this loop but have at
  // least one predecessor in the loop.
  std::unordered_set<uint32_t> exit_bb_set;
  loop_->GetExitBlocks(&exit_bb_set);

  std::unordered_set<BasicBlock*> new_loop_exits;
  bool made_change = false;
  // For each exit block that also has predecessors outside the loop, we
  // create a new block that gathers all the branches from the loop and falls
  // through into the old block, as sketched below.
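  //
  // Illustrative sketch (hypothetical blocks): if %old_exit has predecessors
  // %in_loop (inside the loop) and %outside (outside it), a new %exit block
  // is created so that %in_loop branches to %exit, %exit branches to
  // %old_exit, and %outside still branches to %old_exit directly. Phi
  // operands in %old_exit whose incoming edges come from the loop are moved
  // into a new phi in %exit.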
  for (uint32_t non_dedicate_id : exit_bb_set) {
    BasicBlock* non_dedicate = cfg.block(non_dedicate_id);
    const std::vector<uint32_t>& bb_pred = cfg.preds(non_dedicate_id);
    // Ignore the block if all the predecessors are in the loop.
    if (std::all_of(bb_pred.begin(), bb_pred.end(),
                    [this](uint32_t id) { return loop_->IsInsideLoop(id); })) {
      new_loop_exits.insert(non_dedicate);
      continue;
    }

    made_change = true;
    Function::iterator insert_pt = function->begin();
    for (; insert_pt != function->end() && &*insert_pt != non_dedicate;
         ++insert_pt) {
    }
    assert(insert_pt != function->end() && "Basic Block not found");

    // Create the dedicated exit basic block.
    // TODO(1841): Handle id overflow.
    BasicBlock& exit = *insert_pt.InsertBefore(std::unique_ptr<BasicBlock>(
        new BasicBlock(std::unique_ptr<Instruction>(new Instruction(
            context_, spv::Op::OpLabel, 0, context_->TakeNextId(), {})))));
    exit.SetParent(function);

    // Redirect in-loop predecessors to the |exit| block.
    for (uint32_t exit_pred_id : bb_pred) {
      if (loop_->IsInsideLoop(exit_pred_id)) {
        BasicBlock* pred_block = cfg.block(exit_pred_id);
        pred_block->ForEachSuccessorLabel([non_dedicate, &exit](uint32_t* id) {
          if (*id == non_dedicate->id()) *id = exit.id();
        });
        // Update the CFG.
        // |non_dedicate|'s predecessor list will be updated at the end of
        // the loop.
        cfg.RegisterBlock(pred_block);
      }
    }

    // Register the label with the def/use manager; this is required for the
    // phi patching.
    def_use_mgr->AnalyzeInstDefUse(exit.GetLabelInst());
    context_->set_instr_block(exit.GetLabelInst(), &exit);

    InstructionBuilder builder(context_, &exit, PreservedAnalyses);
    // Now jump from our dedicated basic block to the old exit.
    // We also reset the insert point so all instructions are inserted before
    // the branch.
    builder.SetInsertPoint(builder.AddBranch(non_dedicate->id()));
    non_dedicate->ForEachPhiInst(
        [&builder, &exit, def_use_mgr, this](Instruction* phi) {
          // New phi operands for this instruction.
          std::vector<uint32_t> new_phi_op;
          // Phi operands for the dedicated exit block.
          std::vector<uint32_t> exit_phi_op;
          for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
            uint32_t def_id = phi->GetSingleWordInOperand(i);
            uint32_t incoming_id = phi->GetSingleWordInOperand(i + 1);
            if (loop_->IsInsideLoop(incoming_id)) {
              exit_phi_op.push_back(def_id);
              exit_phi_op.push_back(incoming_id);
            } else {
              new_phi_op.push_back(def_id);
              new_phi_op.push_back(incoming_id);
            }
          }

          // Build the new phi instruction for the dedicated exit block.
          Instruction* exit_phi = builder.AddPhi(phi->type_id(), exit_phi_op);
          // Build the new incoming branch.
          new_phi_op.push_back(exit_phi->result_id());
          new_phi_op.push_back(exit.id());
          // Rewrite operands.
          uint32_t idx = 0;
          for (; idx < new_phi_op.size(); idx++)
            phi->SetInOperand(idx, {new_phi_op[idx]});
          // Remove extra operands, from last to first (more efficient).
          for (uint32_t j = phi->NumInOperands() - 1; j >= idx; j--)
            phi->RemoveInOperand(j);
          // Update the def/use manager for this |phi|.
          def_use_mgr->AnalyzeInstUse(phi);
        });
    // Update the CFG.
    cfg.RegisterBlock(&exit);
    cfg.RemoveNonExistingEdges(non_dedicate->id());
    new_loop_exits.insert(&exit);
    // If non_dedicate is in a loop, add the new dedicated exit in that loop.
    if (Loop* parent_loop = loop_desc[non_dedicate])
      parent_loop->AddBasicBlock(&exit);
  }

  if (new_loop_exits.size() == 1) {
    loop_->SetMergeBlock(*new_loop_exits.begin());
  }

  if (made_change) {
    context_->InvalidateAnalysesExceptFor(
        PreservedAnalyses | IRContext::kAnalysisCFG |
        IRContext::Analysis::kAnalysisLoopAnalysis);
  }
}

void LoopUtils::MakeLoopClosedSSA() {
  CreateLoopDedicatedExits();

  Function* function = loop_->GetHeaderBlock()->GetParent();
  CFG& cfg = *context_->cfg();
  DominatorTree& dom_tree =
      context_->GetDominatorAnalysis(function)->GetDomTree();

  std::unordered_set<BasicBlock*> exit_bb;
  {
    std::unordered_set<uint32_t> exit_bb_id;
    loop_->GetExitBlocks(&exit_bb_id);
    for (uint32_t bb_id : exit_bb_id) {
      exit_bb.insert(cfg.block(bb_id));
    }
  }

  LCSSARewriter lcssa_rewriter(context_, dom_tree, exit_bb,
                               loop_->GetMergeBlock());
  MakeSetClosedSSA(context_, function, loop_->GetBlocks(), exit_bb,
                   &lcssa_rewriter);

  // Make sure all defs post-dominated by the merge block have their last use
  // no further than the merge block.
  if (loop_->GetMergeBlock()) {
    std::unordered_set<uint32_t> merging_bb_id;
    loop_->GetMergingBlocks(&merging_bb_id);
    merging_bb_id.erase(loop_->GetMergeBlock()->id());
    // Reset the exit set; now only the merge block is the exit.
    exit_bb.clear();
    exit_bb.insert(loop_->GetMergeBlock());
    // LCSSARewriter is reusable here only because it forces the creation of a
    // phi instruction in the merge block.
    MakeSetClosedSSA(context_, function, merging_bb_id, exit_bb,
                     &lcssa_rewriter);
  }

  context_->InvalidateAnalysesExceptFor(
      IRContext::Analysis::kAnalysisCFG |
      IRContext::Analysis::kAnalysisDominatorAnalysis |
      IRContext::Analysis::kAnalysisLoopAnalysis);
}

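// Illustrative usage of the cloning API (a sketch; |context| and |loop| are
// assumed to be a valid IRContext* and a registered Loop*):
//   LoopUtils utils(context, loop);
//   LoopUtils::LoopCloningResult result;
//   Loop* clone = utils.CloneLoop(&result);
//   // result.value_map_ maps original result ids to their cloned ids.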
Loop* LoopUtils::CloneLoop(LoopCloningResult* cloning_result) const {
  // Compute the structured order of the loop basic blocks and store it in the
  // vector |ordered_loop_blocks|.
  std::vector<BasicBlock*> ordered_loop_blocks;
  loop_->ComputeLoopStructuredOrder(&ordered_loop_blocks);

  // Clone the loop.
  return CloneLoop(cloning_result, ordered_loop_blocks);
}

Loop* LoopUtils::CloneAndAttachLoopToHeader(LoopCloningResult* cloning_result) {
  // Clone the loop.
  Loop* new_loop = CloneLoop(cloning_result);

  // Create a new exit block/label for the new loop.
  // TODO(1841): Handle id overflow.
  std::unique_ptr<Instruction> new_label{new Instruction(
      context_, spv::Op::OpLabel, 0, context_->TakeNextId(), {})};
  std::unique_ptr<BasicBlock> new_exit_bb{new BasicBlock(std::move(new_label))};
  new_exit_bb->SetParent(loop_->GetMergeBlock()->GetParent());

  // Create an unconditional branch to the header block.
  InstructionBuilder builder{context_, new_exit_bb.get()};
  builder.AddBranch(loop_->GetHeaderBlock()->id());

  // Save the ids of the new and old merge block.
  const uint32_t old_merge_block = loop_->GetMergeBlock()->id();
  const uint32_t new_merge_block = new_exit_bb->id();

  // Replace the uses of the old merge block in the new loop with the new
  // merge block.
  for (std::unique_ptr<BasicBlock>& basic_block : cloning_result->cloned_bb_) {
    for (Instruction& inst : *basic_block) {
      // For each operand in each instruction, check if it is using the old
      // merge block and change it to be the new merge block.
      auto replace_merge_use = [old_merge_block,
                                new_merge_block](uint32_t* id) {
        if (*id == old_merge_block) *id = new_merge_block;
      };
      inst.ForEachInOperand(replace_merge_use);
    }
  }

  const uint32_t old_header = loop_->GetHeaderBlock()->id();
  const uint32_t new_header = new_loop->GetHeaderBlock()->id();
  analysis::DefUseManager* def_use = context_->get_def_use_mgr();

  def_use->ForEachUse(old_header,
                      [new_header, this](Instruction* inst, uint32_t operand) {
                        if (!this->loop_->IsInsideLoop(inst))
                          inst->SetOperand(operand, {new_header});
                      });

  // TODO(1841): Handle failure to create pre-header.
  def_use->ForEachUse(
      loop_->GetOrCreatePreHeaderBlock()->id(),
      [new_merge_block, this](Instruction* inst, uint32_t operand) {
        if (this->loop_->IsInsideLoop(inst))
          inst->SetOperand(operand, {new_merge_block});
      });
  new_loop->SetMergeBlock(new_exit_bb.get());

  new_loop->SetPreHeaderBlock(loop_->GetPreHeaderBlock());

  // Add the new block into the cloned instructions.
  cloning_result->cloned_bb_.push_back(std::move(new_exit_bb));

  return new_loop;
}

Loop* LoopUtils::CloneLoop(
    LoopCloningResult* cloning_result,
    const std::vector<BasicBlock*>& ordered_loop_blocks) const {
  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();

  std::unique_ptr<Loop> new_loop = MakeUnique<Loop>(context_);

  CFG& cfg = *context_->cfg();

  // Clone and place blocks in a SPIR-V compliant order (dominators first).
  for (BasicBlock* old_bb : ordered_loop_blocks) {
    // For each basic block in the loop, we clone it and register the mapping
    // between old and new ids.
    BasicBlock* new_bb = old_bb->Clone(context_);
    new_bb->SetParent(&function_);
    // TODO(1841): Handle id overflow.
    new_bb->GetLabelInst()->SetResultId(context_->TakeNextId());
    def_use_mgr->AnalyzeInstDef(new_bb->GetLabelInst());
    context_->set_instr_block(new_bb->GetLabelInst(), new_bb);
    cloning_result->cloned_bb_.emplace_back(new_bb);

    cloning_result->old_to_new_bb_[old_bb->id()] = new_bb;
    cloning_result->new_to_old_bb_[new_bb->id()] = old_bb;
    cloning_result->value_map_[old_bb->id()] = new_bb->id();

    if (loop_->IsInsideLoop(old_bb)) new_loop->AddBasicBlock(new_bb);

    for (auto new_inst = new_bb->begin(), old_inst = old_bb->begin();
         new_inst != new_bb->end(); ++new_inst, ++old_inst) {
      cloning_result->ptr_map_[&*new_inst] = &*old_inst;
      if (new_inst->HasResultId()) {
        // TODO(1841): Handle id overflow.
        new_inst->SetResultId(context_->TakeNextId());
        cloning_result->value_map_[old_inst->result_id()] =
            new_inst->result_id();

        // Only look at the defs for now; uses are not updated yet.
        def_use_mgr->AnalyzeInstDef(&*new_inst);
      }
    }
  }

  // All instructions (including all labels) have been cloned; remap the
  // instructions' operand ids to the new ones.
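  // For example (hypothetical ids): if the original loop contained
  //   %10 = OpIAdd %int %8 %9
  // and %8 was cloned as %18, then the cloned add has its %8 operand
  // rewritten to %18 through |value_map_|.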
  for (std::unique_ptr<BasicBlock>& bb_ref : cloning_result->cloned_bb_) {
    BasicBlock* bb = bb_ref.get();

    for (Instruction& insn : *bb) {
      insn.ForEachInId([cloning_result](uint32_t* old_id) {
        // If the operand is defined in the loop, remap the id.
        auto id_it = cloning_result->value_map_.find(*old_id);
        if (id_it != cloning_result->value_map_.end()) {
          *old_id = id_it->second;
        }
      });
      // Only look at what the instruction uses. All defs are registered, so
      // everything should be fine now.
      def_use_mgr->AnalyzeInstUse(&insn);
      context_->set_instr_block(&insn, bb);
    }
    cfg.RegisterBlock(bb);
  }

  PopulateLoopNest(new_loop.get(), *cloning_result);

  return new_loop.release();
}

void LoopUtils::PopulateLoopNest(
    Loop* new_loop, const LoopCloningResult& cloning_result) const {
  std::unordered_map<Loop*, Loop*> loop_mapping;
  loop_mapping[loop_] = new_loop;

  if (loop_->HasParent()) loop_->GetParent()->AddNestedLoop(new_loop);
  PopulateLoopDesc(new_loop, loop_, cloning_result);

  for (Loop& sub_loop :
       make_range(++TreeDFIterator<Loop>(loop_), TreeDFIterator<Loop>())) {
    Loop* cloned = new Loop(context_);
    if (Loop* parent = loop_mapping[sub_loop.GetParent()])
      parent->AddNestedLoop(cloned);
    loop_mapping[&sub_loop] = cloned;
    PopulateLoopDesc(cloned, &sub_loop, cloning_result);
  }

  loop_desc_->AddLoopNest(std::unique_ptr<Loop>(new_loop));
}

// Populates the |new_loop| descriptor according to |old_loop|'s.
void LoopUtils::PopulateLoopDesc(
    Loop* new_loop, Loop* old_loop,
    const LoopCloningResult& cloning_result) const {
  for (uint32_t bb_id : old_loop->GetBlocks()) {
    BasicBlock* bb = cloning_result.old_to_new_bb_.at(bb_id);
    new_loop->AddBasicBlock(bb);
  }
  new_loop->SetHeaderBlock(
      cloning_result.old_to_new_bb_.at(old_loop->GetHeaderBlock()->id()));
  if (old_loop->GetLatchBlock())
    new_loop->SetLatchBlock(
        cloning_result.old_to_new_bb_.at(old_loop->GetLatchBlock()->id()));
  if (old_loop->GetContinueBlock())
    new_loop->SetContinueBlock(
        cloning_result.old_to_new_bb_.at(old_loop->GetContinueBlock()->id()));
  if (old_loop->GetMergeBlock()) {
    auto it =
        cloning_result.old_to_new_bb_.find(old_loop->GetMergeBlock()->id());
    BasicBlock* bb = it != cloning_result.old_to_new_bb_.end()
                         ? it->second
                         : old_loop->GetMergeBlock();
    new_loop->SetMergeBlock(bb);
  }
  if (old_loop->GetPreHeaderBlock()) {
    auto it =
        cloning_result.old_to_new_bb_.find(old_loop->GetPreHeaderBlock()->id());
    if (it != cloning_result.old_to_new_bb_.end()) {
      new_loop->SetPreHeaderBlock(it->second);
    }
  }
}

// Gather some metrics about the region of interest (here, the given loop).
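// Illustrative usage (a sketch; |loop| is assumed to point at a valid Loop):
//   CodeMetrics metrics;
//   metrics.Analyze(*loop);
//   // metrics.roi_size_ is the instruction count of the loop, excluding
//   // labels, nops, and phi instructions.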
void CodeMetrics::Analyze(const Loop& loop) {
  CFG& cfg = *loop.GetContext()->cfg();

  roi_size_ = 0;
  block_sizes_.clear();

  for (uint32_t id : loop.GetBlocks()) {
    const BasicBlock* bb = cfg.block(id);
    size_t bb_size = 0;
    bb->ForEachInst([&bb_size](const Instruction* insn) {
      if (insn->opcode() == spv::Op::OpLabel) return;
      if (insn->IsNop()) return;
      if (insn->opcode() == spv::Op::OpPhi) return;
      bb_size++;
    });
    block_sizes_[bb->id()] = bb_size;
    roi_size_ += bb_size;
  }
}

}  // namespace opt
}  // namespace spvtools