1 // Copyright (c) 2018 Google LLC.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <functional>
16 #include <map>
17 #include <memory>
18 #include <set>
19 #include <utility>
20 #include <vector>
21 
22 #include "source/opt/scalar_analysis.h"
23 
24 // Simplifies scalar analysis DAGs.
25 //
26 // 1. Given a node passed to SimplifyExpression we first simplify the graph by
27 // calling SimplifyPolynomial. This groups like nodes following basic arithmetic
28 // rules, so multiple adds of the same load instruction could be grouped into a
29 // single multiply of that instruction. SimplifyPolynomial will traverse the DAG
30 // and build up an accumulator buffer for each class of instruction it finds.
31 // For example take the loop:
32 // for (i=0, i<N; i++) { i+B+23+4+B+C; }
33 // In this example the expression "i+B+23+4+B+C" has four classes of
34 // instruction, induction variable i, the two value unknowns B and C, and the
35 // constants. The accumulator buffer is then used to rebuild the graph using
36 // the accumulation of each type. This example would then be folded into
37 // i+2*B+C+27.
38 //
39 // This new graph contains a single add node (or if only one type found then
40 // just that node) with each of the like terms (or multiplication node) as a
41 // child.
42 //
43 // 2. FoldRecurrentAddExpressions is then called on this new DAG. This will take
44 // RecurrentAddExpressions which are with respect to the same loop and fold them
45 // into a single new RecurrentAddExpression with respect to that same loop. An
46 // expression can have multiple RecurrentAddExpression's with respect to
47 // different loops in the case of nested loops. These expressions cannot be
48 // folded further. For example:
49 //
50 // for (i=0; i<N;i++) for(j=0,k=1; j<N;++j,++k)
51 //
52 // The 'j' and 'k' are RecurrentAddExpression with respect to the second loop
53 // and 'i' to the first. If 'j' and 'k' are used in an expression together then
54 // they will be folded into a new RecurrentAddExpression with respect to the
55 // second loop in that expression.
56 //
57 //
58 // 3. If the DAG now only contains a single RecurrentAddExpression we can now
59 // perform a final optimization SimplifyRecurrentAddExpression. This will
60 // transform the entire DAG into a RecurrentAddExpression. Additions to the
61 // RecurrentAddExpression are added to the offset field and multiplications to
62 // the coefficient.
63 //
64 
65 namespace spvtools {
66 namespace opt {
67 
68 // Implementation of the functions which are used to simplify the graph. Graphs
69 // of unknowns, multiplies, additions, and constants can be turned into a linear
70 // add node with each term as a child. For instance a large graph built from, X
71 // + X*2 + Y - Y*3 + 4 - 1, would become a single add expression with the
72 // children X*3, -Y*2, and the constant 3. Graphs containing a recurrent
73 // expression will be simplified to represent the entire graph around a single
74 // recurrent expression. So for an induction variable (i=0, i++) if you add 1 to
75 // i in an expression we can rewrite the graph of that expression to be a single
76 // recurrent expression of (i=1,i++).
77 class SENodeSimplifyImpl {
78  public:
SENodeSimplifyImpl(ScalarEvolutionAnalysis * analysis,SENode * node_to_simplify)79   SENodeSimplifyImpl(ScalarEvolutionAnalysis* analysis,
80                      SENode* node_to_simplify)
81       : analysis_(*analysis),
82         node_(node_to_simplify),
83         constant_accumulator_(0) {}
84 
85   // Return the result of the simplification.
86   SENode* Simplify();
87 
88  private:
89   // Recursively descend through the graph to build up the accumulator objects
90   // which are used to flatten the graph. |child| is the node currently being
91   // traversed and the |negation| flag is used to signify that this operation
92   // was preceded by a unary negative operation and as such the result should be
93   // negated.
94   void GatherAccumulatorsFromChildNodes(SENode* new_node, SENode* child,
95                                         bool negation);
96 
97   // Given a |multiply| node add to the accumulators for the term type within
98   // the |multiply| expression. Will return true if the accumulators could be
99   // calculated successfully. If the |multiply| is in any form other than
100   // unknown*constant then we return false. |negation| signifies that the
101   // operation was preceded by a unary negative.
102   bool AccumulatorsFromMultiply(SENode* multiply, bool negation);
103 
104   SERecurrentNode* UpdateCoefficient(SERecurrentNode* recurrent,
105                                      int64_t coefficient_update) const;
106 
107   // If the graph contains a recurrent expression, ie, an expression with the
108   // loop iterations as a term in the expression, then the whole expression
109   // can be rewritten to be a recurrent expression.
110   SENode* SimplifyRecurrentAddExpression(SERecurrentNode* node);
111 
112   // Simplify the whole graph by linking like terms together in a single flat
113   // add node. So X*2 + Y -Y + 3 +6 would become X*2 + 9. Where X and Y are a
114   // ValueUnknown node (i.e, a load) or a recurrent expression.
115   SENode* SimplifyPolynomial();
116 
117   // Each recurrent expression is an expression with respect to a specific loop.
118   // If we have two different recurrent terms with respect to the same loop in a
119   // single expression then we can fold those terms into a single new term.
120   // For instance:
121   //
122   // induction i = 0, i++
123   // temp = i*10
124   // array[i+temp]
125   //
126   // We can fold the i + temp into a single expression. Rec(0,1) + Rec(0,10) can
127   // become Rec(0,11).
128   SENode* FoldRecurrentAddExpressions(SENode*);
129 
130   // We can eliminate recurrent expressions which have a coefficient of zero by
131   // replacing them with their offset value. We are able to do this because a
132   // recurrent expression represents the equation coefficient*iterations +
133   // offset.
134   SENode* EliminateZeroCoefficientRecurrents(SENode* node);
135 
136   // A reference the analysis which requested the simplification.
137   ScalarEvolutionAnalysis& analysis_;
138 
139   // The node being simplified.
140   SENode* node_;
141 
142   // An accumulator of the net result of all the constant operations performed
143   // in a graph.
144   int64_t constant_accumulator_;
145 
146   // An accumulator for each of the non constant terms in the graph.
147   std::map<SENode*, int64_t> accumulators_;
148 };
149 
150 // From a |multiply| build up the accumulator objects.
AccumulatorsFromMultiply(SENode * multiply,bool negation)151 bool SENodeSimplifyImpl::AccumulatorsFromMultiply(SENode* multiply,
152                                                   bool negation) {
153   if (multiply->GetChildren().size() != 2 ||
154       multiply->GetType() != SENode::Multiply)
155     return false;
156 
157   SENode* operand_1 = multiply->GetChild(0);
158   SENode* operand_2 = multiply->GetChild(1);
159 
160   SENode* value_unknown = nullptr;
161   SENode* constant = nullptr;
162 
163   // Work out which operand is the unknown value.
164   if (operand_1->GetType() == SENode::ValueUnknown ||
165       operand_1->GetType() == SENode::RecurrentAddExpr)
166     value_unknown = operand_1;
167   else if (operand_2->GetType() == SENode::ValueUnknown ||
168            operand_2->GetType() == SENode::RecurrentAddExpr)
169     value_unknown = operand_2;
170 
171   // Work out which operand is the constant coefficient.
172   if (operand_1->GetType() == SENode::Constant)
173     constant = operand_1;
174   else if (operand_2->GetType() == SENode::Constant)
175     constant = operand_2;
176 
177   // If the expression is not a variable multiplied by a constant coefficient,
178   // exit out.
179   if (!(value_unknown && constant)) {
180     return false;
181   }
182 
183   int64_t sign = negation ? -1 : 1;
184 
185   auto iterator = accumulators_.find(value_unknown);
186   int64_t new_value = constant->AsSEConstantNode()->FoldToSingleValue() * sign;
187   // Add the result of the multiplication to the accumulators.
188   if (iterator != accumulators_.end()) {
189     (*iterator).second += new_value;
190   } else {
191     accumulators_.insert({value_unknown, new_value});
192   }
193 
194   return true;
195 }
196 
Simplify()197 SENode* SENodeSimplifyImpl::Simplify() {
198   // We only handle graphs with an addition, multiplication, or negation, at the
199   // root.
200   if (node_->GetType() != SENode::Add && node_->GetType() != SENode::Multiply &&
201       node_->GetType() != SENode::Negative)
202     return node_;
203 
204   SENode* simplified_polynomial = SimplifyPolynomial();
205 
206   SERecurrentNode* recurrent_expr = nullptr;
207   node_ = simplified_polynomial;
208 
209   // Fold recurrent expressions which are with respect to the same loop into a
210   // single recurrent expression.
211   simplified_polynomial = FoldRecurrentAddExpressions(simplified_polynomial);
212 
213   simplified_polynomial =
214       EliminateZeroCoefficientRecurrents(simplified_polynomial);
215 
216   // Traverse the immediate children of the new node to find the recurrent
217   // expression. If there is more than one there is nothing further we can do.
218   for (SENode* child : simplified_polynomial->GetChildren()) {
219     if (child->GetType() == SENode::RecurrentAddExpr) {
220       recurrent_expr = child->AsSERecurrentNode();
221     }
222   }
223 
224   // We need to count the number of unique recurrent expressions in the DAG to
225   // ensure there is only one.
226   for (auto child_iterator = simplified_polynomial->graph_begin();
227        child_iterator != simplified_polynomial->graph_end(); ++child_iterator) {
228     if (child_iterator->GetType() == SENode::RecurrentAddExpr &&
229         recurrent_expr != child_iterator->AsSERecurrentNode()) {
230       return simplified_polynomial;
231     }
232   }
233 
234   if (recurrent_expr) {
235     return SimplifyRecurrentAddExpression(recurrent_expr);
236   }
237 
238   return simplified_polynomial;
239 }
240 
241 // Traverse the graph to build up the accumulator objects.
GatherAccumulatorsFromChildNodes(SENode * new_node,SENode * child,bool negation)242 void SENodeSimplifyImpl::GatherAccumulatorsFromChildNodes(SENode* new_node,
243                                                           SENode* child,
244                                                           bool negation) {
245   int32_t sign = negation ? -1 : 1;
246 
247   if (child->GetType() == SENode::Constant) {
248     // Collect all the constants and add them together.
249     constant_accumulator_ +=
250         child->AsSEConstantNode()->FoldToSingleValue() * sign;
251 
252   } else if (child->GetType() == SENode::ValueUnknown ||
253              child->GetType() == SENode::RecurrentAddExpr) {
254     // To rebuild the graph of X+X+X*2 into 4*X we count the occurrences of X
255     // and create a new node of count*X after. X can either be a ValueUnknown or
256     // a RecurrentAddExpr. The count for each X is stored in the accumulators_
257     // map.
258 
259     auto iterator = accumulators_.find(child);
260     // If we've encountered this term before add to the accumulator for it.
261     if (iterator == accumulators_.end())
262       accumulators_.insert({child, sign});
263     else
264       iterator->second += sign;
265 
266   } else if (child->GetType() == SENode::Multiply) {
267     if (!AccumulatorsFromMultiply(child, negation)) {
268       new_node->AddChild(child);
269     }
270 
271   } else if (child->GetType() == SENode::Add) {
272     for (SENode* next_child : *child) {
273       GatherAccumulatorsFromChildNodes(new_node, next_child, negation);
274     }
275 
276   } else if (child->GetType() == SENode::Negative) {
277     SENode* negated_node = child->GetChild(0);
278     GatherAccumulatorsFromChildNodes(new_node, negated_node, !negation);
279   } else {
280     // If we can't work out how to fold the expression just add it back into
281     // the graph.
282     new_node->AddChild(child);
283   }
284 }
285 
UpdateCoefficient(SERecurrentNode * recurrent,int64_t coefficient_update) const286 SERecurrentNode* SENodeSimplifyImpl::UpdateCoefficient(
287     SERecurrentNode* recurrent, int64_t coefficient_update) const {
288   std::unique_ptr<SERecurrentNode> new_recurrent_node{new SERecurrentNode(
289       recurrent->GetParentAnalysis(), recurrent->GetLoop())};
290 
291   SENode* new_coefficient = analysis_.CreateMultiplyNode(
292       recurrent->GetCoefficient(),
293       analysis_.CreateConstant(coefficient_update));
294 
295   // See if the node can be simplified.
296   SENode* simplified = analysis_.SimplifyExpression(new_coefficient);
297   if (simplified->GetType() != SENode::CanNotCompute)
298     new_coefficient = simplified;
299 
300   if (coefficient_update < 0) {
301     new_recurrent_node->AddOffset(
302         analysis_.CreateNegation(recurrent->GetOffset()));
303   } else {
304     new_recurrent_node->AddOffset(recurrent->GetOffset());
305   }
306 
307   new_recurrent_node->AddCoefficient(new_coefficient);
308 
309   return analysis_.GetCachedOrAdd(std::move(new_recurrent_node))
310       ->AsSERecurrentNode();
311 }
312 
313 // Simplify all the terms in the polynomial function.
SimplifyPolynomial()314 SENode* SENodeSimplifyImpl::SimplifyPolynomial() {
315   std::unique_ptr<SENode> new_add{new SEAddNode(node_->GetParentAnalysis())};
316 
317   // Traverse the graph and gather the accumulators from it.
318   GatherAccumulatorsFromChildNodes(new_add.get(), node_, false);
319 
320   // Fold all the constants into a single constant node.
321   if (constant_accumulator_ != 0) {
322     new_add->AddChild(analysis_.CreateConstant(constant_accumulator_));
323   }
324 
325   for (auto& pair : accumulators_) {
326     SENode* term = pair.first;
327     int64_t count = pair.second;
328 
329     // We can eliminate the term completely.
330     if (count == 0) continue;
331 
332     if (count == 1) {
333       new_add->AddChild(term);
334     } else if (count == -1 && term->GetType() != SENode::RecurrentAddExpr) {
335       // If the count is -1 we can just add a negative version of that node,
336       // unless it is a recurrent expression as we would rather the negative
337       // goes on the recurrent expressions children. This makes it easier to
338       // work with in other places.
339       new_add->AddChild(analysis_.CreateNegation(term));
340     } else {
341       // Output value unknown terms as count*term and output recurrent
342       // expression terms as rec(offset, coefficient + count) offset and
343       // coefficient are the same as in the original expression.
344       if (term->GetType() == SENode::ValueUnknown) {
345         SENode* count_as_constant = analysis_.CreateConstant(count);
346         new_add->AddChild(
347             analysis_.CreateMultiplyNode(count_as_constant, term));
348       } else {
349         assert(term->GetType() == SENode::RecurrentAddExpr &&
350                "We only handle value unknowns or recurrent expressions");
351 
352         // Create a new recurrent expression by adding the count to the
353         // coefficient of the old one.
354         new_add->AddChild(UpdateCoefficient(term->AsSERecurrentNode(), count));
355       }
356     }
357   }
358 
359   // If there is only one term in the addition left just return that term.
360   if (new_add->GetChildren().size() == 1) {
361     return new_add->GetChild(0);
362   }
363 
364   // If there are no terms left in the addition just return 0.
365   if (new_add->GetChildren().size() == 0) {
366     return analysis_.CreateConstant(0);
367   }
368 
369   return analysis_.GetCachedOrAdd(std::move(new_add));
370 }
371 
FoldRecurrentAddExpressions(SENode * root)372 SENode* SENodeSimplifyImpl::FoldRecurrentAddExpressions(SENode* root) {
373   std::unique_ptr<SEAddNode> new_node{new SEAddNode(&analysis_)};
374 
375   // A mapping of loops to the list of recurrent expressions which are with
376   // respect to those loops.
377   std::map<const Loop*, std::vector<std::pair<SERecurrentNode*, bool>>>
378       loops_to_recurrent{};
379 
380   bool has_multiple_same_loop_recurrent_terms = false;
381 
382   for (SENode* child : *root) {
383     bool negation = false;
384 
385     if (child->GetType() == SENode::Negative) {
386       child = child->GetChild(0);
387       negation = true;
388     }
389 
390     if (child->GetType() == SENode::RecurrentAddExpr) {
391       const Loop* loop = child->AsSERecurrentNode()->GetLoop();
392 
393       SERecurrentNode* rec = child->AsSERecurrentNode();
394       if (loops_to_recurrent.find(loop) == loops_to_recurrent.end()) {
395         loops_to_recurrent[loop] = {std::make_pair(rec, negation)};
396       } else {
397         loops_to_recurrent[loop].push_back(std::make_pair(rec, negation));
398         has_multiple_same_loop_recurrent_terms = true;
399       }
400     } else {
401       new_node->AddChild(child);
402     }
403   }
404 
405   if (!has_multiple_same_loop_recurrent_terms) return root;
406 
407   for (auto pair : loops_to_recurrent) {
408     std::vector<std::pair<SERecurrentNode*, bool>>& recurrent_expressions =
409         pair.second;
410     const Loop* loop = pair.first;
411 
412     std::unique_ptr<SENode> new_coefficient{new SEAddNode(&analysis_)};
413     std::unique_ptr<SENode> new_offset{new SEAddNode(&analysis_)};
414 
415     for (auto node_pair : recurrent_expressions) {
416       SERecurrentNode* node = node_pair.first;
417       bool negative = node_pair.second;
418 
419       if (!negative) {
420         new_coefficient->AddChild(node->GetCoefficient());
421         new_offset->AddChild(node->GetOffset());
422       } else {
423         new_coefficient->AddChild(
424             analysis_.CreateNegation(node->GetCoefficient()));
425         new_offset->AddChild(analysis_.CreateNegation(node->GetOffset()));
426       }
427     }
428 
429     std::unique_ptr<SERecurrentNode> new_recurrent{
430         new SERecurrentNode(&analysis_, loop)};
431 
432     SENode* new_coefficient_simplified =
433         analysis_.SimplifyExpression(new_coefficient.get());
434 
435     SENode* new_offset_simplified =
436         analysis_.SimplifyExpression(new_offset.get());
437 
438     if (new_coefficient_simplified->GetType() == SENode::Constant &&
439         new_coefficient_simplified->AsSEConstantNode()->FoldToSingleValue() ==
440             0) {
441       return new_offset_simplified;
442     }
443 
444     new_recurrent->AddCoefficient(new_coefficient_simplified);
445     new_recurrent->AddOffset(new_offset_simplified);
446 
447     new_node->AddChild(analysis_.GetCachedOrAdd(std::move(new_recurrent)));
448   }
449 
450   // If we only have one child in the add just return that.
451   if (new_node->GetChildren().size() == 1) {
452     return new_node->GetChild(0);
453   }
454 
455   return analysis_.GetCachedOrAdd(std::move(new_node));
456 }
457 
EliminateZeroCoefficientRecurrents(SENode * node)458 SENode* SENodeSimplifyImpl::EliminateZeroCoefficientRecurrents(SENode* node) {
459   if (node->GetType() != SENode::Add) return node;
460 
461   bool has_change = false;
462 
463   std::vector<SENode*> new_children{};
464   for (SENode* child : *node) {
465     if (child->GetType() == SENode::RecurrentAddExpr) {
466       SENode* coefficient = child->AsSERecurrentNode()->GetCoefficient();
467       // If coefficient is zero then we can eliminate the recurrent expression
468       // entirely and just return the offset as the recurrent expression is
469       // representing the equation coefficient*iterations + offset.
470       if (coefficient->GetType() == SENode::Constant &&
471           coefficient->AsSEConstantNode()->FoldToSingleValue() == 0) {
472         new_children.push_back(child->AsSERecurrentNode()->GetOffset());
473         has_change = true;
474       } else {
475         new_children.push_back(child);
476       }
477     } else {
478       new_children.push_back(child);
479     }
480   }
481 
482   if (!has_change) return node;
483 
484   std::unique_ptr<SENode> new_add{new SEAddNode(node_->GetParentAnalysis())};
485 
486   for (SENode* child : new_children) {
487     new_add->AddChild(child);
488   }
489 
490   return analysis_.GetCachedOrAdd(std::move(new_add));
491 }
492 
SimplifyRecurrentAddExpression(SERecurrentNode * recurrent_expr)493 SENode* SENodeSimplifyImpl::SimplifyRecurrentAddExpression(
494     SERecurrentNode* recurrent_expr) {
495   const std::vector<SENode*>& children = node_->GetChildren();
496 
497   std::unique_ptr<SERecurrentNode> recurrent_node{new SERecurrentNode(
498       recurrent_expr->GetParentAnalysis(), recurrent_expr->GetLoop())};
499 
500   // Create and simplify the new offset node.
501   std::unique_ptr<SENode> new_offset{
502       new SEAddNode(recurrent_expr->GetParentAnalysis())};
503   new_offset->AddChild(recurrent_expr->GetOffset());
504 
505   for (SENode* child : children) {
506     if (child->GetType() != SENode::RecurrentAddExpr) {
507       new_offset->AddChild(child);
508     }
509   }
510 
511   // Simplify the new offset.
512   SENode* simplified_child = analysis_.SimplifyExpression(new_offset.get());
513 
514   // If the child can be simplified, add the simplified form otherwise, add it
515   // via the usual caching mechanism.
516   if (simplified_child->GetType() != SENode::CanNotCompute) {
517     recurrent_node->AddOffset(simplified_child);
518   } else {
519     recurrent_expr->AddOffset(analysis_.GetCachedOrAdd(std::move(new_offset)));
520   }
521 
522   recurrent_node->AddCoefficient(recurrent_expr->GetCoefficient());
523 
524   return analysis_.GetCachedOrAdd(std::move(recurrent_node));
525 }
526 
527 /*
528  * Scalar Analysis simplification public methods.
529  */
530 
SimplifyExpression(SENode * node)531 SENode* ScalarEvolutionAnalysis::SimplifyExpression(SENode* node) {
532   SENodeSimplifyImpl impl{this, node};
533 
534   return impl.Simplify();
535 }
536 
537 }  // namespace opt
538 }  // namespace spvtools
539