1 //===--- LoopConvertUtils.h - clang-tidy ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_UTILS_H
10 #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_UTILS_H
11 
12 #include "clang/AST/ASTContext.h"
13 #include "clang/AST/RecursiveASTVisitor.h"
14 #include "clang/ASTMatchers/ASTMatchFinder.h"
15 #include "clang/Basic/SourceLocation.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/SmallSet.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include <algorithm>
21 #include <memory>
22 #include <string>
23 #include <utility>
24 
25 namespace clang::tidy::modernize {
26 
27 enum LoopFixerKind {
28   LFK_Array,
29   LFK_Iterator,
30   LFK_ReverseIterator,
31   LFK_PseudoArray
32 };
33 
34 /// A map used to walk the AST in reverse: maps child Stmt to parent Stmt.
35 using StmtParentMap = llvm::DenseMap<const clang::Stmt *, const clang::Stmt *>;
36 
37 /// A map used to walk the AST in reverse:
38 ///  maps VarDecl to the to parent DeclStmt.
39 using DeclParentMap =
40     llvm::DenseMap<const clang::VarDecl *, const clang::DeclStmt *>;
41 
42 /// A map used to track which variables have been removed by a refactoring pass.
43 /// It maps the parent ForStmt to the removed index variable's VarDecl.
44 using ReplacedVarsMap =
45     llvm::DenseMap<const clang::ForStmt *, const clang::VarDecl *>;
46 
47 /// A map used to remember the variable names generated in a Stmt
48 using StmtGeneratedVarNameMap =
49     llvm::DenseMap<const clang::Stmt *, std::string>;
50 
51 /// A vector used to store the AST subtrees of an Expr.
52 using ComponentVector = llvm::SmallVector<const clang::Expr *, 16>;
53 
54 /// Class used build the reverse AST properties needed to detect
55 /// name conflicts and free variables.
56 class StmtAncestorASTVisitor
57     : public clang::RecursiveASTVisitor<StmtAncestorASTVisitor> {
58 public:
StmtAncestorASTVisitor()59   StmtAncestorASTVisitor() { StmtStack.push_back(nullptr); }
60 
61   /// Run the analysis on the AST.
62   ///
63   /// In case we're running this analysis multiple times, don't repeat the work.
gatherAncestors(ASTContext & Ctx)64   void gatherAncestors(ASTContext &Ctx) {
65     if (StmtAncestors.empty())
66       TraverseAST(Ctx);
67   }
68 
69   /// Accessor for StmtAncestors.
getStmtToParentStmtMap()70   const StmtParentMap &getStmtToParentStmtMap() { return StmtAncestors; }
71 
72   /// Accessor for DeclParents.
getDeclToParentStmtMap()73   const DeclParentMap &getDeclToParentStmtMap() { return DeclParents; }
74 
75   friend class clang::RecursiveASTVisitor<StmtAncestorASTVisitor>;
76 
77 private:
78   StmtParentMap StmtAncestors;
79   DeclParentMap DeclParents;
80   llvm::SmallVector<const clang::Stmt *, 16> StmtStack;
81 
82   bool TraverseStmt(clang::Stmt *Statement);
83   bool VisitDeclStmt(clang::DeclStmt *Statement);
84 };
85 
86 /// Class used to find the variables and member expressions on which an
87 /// arbitrary expression depends.
88 class ComponentFinderASTVisitor
89     : public clang::RecursiveASTVisitor<ComponentFinderASTVisitor> {
90 public:
91   ComponentFinderASTVisitor() = default;
92 
93   /// Find the components of an expression and place them in a ComponentVector.
findExprComponents(const clang::Expr * SourceExpr)94   void findExprComponents(const clang::Expr *SourceExpr) {
95     TraverseStmt(const_cast<clang::Expr *>(SourceExpr));
96   }
97 
98   /// Accessor for Components.
getComponents()99   const ComponentVector &getComponents() { return Components; }
100 
101   friend class clang::RecursiveASTVisitor<ComponentFinderASTVisitor>;
102 
103 private:
104   ComponentVector Components;
105 
106   bool VisitDeclRefExpr(clang::DeclRefExpr *E);
107   bool VisitMemberExpr(clang::MemberExpr *Member);
108 };
109 
110 /// Class used to determine if an expression is dependent on a variable declared
111 /// inside of the loop where it would be used.
112 class DependencyFinderASTVisitor
113     : public clang::RecursiveASTVisitor<DependencyFinderASTVisitor> {
114 public:
DependencyFinderASTVisitor(const StmtParentMap * StmtParents,const DeclParentMap * DeclParents,const ReplacedVarsMap * ReplacedVars,const clang::Stmt * ContainingStmt)115   DependencyFinderASTVisitor(const StmtParentMap *StmtParents,
116                              const DeclParentMap *DeclParents,
117                              const ReplacedVarsMap *ReplacedVars,
118                              const clang::Stmt *ContainingStmt)
119       : StmtParents(StmtParents), DeclParents(DeclParents),
120         ContainingStmt(ContainingStmt), ReplacedVars(ReplacedVars) {}
121 
122   /// Run the analysis on Body, and return true iff the expression
123   /// depends on some variable declared within ContainingStmt.
124   ///
125   /// This is intended to protect against hoisting the container expression
126   /// outside of an inner context if part of that expression is declared in that
127   /// inner context.
128   ///
129   /// For example,
130   /// \code
131   ///   const int N = 10, M = 20;
132   ///   int arr[N][M];
133   ///   int getRow();
134   ///
135   ///   for (int i = 0; i < M; ++i) {
136   ///     int k = getRow();
137   ///     printf("%d:", arr[k][i]);
138   ///   }
139   /// \endcode
140   /// At first glance, this loop looks like it could be changed to
141   /// \code
142   ///   for (int elem : arr[k]) {
143   ///     int k = getIndex();
144   ///     printf("%d:", elem);
145   ///   }
146   /// \endcode
147   /// But this is malformed, since `k` is used before it is defined!
148   ///
149   /// In order to avoid this, this class looks at the container expression
150   /// `arr[k]` and decides whether or not it contains a sub-expression declared
151   /// within the loop body.
dependsOnInsideVariable(const clang::Stmt * Body)152   bool dependsOnInsideVariable(const clang::Stmt *Body) {
153     DependsOnInsideVariable = false;
154     TraverseStmt(const_cast<clang::Stmt *>(Body));
155     return DependsOnInsideVariable;
156   }
157 
158   friend class clang::RecursiveASTVisitor<DependencyFinderASTVisitor>;
159 
160 private:
161   const StmtParentMap *StmtParents;
162   const DeclParentMap *DeclParents;
163   const clang::Stmt *ContainingStmt;
164   const ReplacedVarsMap *ReplacedVars;
165   bool DependsOnInsideVariable;
166 
167   bool VisitVarDecl(clang::VarDecl *V);
168   bool VisitDeclRefExpr(clang::DeclRefExpr *D);
169 };
170 
171 /// Class used to determine if any declarations used in a Stmt would conflict
172 /// with a particular identifier. This search includes the names that don't
173 /// actually appear in the AST (i.e. created by a refactoring tool) by including
174 /// a map from Stmts to generated names associated with those stmts.
175 class DeclFinderASTVisitor
176     : public clang::RecursiveASTVisitor<DeclFinderASTVisitor> {
177 public:
DeclFinderASTVisitor(const StringRef & Name,const StmtGeneratedVarNameMap * GeneratedDecls)178   DeclFinderASTVisitor(const StringRef &Name,
179                        const StmtGeneratedVarNameMap *GeneratedDecls)
180       : Name(Name), GeneratedDecls(GeneratedDecls) {}
181 
182   /// Attempts to find any usages of variables name Name in Body, returning
183   /// true when it is used in Body. This includes the generated loop variables
184   /// of ForStmts which have already been transformed.
findUsages(const clang::Stmt * Body)185   bool findUsages(const clang::Stmt *Body) {
186     Found = false;
187     TraverseStmt(const_cast<clang::Stmt *>(Body));
188     return Found;
189   }
190 
191   friend class clang::RecursiveASTVisitor<DeclFinderASTVisitor>;
192 
193 private:
194   std::string Name;
195   /// GeneratedDecls keeps track of ForStmts which have been transformed,
196   /// mapping each modified ForStmt to the variable generated in the loop.
197   const StmtGeneratedVarNameMap *GeneratedDecls;
198   bool Found = false;
199 
200   bool VisitForStmt(clang::ForStmt *);
201   bool VisitNamedDecl(clang::NamedDecl *);
202   bool VisitDeclRefExpr(clang::DeclRefExpr *);
203   bool VisitTypeLoc(clang::TypeLoc);
204 };
205 
206 /// The information needed to describe a valid convertible usage
207 /// of an array index or iterator.
208 struct Usage {
209   enum UsageKind {
210     // Regular usages of the loop index (the ones not specified below). Some
211     // examples:
212     // \code
213     //   int X = 8 * Arr[i];
214     //               ^~~~~~
215     //   f(param1, param2, *It);
216     //                     ^~~
217     //   if (Vec[i].SomeBool) {}
218     //       ^~~~~~
219     // \endcode
220     UK_Default,
221     // Indicates whether this is an access to a member through the arrow
222     // operator on pointers or iterators.
223     UK_MemberThroughArrow,
224     // If the variable is being captured by a lambda, indicates whether the
225     // capture was done by value or by reference.
226     UK_CaptureByCopy,
227     UK_CaptureByRef
228   };
229   // The expression that is going to be converted. Null in case of lambda
230   // captures.
231   const Expr *Expression;
232 
233   UsageKind Kind;
234 
235   // Range that covers this usage.
236   SourceRange Range;
237 
UsageUsage238   explicit Usage(const Expr *E)
239       : Expression(E), Kind(UK_Default), Range(Expression->getSourceRange()) {}
UsageUsage240   Usage(const Expr *E, UsageKind Kind, SourceRange Range)
241       : Expression(E), Kind(Kind), Range(std::move(Range)) {}
242 };
243 
244 /// A class to encapsulate lowering of the tool's confidence level.
245 class Confidence {
246 public:
247   enum Level {
248     // Transformations that are likely to change semantics.
249     CL_Risky,
250 
251     // Transformations that might change semantics.
252     CL_Reasonable,
253 
254     // Transformations that will not change semantics.
255     CL_Safe
256   };
257   /// Initialize confidence level.
Confidence(Confidence::Level Level)258   explicit Confidence(Confidence::Level Level) : CurrentLevel(Level) {}
259 
260   /// Lower the internal confidence level to Level, but do not raise it.
lowerTo(Confidence::Level Level)261   void lowerTo(Confidence::Level Level) {
262     CurrentLevel = std::min(Level, CurrentLevel);
263   }
264 
265   /// Return the internal confidence level.
getLevel()266   Level getLevel() const { return CurrentLevel; }
267 
268 private:
269   Level CurrentLevel;
270 };
271 
272 // The main computational result of ForLoopIndexVisitor.
273 using UsageResult = llvm::SmallVector<Usage, 8>;
274 
275 // General functions used by ForLoopIndexUseVisitor and LoopConvertCheck.
276 const Expr *digThroughConstructorsConversions(const Expr *E);
277 bool areSameExpr(ASTContext *Context, const Expr *First, const Expr *Second);
278 const DeclRefExpr *getDeclRef(const Expr *E);
279 bool areSameVariable(const ValueDecl *First, const ValueDecl *Second);
280 
281 /// Discover usages of expressions consisting of index or iterator
282 /// access.
283 ///
284 /// Given an index variable, recursively crawls a for loop to discover if the
285 /// index variable is used in a way consistent with range-based for loop access.
286 class ForLoopIndexUseVisitor
287     : public RecursiveASTVisitor<ForLoopIndexUseVisitor> {
288 public:
289   ForLoopIndexUseVisitor(ASTContext *Context, const VarDecl *IndexVar,
290                          const VarDecl *EndVar, const Expr *ContainerExpr,
291                          const Expr *ArrayBoundExpr,
292                          bool ContainerNeedsDereference);
293 
294   /// Finds all uses of IndexVar in Body, placing all usages in Usages,
295   /// and returns true if IndexVar was only used in a way consistent with a
296   /// range-based for loop.
297   ///
298   /// The general strategy is to reject any DeclRefExprs referencing IndexVar,
299   /// with the exception of certain acceptable patterns.
300   /// For arrays, the DeclRefExpr for IndexVar must appear as the index of an
301   /// ArraySubscriptExpression. Iterator-based loops may dereference
302   /// IndexVar or call methods through operator-> (builtin or overloaded).
303   /// Array-like containers may use IndexVar as a parameter to the at() member
304   /// function and in overloaded operator[].
305   bool findAndVerifyUsages(const Stmt *Body);
306 
307   /// Add a set of components that we should consider relevant to the
308   /// container.
309   void addComponents(const ComponentVector &Components);
310 
311   /// Accessor for Usages.
getUsages()312   const UsageResult &getUsages() const { return Usages; }
313 
314   /// Adds the Usage if it was not added before.
315   void addUsage(const Usage &U);
316 
317   /// Get the container indexed by IndexVar, if any.
getContainerIndexed()318   const Expr *getContainerIndexed() const { return ContainerExpr; }
319 
320   /// Returns the statement declaring the variable created as an alias
321   /// for the loop element, if any.
getAliasDecl()322   const DeclStmt *getAliasDecl() const { return AliasDecl; }
323 
324   /// Accessor for ConfidenceLevel.
getConfidenceLevel()325   Confidence::Level getConfidenceLevel() const {
326     return ConfidenceLevel.getLevel();
327   }
328 
329   /// Indicates if the alias declaration was in a place where it cannot
330   /// simply be removed but rather replaced with a use of the alias variable.
331   /// For example, variables declared in the condition of an if, switch, or for
332   /// stmt.
aliasUseRequired()333   bool aliasUseRequired() const { return ReplaceWithAliasUse; }
334 
335   /// Indicates if the alias declaration came from the init clause of a
336   /// nested for loop. SourceRanges provided by Clang for DeclStmts in this
337   /// case need to be adjusted.
aliasFromForInit()338   bool aliasFromForInit() const { return AliasFromForInit; }
339 
340 private:
341   /// Typedef used in CRTP functions.
342   using VisitorBase = RecursiveASTVisitor<ForLoopIndexUseVisitor>;
343   friend class RecursiveASTVisitor<ForLoopIndexUseVisitor>;
344 
345   /// Overriden methods for RecursiveASTVisitor's traversal.
346   bool TraverseArraySubscriptExpr(ArraySubscriptExpr *E);
347   bool TraverseCXXMemberCallExpr(CXXMemberCallExpr *MemberCall);
348   bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *OpCall);
349   bool TraverseLambdaCapture(LambdaExpr *LE, const LambdaCapture *C,
350                              Expr *Init);
351   bool TraverseMemberExpr(MemberExpr *Member);
352   bool TraverseUnaryOperator(UnaryOperator *Uop);
353   bool VisitDeclRefExpr(DeclRefExpr *E);
354   bool VisitDeclStmt(DeclStmt *S);
355   bool TraverseStmt(Stmt *S);
356 
357   /// Add an expression to the list of expressions on which the container
358   /// expression depends.
359   void addComponent(const Expr *E);
360 
361   // Input member variables:
362   ASTContext *Context;
363   /// The index variable's VarDecl.
364   const VarDecl *IndexVar;
365   /// The loop's 'end' variable, which cannot be mentioned at all.
366   const VarDecl *EndVar;
367   /// The Expr which refers to the container.
368   const Expr *ContainerExpr;
369   /// The Expr which refers to the terminating condition for array-based loops.
370   const Expr *ArrayBoundExpr;
371   bool ContainerNeedsDereference;
372 
373   // Output member variables:
374   /// A container which holds all usages of IndexVar as the index of
375   /// ArraySubscriptExpressions.
376   UsageResult Usages;
377   llvm::SmallSet<SourceLocation, 8> UsageLocations;
378   bool OnlyUsedAsIndex = true;
379   /// The DeclStmt for an alias to the container element.
380   const DeclStmt *AliasDecl = nullptr;
381   Confidence ConfidenceLevel;
382   /// A list of expressions on which ContainerExpr depends.
383   ///
384   /// If any of these expressions are encountered outside of an acceptable usage
385   /// of the loop element, lower our confidence level.
386   llvm::SmallVector<std::pair<const Expr *, llvm::FoldingSetNodeID>, 16>
387       DependentExprs;
388 
389   /// The parent-in-waiting. Will become the real parent once we traverse down
390   /// one level in the AST.
391   const Stmt *NextStmtParent = nullptr;
392   /// The actual parent of a node when Visit*() calls are made. Only the
393   /// parentage of DeclStmt's to possible iteration/selection statements is of
394   /// importance.
395   const Stmt *CurrStmtParent = nullptr;
396 
397   /// \see aliasUseRequired().
398   bool ReplaceWithAliasUse = false;
399   /// \see aliasFromForInit().
400   bool AliasFromForInit = false;
401 };
402 
403 struct TUTrackingInfo {
404   /// Reset and initialize per-TU tracking information.
405   ///
406   /// Must be called before using container accessors.
TUTrackingInfoTUTrackingInfo407   TUTrackingInfo() : ParentFinder(new StmtAncestorASTVisitor) {}
408 
getParentFinderTUTrackingInfo409   StmtAncestorASTVisitor &getParentFinder() { return *ParentFinder; }
getGeneratedDeclsTUTrackingInfo410   StmtGeneratedVarNameMap &getGeneratedDecls() { return GeneratedDecls; }
getReplacedVarsTUTrackingInfo411   ReplacedVarsMap &getReplacedVars() { return ReplacedVars; }
412 
413 private:
414   std::unique_ptr<StmtAncestorASTVisitor> ParentFinder;
415   StmtGeneratedVarNameMap GeneratedDecls;
416   ReplacedVarsMap ReplacedVars;
417 };
418 
419 /// Create names for generated variables within a particular statement.
420 ///
421 /// VariableNamer uses a DeclContext as a reference point, checking for any
422 /// conflicting declarations higher up in the context or within SourceStmt.
423 /// It creates a variable name using hints from a source container and the old
424 /// index, if they exist.
425 class VariableNamer {
426 public:
427   // Supported naming styles.
428   enum NamingStyle {
429     NS_CamelBack,
430     NS_CamelCase,
431     NS_LowerCase,
432     NS_UpperCase,
433   };
434 
VariableNamer(StmtGeneratedVarNameMap * GeneratedDecls,const StmtParentMap * ReverseAST,const clang::Stmt * SourceStmt,const clang::VarDecl * OldIndex,const clang::ValueDecl * TheContainer,const clang::ASTContext * Context,NamingStyle Style)435   VariableNamer(StmtGeneratedVarNameMap *GeneratedDecls,
436                 const StmtParentMap *ReverseAST, const clang::Stmt *SourceStmt,
437                 const clang::VarDecl *OldIndex,
438                 const clang::ValueDecl *TheContainer,
439                 const clang::ASTContext *Context, NamingStyle Style)
440       : GeneratedDecls(GeneratedDecls), ReverseAST(ReverseAST),
441         SourceStmt(SourceStmt), OldIndex(OldIndex), TheContainer(TheContainer),
442         Context(Context), Style(Style) {}
443 
444   /// Generate a new index name.
445   ///
446   /// Generates the name to be used for an inserted iterator. It relies on
447   /// declarationExists() to determine that there are no naming conflicts, and
448   /// tries to use some hints from the container name and the old index name.
449   std::string createIndexName();
450 
451 private:
452   StmtGeneratedVarNameMap *GeneratedDecls;
453   const StmtParentMap *ReverseAST;
454   const clang::Stmt *SourceStmt;
455   const clang::VarDecl *OldIndex;
456   const clang::ValueDecl *TheContainer;
457   const clang::ASTContext *Context;
458   const NamingStyle Style;
459 
460   // Determine whether or not a declaration that would conflict with Symbol
461   // exists in an outer context or in any statement contained in SourceStmt.
462   bool declarationExists(llvm::StringRef Symbol);
463 };
464 
465 } // namespace clang::tidy::modernize
466 
467 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_LOOP_CONVERT_UTILS_H
468