xref: /aosp_15_r20/external/cronet/third_party/re2/src/re2/walker-inl.h (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2006 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 #ifndef RE2_WALKER_INL_H_
6 #define RE2_WALKER_INL_H_
7 
8 // Helper class for traversing Regexps without recursion.
9 // Clients should declare their own subclasses that override
10 // the PreVisit and PostVisit methods, which are called before
11 // and after visiting the subexpressions.
12 
13 // Not quite the Visitor pattern, because (among other things)
14 // the Visitor pattern is recursive.
15 
16 #include <stack>
17 
18 #include "absl/base/macros.h"
19 #include "util/logging.h"
20 #include "re2/regexp.h"
21 
22 namespace re2 {
23 
24 template<typename T> struct WalkState;
25 
26 template<typename T> class Regexp::Walker {
27  public:
28   Walker();
29   virtual ~Walker();
30 
31   // Virtual method called before visiting re's children.
32   // PreVisit passes ownership of its return value to its caller.
33   // The Arg* that PreVisit returns will be passed to PostVisit as pre_arg
34   // and passed to the child PreVisits and PostVisits as parent_arg.
35   // At the top-most Regexp, parent_arg is arg passed to walk.
36   // If PreVisit sets *stop to true, the walk does not recurse
37   // into the children.  Instead it behaves as though the return
38   // value from PreVisit is the return value from PostVisit.
39   // The default PreVisit returns parent_arg.
40   virtual T PreVisit(Regexp* re, T parent_arg, bool* stop);
41 
42   // Virtual method called after visiting re's children.
43   // The pre_arg is the T that PreVisit returned.
44   // The child_args is a vector of the T that the child PostVisits returned.
45   // PostVisit takes ownership of pre_arg.
46   // PostVisit takes ownership of the Ts
47   // in *child_args, but not the vector itself.
48   // PostVisit passes ownership of its return value
49   // to its caller.
50   // The default PostVisit simply returns pre_arg.
51   virtual T PostVisit(Regexp* re, T parent_arg, T pre_arg,
52                       T* child_args, int nchild_args);
53 
54   // Virtual method called to copy a T,
55   // when Walk notices that more than one child is the same re.
56   virtual T Copy(T arg);
57 
58   // Virtual method called to do a "quick visit" of the re,
59   // but not its children.  Only called once the visit budget
60   // has been used up and we're trying to abort the walk
61   // as quickly as possible.  Should return a value that
62   // makes sense for the parent PostVisits still to be run.
63   // This function is (hopefully) only called by
64   // WalkExponential, but must be implemented by all clients,
65   // just in case.
66   virtual T ShortVisit(Regexp* re, T parent_arg) = 0;
67 
68   // Walks over a regular expression.
69   // Top_arg is passed as parent_arg to PreVisit and PostVisit of re.
70   // Returns the T returned by PostVisit on re.
71   T Walk(Regexp* re, T top_arg);
72 
73   // Like Walk, but doesn't use Copy.  This can lead to
74   // exponential runtimes on cross-linked Regexps like the
75   // ones generated by Simplify.  To help limit this,
76   // at most max_visits nodes will be visited and then
77   // the walk will be cut off early.
78   // If the walk *is* cut off early, ShortVisit(re)
79   // will be called on regexps that cannot be fully
80   // visited rather than calling PreVisit/PostVisit.
81   T WalkExponential(Regexp* re, T top_arg, int max_visits);
82 
83   // Clears the stack.  Should never be necessary, since
84   // Walk always enters and exits with an empty stack.
85   // Logs DFATAL if stack is not already clear.
86   void Reset();
87 
88   // Returns whether walk was cut off.
stopped_early()89   bool stopped_early() { return stopped_early_; }
90 
91  private:
92   // Walk state for the entire traversal.
93   std::stack<WalkState<T>> stack_;
94   bool stopped_early_;
95   int max_visits_;
96 
97   T WalkInternal(Regexp* re, T top_arg, bool use_copy);
98 
99   Walker(const Walker&) = delete;
100   Walker& operator=(const Walker&) = delete;
101 };
102 
PreVisit(Regexp * re,T parent_arg,bool * stop)103 template<typename T> T Regexp::Walker<T>::PreVisit(Regexp* re,
104                                                    T parent_arg,
105                                                    bool* stop) {
106   return parent_arg;
107 }
108 
PostVisit(Regexp * re,T parent_arg,T pre_arg,T * child_args,int nchild_args)109 template<typename T> T Regexp::Walker<T>::PostVisit(Regexp* re,
110                                                     T parent_arg,
111                                                     T pre_arg,
112                                                     T* child_args,
113                                                     int nchild_args) {
114   return pre_arg;
115 }
116 
Copy(T arg)117 template<typename T> T Regexp::Walker<T>::Copy(T arg) {
118   return arg;
119 }
120 
121 // State about a single level in the traversal.
122 template<typename T> struct WalkState {
WalkStateWalkState123   WalkState(Regexp* re, T parent)
124     : re(re),
125       n(-1),
126       parent_arg(parent),
127       child_args(NULL) { }
128 
129   Regexp* re;  // The regexp
130   int n;  // The index of the next child to process; -1 means need to PreVisit
131   T parent_arg;  // Accumulated arguments.
132   T pre_arg;
133   T child_arg;  // One-element buffer for child_args.
134   T* child_args;
135 };
136 
Walker()137 template<typename T> Regexp::Walker<T>::Walker() {
138   stopped_early_ = false;
139 }
140 
~Walker()141 template<typename T> Regexp::Walker<T>::~Walker() {
142   Reset();
143 }
144 
145 // Clears the stack.  Should never be necessary, since
146 // Walk always enters and exits with an empty stack.
147 // Logs DFATAL if stack is not already clear.
Reset()148 template<typename T> void Regexp::Walker<T>::Reset() {
149   if (!stack_.empty()) {
150     LOG(DFATAL) << "Stack not empty.";
151     while (!stack_.empty()) {
152       if (stack_.top().re->nsub_ > 1)
153         delete[] stack_.top().child_args;
154       stack_.pop();
155     }
156   }
157 }
158 
WalkInternal(Regexp * re,T top_arg,bool use_copy)159 template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
160                                                        bool use_copy) {
161   Reset();
162 
163   if (re == NULL) {
164     LOG(DFATAL) << "Walk NULL";
165     return top_arg;
166   }
167 
168   stack_.push(WalkState<T>(re, top_arg));
169 
170   WalkState<T>* s;
171   for (;;) {
172     T t;
173     s = &stack_.top();
174     re = s->re;
175     switch (s->n) {
176       case -1: {
177         if (--max_visits_ < 0) {
178           stopped_early_ = true;
179           t = ShortVisit(re, s->parent_arg);
180           break;
181         }
182         bool stop = false;
183         s->pre_arg = PreVisit(re, s->parent_arg, &stop);
184         if (stop) {
185           t = s->pre_arg;
186           break;
187         }
188         s->n = 0;
189         s->child_args = NULL;
190         if (re->nsub_ == 1)
191           s->child_args = &s->child_arg;
192         else if (re->nsub_ > 1)
193           s->child_args = new T[re->nsub_];
194         ABSL_FALLTHROUGH_INTENDED;
195       }
196       default: {
197         if (re->nsub_ > 0) {
198           Regexp** sub = re->sub();
199           if (s->n < re->nsub_) {
200             if (use_copy && s->n > 0 && sub[s->n - 1] == sub[s->n]) {
201               s->child_args[s->n] = Copy(s->child_args[s->n - 1]);
202               s->n++;
203             } else {
204               stack_.push(WalkState<T>(sub[s->n], s->pre_arg));
205             }
206             continue;
207           }
208         }
209 
210         t = PostVisit(re, s->parent_arg, s->pre_arg, s->child_args, s->n);
211         if (re->nsub_ > 1)
212           delete[] s->child_args;
213         break;
214       }
215     }
216 
217     // We've finished stack_.top().
218     // Update next guy down.
219     stack_.pop();
220     if (stack_.empty())
221       return t;
222     s = &stack_.top();
223     if (s->child_args != NULL)
224       s->child_args[s->n] = t;
225     else
226       s->child_arg = t;
227     s->n++;
228   }
229 }
230 
Walk(Regexp * re,T top_arg)231 template<typename T> T Regexp::Walker<T>::Walk(Regexp* re, T top_arg) {
232   // Without the exponential walking behavior,
233   // this budget should be more than enough for any
234   // regexp, and yet not enough to get us in trouble
235   // as far as CPU time.
236   max_visits_ = 1000000;
237   return WalkInternal(re, top_arg, true);
238 }
239 
WalkExponential(Regexp * re,T top_arg,int max_visits)240 template<typename T> T Regexp::Walker<T>::WalkExponential(Regexp* re, T top_arg,
241                                                           int max_visits) {
242   max_visits_ = max_visits;
243   return WalkInternal(re, top_arg, false);
244 }
245 
246 }  // namespace re2
247 
248 #endif  // RE2_WALKER_INL_H_
249