xref: /aosp_15_r20/external/cronet/third_party/re2/src/re2/simplify.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2006 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Rewrite POSIX and other features in re
6 // to use simple extended regular expression features.
7 // Also sort and simplify character classes.
8 
9 #include <algorithm>
10 #include <string>
11 
12 #include "util/logging.h"
13 #include "util/utf.h"
14 #include "re2/pod_array.h"
15 #include "re2/regexp.h"
16 #include "re2/walker-inl.h"
17 
18 namespace re2 {
19 
20 // Parses the regexp src and then simplifies it and sets *dst to the
21 // string representation of the simplified form.  Returns true on success.
22 // Returns false and sets *error (if error != NULL) on error.
SimplifyRegexp(absl::string_view src,ParseFlags flags,std::string * dst,RegexpStatus * status)23 bool Regexp::SimplifyRegexp(absl::string_view src, ParseFlags flags,
24                             std::string* dst, RegexpStatus* status) {
25   Regexp* re = Parse(src, flags, status);
26   if (re == NULL)
27     return false;
28   Regexp* sre = re->Simplify();
29   re->Decref();
30   if (sre == NULL) {
31     if (status) {
32       status->set_code(kRegexpInternalError);
33       status->set_error_arg(src);
34     }
35     return false;
36   }
37   *dst = sre->ToString();
38   sre->Decref();
39   return true;
40 }
41 
42 // Assuming the simple_ flags on the children are accurate,
43 // is this Regexp* simple?
ComputeSimple()44 bool Regexp::ComputeSimple() {
45   Regexp** subs;
46   switch (op_) {
47     case kRegexpNoMatch:
48     case kRegexpEmptyMatch:
49     case kRegexpLiteral:
50     case kRegexpLiteralString:
51     case kRegexpBeginLine:
52     case kRegexpEndLine:
53     case kRegexpBeginText:
54     case kRegexpWordBoundary:
55     case kRegexpNoWordBoundary:
56     case kRegexpEndText:
57     case kRegexpAnyChar:
58     case kRegexpAnyByte:
59     case kRegexpHaveMatch:
60       return true;
61     case kRegexpConcat:
62     case kRegexpAlternate:
63       // These are simple as long as the subpieces are simple.
64       subs = sub();
65       for (int i = 0; i < nsub_; i++)
66         if (!subs[i]->simple())
67           return false;
68       return true;
69     case kRegexpCharClass:
70       // Simple as long as the char class is not empty, not full.
71       if (ccb_ != NULL)
72         return !ccb_->empty() && !ccb_->full();
73       return !cc_->empty() && !cc_->full();
74     case kRegexpCapture:
75       subs = sub();
76       return subs[0]->simple();
77     case kRegexpStar:
78     case kRegexpPlus:
79     case kRegexpQuest:
80       subs = sub();
81       if (!subs[0]->simple())
82         return false;
83       switch (subs[0]->op_) {
84         case kRegexpStar:
85         case kRegexpPlus:
86         case kRegexpQuest:
87         case kRegexpEmptyMatch:
88         case kRegexpNoMatch:
89           return false;
90         default:
91           break;
92       }
93       return true;
94     case kRegexpRepeat:
95       return false;
96   }
97   LOG(DFATAL) << "Case not handled in ComputeSimple: " << op_;
98   return false;
99 }
100 
101 // Walker subclass used by Simplify.
102 // Coalesces runs of star/plus/quest/repeat of the same literal along with any
103 // occurrences of that literal into repeats of that literal. It also works for
104 // char classes, any char and any byte.
105 // PostVisit creates the coalesced result, which should then be simplified.
106 class CoalesceWalker : public Regexp::Walker<Regexp*> {
107  public:
CoalesceWalker()108   CoalesceWalker() {}
109   virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg,
110                             Regexp** child_args, int nchild_args);
111   virtual Regexp* Copy(Regexp* re);
112   virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg);
113 
114  private:
115   // These functions are declared inside CoalesceWalker so that
116   // they can edit the private fields of the Regexps they construct.
117 
118   // Returns true if r1 and r2 can be coalesced. In particular, ensures that
119   // the parse flags are consistent. (They will not be checked again later.)
120   static bool CanCoalesce(Regexp* r1, Regexp* r2);
121 
122   // Coalesces *r1ptr and *r2ptr. In most cases, the array elements afterwards
123   // will be empty match and the coalesced op. In other cases, where part of a
124   // literal string was removed to be coalesced, the array elements afterwards
125   // will be the coalesced op and the remainder of the literal string.
126   static void DoCoalesce(Regexp** r1ptr, Regexp** r2ptr);
127 
128   CoalesceWalker(const CoalesceWalker&) = delete;
129   CoalesceWalker& operator=(const CoalesceWalker&) = delete;
130 };
131 
132 // Walker subclass used by Simplify.
133 // The simplify walk is purely post-recursive: given the simplified children,
134 // PostVisit creates the simplified result.
135 // The child_args are simplified Regexp*s.
136 class SimplifyWalker : public Regexp::Walker<Regexp*> {
137  public:
SimplifyWalker()138   SimplifyWalker() {}
139   virtual Regexp* PreVisit(Regexp* re, Regexp* parent_arg, bool* stop);
140   virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg,
141                             Regexp** child_args, int nchild_args);
142   virtual Regexp* Copy(Regexp* re);
143   virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg);
144 
145  private:
146   // These functions are declared inside SimplifyWalker so that
147   // they can edit the private fields of the Regexps they construct.
148 
149   // Creates a concatenation of two Regexp, consuming refs to re1 and re2.
150   // Caller must Decref return value when done with it.
151   static Regexp* Concat2(Regexp* re1, Regexp* re2, Regexp::ParseFlags flags);
152 
153   // Simplifies the expression re{min,max} in terms of *, +, and ?.
154   // Returns a new regexp.  Does not edit re.  Does not consume reference to re.
155   // Caller must Decref return value when done with it.
156   static Regexp* SimplifyRepeat(Regexp* re, int min, int max,
157                                 Regexp::ParseFlags parse_flags);
158 
159   // Simplifies a character class by expanding any named classes
160   // into rune ranges.  Does not edit re.  Does not consume ref to re.
161   // Caller must Decref return value when done with it.
162   static Regexp* SimplifyCharClass(Regexp* re);
163 
164   SimplifyWalker(const SimplifyWalker&) = delete;
165   SimplifyWalker& operator=(const SimplifyWalker&) = delete;
166 };
167 
168 // Simplifies a regular expression, returning a new regexp.
169 // The new regexp uses traditional Unix egrep features only,
170 // plus the Perl (?:) non-capturing parentheses.
171 // Otherwise, no POSIX or Perl additions.  The new regexp
172 // captures exactly the same subexpressions (with the same indices)
173 // as the original.
174 // Does not edit current object.
175 // Caller must Decref() return value when done with it.
176 
Simplify()177 Regexp* Regexp::Simplify() {
178   CoalesceWalker cw;
179   Regexp* cre = cw.Walk(this, NULL);
180   if (cre == NULL)
181     return NULL;
182   if (cw.stopped_early()) {
183     cre->Decref();
184     return NULL;
185   }
186   SimplifyWalker sw;
187   Regexp* sre = sw.Walk(cre, NULL);
188   cre->Decref();
189   if (sre == NULL)
190     return NULL;
191   if (sw.stopped_early()) {
192     sre->Decref();
193     return NULL;
194   }
195   return sre;
196 }
197 
198 #define Simplify DontCallSimplify  // Avoid accidental recursion
199 
200 // Utility function for PostVisit implementations that compares re->sub() with
201 // child_args to determine whether any child_args changed. In the common case,
202 // where nothing changed, calls Decref() for all child_args and returns false,
203 // so PostVisit must return re->Incref(). Otherwise, returns true.
ChildArgsChanged(Regexp * re,Regexp ** child_args)204 static bool ChildArgsChanged(Regexp* re, Regexp** child_args) {
205   for (int i = 0; i < re->nsub(); i++) {
206     Regexp* sub = re->sub()[i];
207     Regexp* newsub = child_args[i];
208     if (newsub != sub)
209       return true;
210   }
211   for (int i = 0; i < re->nsub(); i++) {
212     Regexp* newsub = child_args[i];
213     newsub->Decref();
214   }
215   return false;
216 }
217 
Copy(Regexp * re)218 Regexp* CoalesceWalker::Copy(Regexp* re) {
219   return re->Incref();
220 }
221 
ShortVisit(Regexp * re,Regexp * parent_arg)222 Regexp* CoalesceWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
223   // Should never be called: we use Walk(), not WalkExponential().
224 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
225   LOG(DFATAL) << "CoalesceWalker::ShortVisit called";
226 #endif
227   return re->Incref();
228 }
229 
PostVisit(Regexp * re,Regexp * parent_arg,Regexp * pre_arg,Regexp ** child_args,int nchild_args)230 Regexp* CoalesceWalker::PostVisit(Regexp* re,
231                                   Regexp* parent_arg,
232                                   Regexp* pre_arg,
233                                   Regexp** child_args,
234                                   int nchild_args) {
235   if (re->nsub() == 0)
236     return re->Incref();
237 
238   if (re->op() != kRegexpConcat) {
239     if (!ChildArgsChanged(re, child_args))
240       return re->Incref();
241 
242     // Something changed. Build a new op.
243     Regexp* nre = new Regexp(re->op(), re->parse_flags());
244     nre->AllocSub(re->nsub());
245     Regexp** nre_subs = nre->sub();
246     for (int i = 0; i < re->nsub(); i++)
247       nre_subs[i] = child_args[i];
248     // Repeats and Captures have additional data that must be copied.
249     if (re->op() == kRegexpRepeat) {
250       nre->min_ = re->min();
251       nre->max_ = re->max();
252     } else if (re->op() == kRegexpCapture) {
253       nre->cap_ = re->cap();
254     }
255     return nre;
256   }
257 
258   bool can_coalesce = false;
259   for (int i = 0; i < re->nsub(); i++) {
260     if (i+1 < re->nsub() &&
261         CanCoalesce(child_args[i], child_args[i+1])) {
262       can_coalesce = true;
263       break;
264     }
265   }
266   if (!can_coalesce) {
267     if (!ChildArgsChanged(re, child_args))
268       return re->Incref();
269 
270     // Something changed. Build a new op.
271     Regexp* nre = new Regexp(re->op(), re->parse_flags());
272     nre->AllocSub(re->nsub());
273     Regexp** nre_subs = nre->sub();
274     for (int i = 0; i < re->nsub(); i++)
275       nre_subs[i] = child_args[i];
276     return nre;
277   }
278 
279   for (int i = 0; i < re->nsub(); i++) {
280     if (i+1 < re->nsub() &&
281         CanCoalesce(child_args[i], child_args[i+1]))
282       DoCoalesce(&child_args[i], &child_args[i+1]);
283   }
284   // Determine how many empty matches were left by DoCoalesce.
285   int n = 0;
286   for (int i = n; i < re->nsub(); i++) {
287     if (child_args[i]->op() == kRegexpEmptyMatch)
288       n++;
289   }
290   // Build a new op.
291   Regexp* nre = new Regexp(re->op(), re->parse_flags());
292   nre->AllocSub(re->nsub() - n);
293   Regexp** nre_subs = nre->sub();
294   for (int i = 0, j = 0; i < re->nsub(); i++) {
295     if (child_args[i]->op() == kRegexpEmptyMatch) {
296       child_args[i]->Decref();
297       continue;
298     }
299     nre_subs[j] = child_args[i];
300     j++;
301   }
302   return nre;
303 }
304 
CanCoalesce(Regexp * r1,Regexp * r2)305 bool CoalesceWalker::CanCoalesce(Regexp* r1, Regexp* r2) {
306   // r1 must be a star/plus/quest/repeat of a literal, char class, any char or
307   // any byte.
308   if ((r1->op() == kRegexpStar ||
309        r1->op() == kRegexpPlus ||
310        r1->op() == kRegexpQuest ||
311        r1->op() == kRegexpRepeat) &&
312       (r1->sub()[0]->op() == kRegexpLiteral ||
313        r1->sub()[0]->op() == kRegexpCharClass ||
314        r1->sub()[0]->op() == kRegexpAnyChar ||
315        r1->sub()[0]->op() == kRegexpAnyByte)) {
316     // r2 must be a star/plus/quest/repeat of the same literal, char class,
317     // any char or any byte.
318     if ((r2->op() == kRegexpStar ||
319          r2->op() == kRegexpPlus ||
320          r2->op() == kRegexpQuest ||
321          r2->op() == kRegexpRepeat) &&
322         Regexp::Equal(r1->sub()[0], r2->sub()[0]) &&
323         // The parse flags must be consistent.
324         ((r1->parse_flags() & Regexp::NonGreedy) ==
325          (r2->parse_flags() & Regexp::NonGreedy))) {
326       return true;
327     }
328     // ... OR an occurrence of that literal, char class, any char or any byte
329     if (Regexp::Equal(r1->sub()[0], r2)) {
330       return true;
331     }
332     // ... OR a literal string that begins with that literal.
333     if (r1->sub()[0]->op() == kRegexpLiteral &&
334         r2->op() == kRegexpLiteralString &&
335         r2->runes()[0] == r1->sub()[0]->rune() &&
336         // The parse flags must be consistent.
337         ((r1->sub()[0]->parse_flags() & Regexp::FoldCase) ==
338          (r2->parse_flags() & Regexp::FoldCase))) {
339       return true;
340     }
341   }
342   return false;
343 }
344 
DoCoalesce(Regexp ** r1ptr,Regexp ** r2ptr)345 void CoalesceWalker::DoCoalesce(Regexp** r1ptr, Regexp** r2ptr) {
346   Regexp* r1 = *r1ptr;
347   Regexp* r2 = *r2ptr;
348 
349   Regexp* nre = Regexp::Repeat(
350       r1->sub()[0]->Incref(), r1->parse_flags(), 0, 0);
351 
352   switch (r1->op()) {
353     case kRegexpStar:
354       nre->min_ = 0;
355       nre->max_ = -1;
356       break;
357 
358     case kRegexpPlus:
359       nre->min_ = 1;
360       nre->max_ = -1;
361       break;
362 
363     case kRegexpQuest:
364       nre->min_ = 0;
365       nre->max_ = 1;
366       break;
367 
368     case kRegexpRepeat:
369       nre->min_ = r1->min();
370       nre->max_ = r1->max();
371       break;
372 
373     default:
374       nre->Decref();
375       LOG(DFATAL) << "DoCoalesce failed: r1->op() is " << r1->op();
376       return;
377   }
378 
379   switch (r2->op()) {
380     case kRegexpStar:
381       nre->max_ = -1;
382       goto LeaveEmpty;
383 
384     case kRegexpPlus:
385       nre->min_++;
386       nre->max_ = -1;
387       goto LeaveEmpty;
388 
389     case kRegexpQuest:
390       if (nre->max() != -1)
391         nre->max_++;
392       goto LeaveEmpty;
393 
394     case kRegexpRepeat:
395       nre->min_ += r2->min();
396       if (r2->max() == -1)
397         nre->max_ = -1;
398       else if (nre->max() != -1)
399         nre->max_ += r2->max();
400       goto LeaveEmpty;
401 
402     case kRegexpLiteral:
403     case kRegexpCharClass:
404     case kRegexpAnyChar:
405     case kRegexpAnyByte:
406       nre->min_++;
407       if (nre->max() != -1)
408         nre->max_++;
409       goto LeaveEmpty;
410 
411     LeaveEmpty:
412       *r1ptr = new Regexp(kRegexpEmptyMatch, Regexp::NoParseFlags);
413       *r2ptr = nre;
414       break;
415 
416     case kRegexpLiteralString: {
417       Rune r = r1->sub()[0]->rune();
418       // Determine how much of the literal string is removed.
419       // We know that we have at least one rune. :)
420       int n = 1;
421       while (n < r2->nrunes() && r2->runes()[n] == r)
422         n++;
423       nre->min_ += n;
424       if (nre->max() != -1)
425         nre->max_ += n;
426       if (n == r2->nrunes())
427         goto LeaveEmpty;
428       *r1ptr = nre;
429       *r2ptr = Regexp::LiteralString(
430           &r2->runes()[n], r2->nrunes() - n, r2->parse_flags());
431       break;
432     }
433 
434     default:
435       nre->Decref();
436       LOG(DFATAL) << "DoCoalesce failed: r2->op() is " << r2->op();
437       return;
438   }
439 
440   r1->Decref();
441   r2->Decref();
442 }
443 
Copy(Regexp * re)444 Regexp* SimplifyWalker::Copy(Regexp* re) {
445   return re->Incref();
446 }
447 
ShortVisit(Regexp * re,Regexp * parent_arg)448 Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
449   // Should never be called: we use Walk(), not WalkExponential().
450 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
451   LOG(DFATAL) << "SimplifyWalker::ShortVisit called";
452 #endif
453   return re->Incref();
454 }
455 
PreVisit(Regexp * re,Regexp * parent_arg,bool * stop)456 Regexp* SimplifyWalker::PreVisit(Regexp* re, Regexp* parent_arg, bool* stop) {
457   if (re->simple()) {
458     *stop = true;
459     return re->Incref();
460   }
461   return NULL;
462 }
463 
PostVisit(Regexp * re,Regexp * parent_arg,Regexp * pre_arg,Regexp ** child_args,int nchild_args)464 Regexp* SimplifyWalker::PostVisit(Regexp* re,
465                                   Regexp* parent_arg,
466                                   Regexp* pre_arg,
467                                   Regexp** child_args,
468                                   int nchild_args) {
469   switch (re->op()) {
470     case kRegexpNoMatch:
471     case kRegexpEmptyMatch:
472     case kRegexpLiteral:
473     case kRegexpLiteralString:
474     case kRegexpBeginLine:
475     case kRegexpEndLine:
476     case kRegexpBeginText:
477     case kRegexpWordBoundary:
478     case kRegexpNoWordBoundary:
479     case kRegexpEndText:
480     case kRegexpAnyChar:
481     case kRegexpAnyByte:
482     case kRegexpHaveMatch:
483       // All these are always simple.
484       re->simple_ = true;
485       return re->Incref();
486 
487     case kRegexpConcat:
488     case kRegexpAlternate: {
489       // These are simple as long as the subpieces are simple.
490       if (!ChildArgsChanged(re, child_args)) {
491         re->simple_ = true;
492         return re->Incref();
493       }
494       Regexp* nre = new Regexp(re->op(), re->parse_flags());
495       nre->AllocSub(re->nsub());
496       Regexp** nre_subs = nre->sub();
497       for (int i = 0; i < re->nsub(); i++)
498         nre_subs[i] = child_args[i];
499       nre->simple_ = true;
500       return nre;
501     }
502 
503     case kRegexpCapture: {
504       Regexp* newsub = child_args[0];
505       if (newsub == re->sub()[0]) {
506         newsub->Decref();
507         re->simple_ = true;
508         return re->Incref();
509       }
510       Regexp* nre = new Regexp(kRegexpCapture, re->parse_flags());
511       nre->AllocSub(1);
512       nre->sub()[0] = newsub;
513       nre->cap_ = re->cap();
514       nre->simple_ = true;
515       return nre;
516     }
517 
518     case kRegexpStar:
519     case kRegexpPlus:
520     case kRegexpQuest: {
521       Regexp* newsub = child_args[0];
522       // Special case: repeat the empty string as much as
523       // you want, but it's still the empty string.
524       if (newsub->op() == kRegexpEmptyMatch)
525         return newsub;
526 
527       // These are simple as long as the subpiece is simple.
528       if (newsub == re->sub()[0]) {
529         newsub->Decref();
530         re->simple_ = true;
531         return re->Incref();
532       }
533 
534       // These are also idempotent if flags are constant.
535       if (re->op() == newsub->op() &&
536           re->parse_flags() == newsub->parse_flags())
537         return newsub;
538 
539       Regexp* nre = new Regexp(re->op(), re->parse_flags());
540       nre->AllocSub(1);
541       nre->sub()[0] = newsub;
542       nre->simple_ = true;
543       return nre;
544     }
545 
546     case kRegexpRepeat: {
547       Regexp* newsub = child_args[0];
548       // Special case: repeat the empty string as much as
549       // you want, but it's still the empty string.
550       if (newsub->op() == kRegexpEmptyMatch)
551         return newsub;
552 
553       Regexp* nre = SimplifyRepeat(newsub, re->min_, re->max_,
554                                    re->parse_flags());
555       newsub->Decref();
556       nre->simple_ = true;
557       return nre;
558     }
559 
560     case kRegexpCharClass: {
561       Regexp* nre = SimplifyCharClass(re);
562       nre->simple_ = true;
563       return nre;
564     }
565   }
566 
567   LOG(ERROR) << "Simplify case not handled: " << re->op();
568   return re->Incref();
569 }
570 
571 // Creates a concatenation of two Regexp, consuming refs to re1 and re2.
572 // Returns a new Regexp, handing the ref to the caller.
Concat2(Regexp * re1,Regexp * re2,Regexp::ParseFlags parse_flags)573 Regexp* SimplifyWalker::Concat2(Regexp* re1, Regexp* re2,
574                                 Regexp::ParseFlags parse_flags) {
575   Regexp* re = new Regexp(kRegexpConcat, parse_flags);
576   re->AllocSub(2);
577   Regexp** subs = re->sub();
578   subs[0] = re1;
579   subs[1] = re2;
580   return re;
581 }
582 
583 // Returns true if re is an empty-width op.
IsEmptyOp(Regexp * re)584 static bool IsEmptyOp(Regexp* re) {
585   return (re->op() == kRegexpBeginLine ||
586           re->op() == kRegexpEndLine ||
587           re->op() == kRegexpWordBoundary ||
588           re->op() == kRegexpNoWordBoundary ||
589           re->op() == kRegexpBeginText ||
590           re->op() == kRegexpEndText);
591 }
592 
593 // Simplifies the expression re{min,max} in terms of *, +, and ?.
594 // Returns a new regexp.  Does not edit re.  Does not consume reference to re.
595 // Caller must Decref return value when done with it.
596 // The result will *not* necessarily have the right capturing parens
597 // if you call ToString() and re-parse it: (x){2} becomes (x)(x),
598 // but in the Regexp* representation, both (x) are marked as $1.
SimplifyRepeat(Regexp * re,int min,int max,Regexp::ParseFlags f)599 Regexp* SimplifyWalker::SimplifyRepeat(Regexp* re, int min, int max,
600                                        Regexp::ParseFlags f) {
601   // For an empty-width op OR a concatenation or alternation of empty-width
602   // ops, cap the repetition count at 1.
603   if (IsEmptyOp(re) ||
604       ((re->op() == kRegexpConcat ||
605         re->op() == kRegexpAlternate) &&
606        std::all_of(re->sub(), re->sub() + re->nsub(), IsEmptyOp))) {
607     min = std::min(min, 1);
608     max = std::min(max, 1);
609   }
610 
611   // x{n,} means at least n matches of x.
612   if (max == -1) {
613     // Special case: x{0,} is x*
614     if (min == 0)
615       return Regexp::Star(re->Incref(), f);
616 
617     // Special case: x{1,} is x+
618     if (min == 1)
619       return Regexp::Plus(re->Incref(), f);
620 
621     // General case: x{4,} is xxxx+
622     PODArray<Regexp*> nre_subs(min);
623     for (int i = 0; i < min-1; i++)
624       nre_subs[i] = re->Incref();
625     nre_subs[min-1] = Regexp::Plus(re->Incref(), f);
626     return Regexp::Concat(nre_subs.data(), min, f);
627   }
628 
629   // Special case: (x){0} matches only empty string.
630   if (min == 0 && max == 0)
631     return new Regexp(kRegexpEmptyMatch, f);
632 
633   // Special case: x{1} is just x.
634   if (min == 1 && max == 1)
635     return re->Incref();
636 
637   // General case: x{n,m} means n copies of x and m copies of x?.
638   // The machine will do less work if we nest the final m copies,
639   // so that x{2,5} = xx(x(x(x)?)?)?
640 
641   // Build leading prefix: xx.  Capturing only on the last one.
642   Regexp* nre = NULL;
643   if (min > 0) {
644     PODArray<Regexp*> nre_subs(min);
645     for (int i = 0; i < min; i++)
646       nre_subs[i] = re->Incref();
647     nre = Regexp::Concat(nre_subs.data(), min, f);
648   }
649 
650   // Build and attach suffix: (x(x(x)?)?)?
651   if (max > min) {
652     Regexp* suf = Regexp::Quest(re->Incref(), f);
653     for (int i = min+1; i < max; i++)
654       suf = Regexp::Quest(Concat2(re->Incref(), suf, f), f);
655     if (nre == NULL)
656       nre = suf;
657     else
658       nre = Concat2(nre, suf, f);
659   }
660 
661   if (nre == NULL) {
662     // Some degenerate case, like min > max, or min < max < 0.
663     // This shouldn't happen, because the parser rejects such regexps.
664     LOG(DFATAL) << "Malformed repeat " << re->ToString() << " " << min << " " << max;
665     return new Regexp(kRegexpNoMatch, f);
666   }
667 
668   return nre;
669 }
670 
671 // Simplifies a character class.
672 // Caller must Decref return value when done with it.
SimplifyCharClass(Regexp * re)673 Regexp* SimplifyWalker::SimplifyCharClass(Regexp* re) {
674   CharClass* cc = re->cc();
675 
676   // Special cases
677   if (cc->empty())
678     return new Regexp(kRegexpNoMatch, re->parse_flags());
679   if (cc->full())
680     return new Regexp(kRegexpAnyChar, re->parse_flags());
681 
682   return re->Incref();
683 }
684 
685 }  // namespace re2
686