xref: /aosp_15_r20/external/regex-re2/re2/testing/dump.cc (revision ccdc9c3e24c519bfa4832a66aa2e83a52c19f295)
1 // Copyright 2006 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Dump the regexp into a string showing structure.
6 // Tested by parse_unittest.cc
7 
8 // This function traverses the regexp recursively,
9 // meaning that on inputs like Regexp::Simplify of
10 // a{100}{100}{100}{100}{100}{100}{100}{100}{100}{100},
11 // it takes time and space exponential in the size of the
12 // original regular expression.  It can also use stack space
13 // linear in the size of the regular expression for inputs
14 // like ((((((((((((((((a*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*.
15 // IT IS NOT SAFE TO CALL FROM PRODUCTION CODE.
16 // As a result, Dump is provided only in the testing
17 // library (see BUILD).
18 
19 #include <string>
20 
21 #include "util/test.h"
22 #include "util/logging.h"
23 #include "util/strutil.h"
24 #include "util/utf.h"
25 #include "re2/stringpiece.h"
26 #include "re2/regexp.h"
27 
28 // Cause a link error if this file is used outside of testing.
29 DECLARE_string(test_tmpdir);
30 
31 namespace re2 {
32 
33 static const char* kOpcodeNames[] = {
34   "bad",
35   "no",
36   "emp",
37   "lit",
38   "str",
39   "cat",
40   "alt",
41   "star",
42   "plus",
43   "que",
44   "rep",
45   "cap",
46   "dot",
47   "byte",
48   "bol",
49   "eol",
50   "wb",   // kRegexpWordBoundary
51   "nwb",  // kRegexpNoWordBoundary
52   "bot",
53   "eot",
54   "cc",
55   "match",
56 };
57 
58 // Create string representation of regexp with explicit structure.
59 // Nothing pretty, just for testing.
DumpRegexpAppending(Regexp * re,string * s)60 static void DumpRegexpAppending(Regexp* re, string* s) {
61   if (re->op() < 0 || re->op() >= arraysize(kOpcodeNames)) {
62     StringAppendF(s, "op%d", re->op());
63   } else {
64     switch (re->op()) {
65       default:
66         break;
67       case kRegexpStar:
68       case kRegexpPlus:
69       case kRegexpQuest:
70       case kRegexpRepeat:
71         if (re->parse_flags() & Regexp::NonGreedy)
72           s->append("n");
73         break;
74     }
75     s->append(kOpcodeNames[re->op()]);
76     if (re->op() == kRegexpLiteral && (re->parse_flags() & Regexp::FoldCase)) {
77       Rune r = re->rune();
78       if ('a' <= r && r <= 'z')
79         s->append("fold");
80     }
81     if (re->op() == kRegexpLiteralString && (re->parse_flags() & Regexp::FoldCase)) {
82       for (int i = 0; i < re->nrunes(); i++) {
83         Rune r = re->runes()[i];
84         if ('a' <= r && r <= 'z') {
85           s->append("fold");
86           break;
87         }
88       }
89     }
90   }
91   s->append("{");
92   switch (re->op()) {
93     default:
94       break;
95     case kRegexpEndText:
96       if (!(re->parse_flags() & Regexp::WasDollar)) {
97         s->append("\\z");
98       }
99       break;
100     case kRegexpLiteral: {
101       Rune r = re->rune();
102       char buf[UTFmax+1];
103       buf[runetochar(buf, &r)] = 0;
104       s->append(buf);
105       break;
106     }
107     case kRegexpLiteralString:
108       for (int i = 0; i < re->nrunes(); i++) {
109         Rune r = re->runes()[i];
110         char buf[UTFmax+1];
111         buf[runetochar(buf, &r)] = 0;
112         s->append(buf);
113       }
114       break;
115     case kRegexpConcat:
116     case kRegexpAlternate:
117       for (int i = 0; i < re->nsub(); i++)
118         DumpRegexpAppending(re->sub()[i], s);
119       break;
120     case kRegexpStar:
121     case kRegexpPlus:
122     case kRegexpQuest:
123       DumpRegexpAppending(re->sub()[0], s);
124       break;
125     case kRegexpCapture:
126       if (re->cap() == 0)
127         LOG(DFATAL) << "kRegexpCapture cap() == 0";
128       if (re->name()) {
129         s->append(*re->name());
130         s->append(":");
131       }
132       DumpRegexpAppending(re->sub()[0], s);
133       break;
134     case kRegexpRepeat:
135       s->append(StringPrintf("%d,%d ", re->min(), re->max()));
136       DumpRegexpAppending(re->sub()[0], s);
137       break;
138     case kRegexpCharClass: {
139       string sep;
140       for (CharClass::iterator it = re->cc()->begin();
141            it != re->cc()->end(); ++it) {
142         RuneRange rr = *it;
143         s->append(sep);
144         if (rr.lo == rr.hi)
145           s->append(StringPrintf("%#x", rr.lo));
146         else
147           s->append(StringPrintf("%#x-%#x", rr.lo, rr.hi));
148         sep = " ";
149       }
150       break;
151     }
152   }
153   s->append("}");
154 }
155 
Dump()156 string Regexp::Dump() {
157   string s;
158 
159   // Make sure being called from a unit test.
160   if (FLAGS_test_tmpdir.empty()) {
161     LOG(ERROR) << "Cannot use except for testing.";
162     return s;
163   }
164 
165   DumpRegexpAppending(this, &s);
166   return s;
167 }
168 
169 }  // namespace re2
170