1 // Copyright 2006 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Dump the regexp into a string showing structure.
6 // Tested by parse_unittest.cc
7
8 // This function traverses the regexp recursively,
9 // meaning that on inputs like Regexp::Simplify of
10 // a{100}{100}{100}{100}{100}{100}{100}{100}{100}{100},
11 // it takes time and space exponential in the size of the
12 // original regular expression. It can also use stack space
13 // linear in the size of the regular expression for inputs
14 // like ((((((((((((((((a*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*.
15 // IT IS NOT SAFE TO CALL FROM PRODUCTION CODE.
16 // As a result, Dump is provided only in the testing
17 // library (see BUILD).
18
19 #include <string>
20
21 #include "util/test.h"
22 #include "util/logging.h"
23 #include "util/strutil.h"
24 #include "util/utf.h"
25 #include "re2/stringpiece.h"
26 #include "re2/regexp.h"
27
28 // Cause a link error if this file is used outside of testing.
29 DECLARE_string(test_tmpdir);
30
31 namespace re2 {
32
33 static const char* kOpcodeNames[] = {
34 "bad",
35 "no",
36 "emp",
37 "lit",
38 "str",
39 "cat",
40 "alt",
41 "star",
42 "plus",
43 "que",
44 "rep",
45 "cap",
46 "dot",
47 "byte",
48 "bol",
49 "eol",
50 "wb", // kRegexpWordBoundary
51 "nwb", // kRegexpNoWordBoundary
52 "bot",
53 "eot",
54 "cc",
55 "match",
56 };
57
58 // Create string representation of regexp with explicit structure.
59 // Nothing pretty, just for testing.
DumpRegexpAppending(Regexp * re,string * s)60 static void DumpRegexpAppending(Regexp* re, string* s) {
61 if (re->op() < 0 || re->op() >= arraysize(kOpcodeNames)) {
62 StringAppendF(s, "op%d", re->op());
63 } else {
64 switch (re->op()) {
65 default:
66 break;
67 case kRegexpStar:
68 case kRegexpPlus:
69 case kRegexpQuest:
70 case kRegexpRepeat:
71 if (re->parse_flags() & Regexp::NonGreedy)
72 s->append("n");
73 break;
74 }
75 s->append(kOpcodeNames[re->op()]);
76 if (re->op() == kRegexpLiteral && (re->parse_flags() & Regexp::FoldCase)) {
77 Rune r = re->rune();
78 if ('a' <= r && r <= 'z')
79 s->append("fold");
80 }
81 if (re->op() == kRegexpLiteralString && (re->parse_flags() & Regexp::FoldCase)) {
82 for (int i = 0; i < re->nrunes(); i++) {
83 Rune r = re->runes()[i];
84 if ('a' <= r && r <= 'z') {
85 s->append("fold");
86 break;
87 }
88 }
89 }
90 }
91 s->append("{");
92 switch (re->op()) {
93 default:
94 break;
95 case kRegexpEndText:
96 if (!(re->parse_flags() & Regexp::WasDollar)) {
97 s->append("\\z");
98 }
99 break;
100 case kRegexpLiteral: {
101 Rune r = re->rune();
102 char buf[UTFmax+1];
103 buf[runetochar(buf, &r)] = 0;
104 s->append(buf);
105 break;
106 }
107 case kRegexpLiteralString:
108 for (int i = 0; i < re->nrunes(); i++) {
109 Rune r = re->runes()[i];
110 char buf[UTFmax+1];
111 buf[runetochar(buf, &r)] = 0;
112 s->append(buf);
113 }
114 break;
115 case kRegexpConcat:
116 case kRegexpAlternate:
117 for (int i = 0; i < re->nsub(); i++)
118 DumpRegexpAppending(re->sub()[i], s);
119 break;
120 case kRegexpStar:
121 case kRegexpPlus:
122 case kRegexpQuest:
123 DumpRegexpAppending(re->sub()[0], s);
124 break;
125 case kRegexpCapture:
126 if (re->cap() == 0)
127 LOG(DFATAL) << "kRegexpCapture cap() == 0";
128 if (re->name()) {
129 s->append(*re->name());
130 s->append(":");
131 }
132 DumpRegexpAppending(re->sub()[0], s);
133 break;
134 case kRegexpRepeat:
135 s->append(StringPrintf("%d,%d ", re->min(), re->max()));
136 DumpRegexpAppending(re->sub()[0], s);
137 break;
138 case kRegexpCharClass: {
139 string sep;
140 for (CharClass::iterator it = re->cc()->begin();
141 it != re->cc()->end(); ++it) {
142 RuneRange rr = *it;
143 s->append(sep);
144 if (rr.lo == rr.hi)
145 s->append(StringPrintf("%#x", rr.lo));
146 else
147 s->append(StringPrintf("%#x-%#x", rr.lo, rr.hi));
148 sep = " ";
149 }
150 break;
151 }
152 }
153 s->append("}");
154 }
155
Dump()156 string Regexp::Dump() {
157 string s;
158
159 // Make sure being called from a unit test.
160 if (FLAGS_test_tmpdir.empty()) {
161 LOG(ERROR) << "Cannot use except for testing.";
162 return s;
163 }
164
165 DumpRegexpAppending(this, &s);
166 return s;
167 }
168
169 } // namespace re2
170