xref: /aosp_15_r20/external/regex-re2/re2/testing/charclass_test.cc (revision ccdc9c3e24c519bfa4832a66aa2e83a52c19f295)
1*ccdc9c3eSSadaf Ebrahimi // Copyright 2006 The RE2 Authors.  All Rights Reserved.
2*ccdc9c3eSSadaf Ebrahimi // Use of this source code is governed by a BSD-style
3*ccdc9c3eSSadaf Ebrahimi // license that can be found in the LICENSE file.
4*ccdc9c3eSSadaf Ebrahimi 
5*ccdc9c3eSSadaf Ebrahimi // Test character class manipulations.
6*ccdc9c3eSSadaf Ebrahimi 
7*ccdc9c3eSSadaf Ebrahimi #include <stdio.h>
8*ccdc9c3eSSadaf Ebrahimi 
9*ccdc9c3eSSadaf Ebrahimi #include "util/test.h"
10*ccdc9c3eSSadaf Ebrahimi #include "util/utf.h"
11*ccdc9c3eSSadaf Ebrahimi #include "re2/regexp.h"
12*ccdc9c3eSSadaf Ebrahimi 
13*ccdc9c3eSSadaf Ebrahimi namespace re2 {
14*ccdc9c3eSSadaf Ebrahimi 
15*ccdc9c3eSSadaf Ebrahimi struct CCTest {
16*ccdc9c3eSSadaf Ebrahimi   struct {
17*ccdc9c3eSSadaf Ebrahimi     Rune lo;
18*ccdc9c3eSSadaf Ebrahimi     Rune hi;
19*ccdc9c3eSSadaf Ebrahimi   } add[10];
20*ccdc9c3eSSadaf Ebrahimi   int remove;
21*ccdc9c3eSSadaf Ebrahimi   struct {
22*ccdc9c3eSSadaf Ebrahimi     Rune lo;
23*ccdc9c3eSSadaf Ebrahimi     Rune hi;
24*ccdc9c3eSSadaf Ebrahimi   } final[10];
25*ccdc9c3eSSadaf Ebrahimi };
26*ccdc9c3eSSadaf Ebrahimi 
27*ccdc9c3eSSadaf Ebrahimi static CCTest tests[] = {
28*ccdc9c3eSSadaf Ebrahimi   { { { 10, 20 }, {-1} }, -1,
29*ccdc9c3eSSadaf Ebrahimi     { { 10, 20 }, {-1} } },
30*ccdc9c3eSSadaf Ebrahimi 
31*ccdc9c3eSSadaf Ebrahimi   { { { 10, 20 }, { 20, 30 }, {-1} }, -1,
32*ccdc9c3eSSadaf Ebrahimi     { { 10, 30 }, {-1} } },
33*ccdc9c3eSSadaf Ebrahimi 
34*ccdc9c3eSSadaf Ebrahimi   { { { 10, 20 }, { 30, 40 }, { 20, 30 }, {-1} }, -1,
35*ccdc9c3eSSadaf Ebrahimi     { { 10, 40 }, {-1} } },
36*ccdc9c3eSSadaf Ebrahimi 
37*ccdc9c3eSSadaf Ebrahimi   { { { 0, 50 }, { 20, 30 }, {-1} }, -1,
38*ccdc9c3eSSadaf Ebrahimi     { { 0, 50 }, {-1} } },
39*ccdc9c3eSSadaf Ebrahimi 
40*ccdc9c3eSSadaf Ebrahimi   { { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} }, -1,
41*ccdc9c3eSSadaf Ebrahimi     { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
42*ccdc9c3eSSadaf Ebrahimi 
43*ccdc9c3eSSadaf Ebrahimi   { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1,
44*ccdc9c3eSSadaf Ebrahimi     { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
45*ccdc9c3eSSadaf Ebrahimi 
46*ccdc9c3eSSadaf Ebrahimi   { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1,
47*ccdc9c3eSSadaf Ebrahimi     { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
48*ccdc9c3eSSadaf Ebrahimi 
49*ccdc9c3eSSadaf Ebrahimi   { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 5, 25 }, {-1} }, -1,
50*ccdc9c3eSSadaf Ebrahimi     { { 5, 25 }, {-1} } },
51*ccdc9c3eSSadaf Ebrahimi 
52*ccdc9c3eSSadaf Ebrahimi   { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 12, 21 }, {-1} }, -1,
53*ccdc9c3eSSadaf Ebrahimi     { { 10, 23 }, {-1} } },
54*ccdc9c3eSSadaf Ebrahimi 
55*ccdc9c3eSSadaf Ebrahimi   // These check boundary cases during negation.
56*ccdc9c3eSSadaf Ebrahimi   { { { 0, Runemax }, {-1} }, -1,
57*ccdc9c3eSSadaf Ebrahimi     { { 0, Runemax }, {-1} } },
58*ccdc9c3eSSadaf Ebrahimi 
59*ccdc9c3eSSadaf Ebrahimi   { { { 0, 50 }, {-1} }, -1,
60*ccdc9c3eSSadaf Ebrahimi     { { 0, 50 }, {-1} } },
61*ccdc9c3eSSadaf Ebrahimi 
62*ccdc9c3eSSadaf Ebrahimi   { { { 50, Runemax }, {-1} }, -1,
63*ccdc9c3eSSadaf Ebrahimi     { { 50, Runemax }, {-1} } },
64*ccdc9c3eSSadaf Ebrahimi 
65*ccdc9c3eSSadaf Ebrahimi   // Check RemoveAbove.
66*ccdc9c3eSSadaf Ebrahimi   { { { 50, Runemax }, {-1} }, 255,
67*ccdc9c3eSSadaf Ebrahimi     { { 50, 255 }, {-1} } },
68*ccdc9c3eSSadaf Ebrahimi 
69*ccdc9c3eSSadaf Ebrahimi   { { { 50, Runemax }, {-1} }, 65535,
70*ccdc9c3eSSadaf Ebrahimi     { { 50, 65535 }, {-1} } },
71*ccdc9c3eSSadaf Ebrahimi 
72*ccdc9c3eSSadaf Ebrahimi   { { { 50, Runemax }, {-1} }, Runemax,
73*ccdc9c3eSSadaf Ebrahimi     { { 50, Runemax }, {-1} } },
74*ccdc9c3eSSadaf Ebrahimi 
75*ccdc9c3eSSadaf Ebrahimi   { { { 50, 60 }, { 250, 260 }, { 350, 360 }, {-1} }, 255,
76*ccdc9c3eSSadaf Ebrahimi     { { 50, 60 }, { 250, 255 }, {-1} } },
77*ccdc9c3eSSadaf Ebrahimi 
78*ccdc9c3eSSadaf Ebrahimi   { { { 50, 60 }, {-1} }, 255,
79*ccdc9c3eSSadaf Ebrahimi     { { 50, 60 }, {-1} } },
80*ccdc9c3eSSadaf Ebrahimi 
81*ccdc9c3eSSadaf Ebrahimi   { { { 350, 360 }, {-1} }, 255,
82*ccdc9c3eSSadaf Ebrahimi     { {-1} } },
83*ccdc9c3eSSadaf Ebrahimi 
84*ccdc9c3eSSadaf Ebrahimi   { { {-1} }, 255,
85*ccdc9c3eSSadaf Ebrahimi     { {-1} } },
86*ccdc9c3eSSadaf Ebrahimi };
87*ccdc9c3eSSadaf Ebrahimi 
88*ccdc9c3eSSadaf Ebrahimi template<class CharClass>
Broke(const char * desc,const CCTest * t,CharClass * cc)89*ccdc9c3eSSadaf Ebrahimi static void Broke(const char *desc, const CCTest* t, CharClass* cc) {
90*ccdc9c3eSSadaf Ebrahimi   if (t == NULL) {
91*ccdc9c3eSSadaf Ebrahimi     printf("\t%s:", desc);
92*ccdc9c3eSSadaf Ebrahimi   } else {
93*ccdc9c3eSSadaf Ebrahimi     printf("\n");
94*ccdc9c3eSSadaf Ebrahimi     printf("CharClass added: [%s]", desc);
95*ccdc9c3eSSadaf Ebrahimi     for (int k = 0; t->add[k].lo >= 0; k++)
96*ccdc9c3eSSadaf Ebrahimi       printf(" %d-%d", t->add[k].lo, t->add[k].hi);
97*ccdc9c3eSSadaf Ebrahimi     printf("\n");
98*ccdc9c3eSSadaf Ebrahimi     if (t->remove >= 0)
99*ccdc9c3eSSadaf Ebrahimi       printf("Removed > %d\n", t->remove);
100*ccdc9c3eSSadaf Ebrahimi     printf("\twant:");
101*ccdc9c3eSSadaf Ebrahimi     for (int k = 0; t->final[k].lo >= 0; k++)
102*ccdc9c3eSSadaf Ebrahimi       printf(" %d-%d", t->final[k].lo, t->final[k].hi);
103*ccdc9c3eSSadaf Ebrahimi     printf("\n");
104*ccdc9c3eSSadaf Ebrahimi     printf("\thave:");
105*ccdc9c3eSSadaf Ebrahimi   }
106*ccdc9c3eSSadaf Ebrahimi 
107*ccdc9c3eSSadaf Ebrahimi   for (typename CharClass::iterator it = cc->begin(); it != cc->end(); ++it)
108*ccdc9c3eSSadaf Ebrahimi     printf(" %d-%d", it->lo, it->hi);
109*ccdc9c3eSSadaf Ebrahimi   printf("\n");
110*ccdc9c3eSSadaf Ebrahimi }
111*ccdc9c3eSSadaf Ebrahimi 
ShouldContain(CCTest * t,int x)112*ccdc9c3eSSadaf Ebrahimi bool ShouldContain(CCTest *t, int x) {
113*ccdc9c3eSSadaf Ebrahimi   for (int j = 0; t->final[j].lo >= 0; j++)
114*ccdc9c3eSSadaf Ebrahimi     if (t->final[j].lo <= x && x <= t->final[j].hi)
115*ccdc9c3eSSadaf Ebrahimi       return true;
116*ccdc9c3eSSadaf Ebrahimi   return false;
117*ccdc9c3eSSadaf Ebrahimi }
118*ccdc9c3eSSadaf Ebrahimi 
119*ccdc9c3eSSadaf Ebrahimi // Helpers to make templated CorrectCC work with both CharClass and CharClassBuilder.
120*ccdc9c3eSSadaf Ebrahimi 
Negate(CharClass * cc)121*ccdc9c3eSSadaf Ebrahimi CharClass* Negate(CharClass *cc) {
122*ccdc9c3eSSadaf Ebrahimi   return cc->Negate();
123*ccdc9c3eSSadaf Ebrahimi }
124*ccdc9c3eSSadaf Ebrahimi 
Delete(CharClass * cc)125*ccdc9c3eSSadaf Ebrahimi void Delete(CharClass* cc) {
126*ccdc9c3eSSadaf Ebrahimi   cc->Delete();
127*ccdc9c3eSSadaf Ebrahimi }
128*ccdc9c3eSSadaf Ebrahimi 
Negate(CharClassBuilder * cc)129*ccdc9c3eSSadaf Ebrahimi CharClassBuilder* Negate(CharClassBuilder* cc) {
130*ccdc9c3eSSadaf Ebrahimi   CharClassBuilder* ncc = cc->Copy();
131*ccdc9c3eSSadaf Ebrahimi   ncc->Negate();
132*ccdc9c3eSSadaf Ebrahimi   return ncc;
133*ccdc9c3eSSadaf Ebrahimi }
134*ccdc9c3eSSadaf Ebrahimi 
Delete(CharClassBuilder * cc)135*ccdc9c3eSSadaf Ebrahimi void Delete(CharClassBuilder* cc) {
136*ccdc9c3eSSadaf Ebrahimi   delete cc;
137*ccdc9c3eSSadaf Ebrahimi }
138*ccdc9c3eSSadaf Ebrahimi 
139*ccdc9c3eSSadaf Ebrahimi template<class CharClass>
CorrectCC(CharClass * cc,CCTest * t,const char * desc)140*ccdc9c3eSSadaf Ebrahimi bool CorrectCC(CharClass *cc, CCTest *t, const char *desc) {
141*ccdc9c3eSSadaf Ebrahimi   typename CharClass::iterator it = cc->begin();
142*ccdc9c3eSSadaf Ebrahimi   int size = 0;
143*ccdc9c3eSSadaf Ebrahimi   for (int j = 0; t->final[j].lo >= 0; j++, ++it) {
144*ccdc9c3eSSadaf Ebrahimi     if (it == cc->end() ||
145*ccdc9c3eSSadaf Ebrahimi         it->lo != t->final[j].lo ||
146*ccdc9c3eSSadaf Ebrahimi         it->hi != t->final[j].hi) {
147*ccdc9c3eSSadaf Ebrahimi       Broke(desc, t, cc);
148*ccdc9c3eSSadaf Ebrahimi       return false;
149*ccdc9c3eSSadaf Ebrahimi     }
150*ccdc9c3eSSadaf Ebrahimi     size += it->hi - it->lo + 1;
151*ccdc9c3eSSadaf Ebrahimi   }
152*ccdc9c3eSSadaf Ebrahimi   if (it != cc->end()) {
153*ccdc9c3eSSadaf Ebrahimi     Broke(desc, t, cc);
154*ccdc9c3eSSadaf Ebrahimi     return false;
155*ccdc9c3eSSadaf Ebrahimi   }
156*ccdc9c3eSSadaf Ebrahimi   if (cc->size() != size) {
157*ccdc9c3eSSadaf Ebrahimi     Broke(desc, t, cc);
158*ccdc9c3eSSadaf Ebrahimi     printf("wrong size: want %d have %d\n", size, cc->size());
159*ccdc9c3eSSadaf Ebrahimi     return false;
160*ccdc9c3eSSadaf Ebrahimi   }
161*ccdc9c3eSSadaf Ebrahimi 
162*ccdc9c3eSSadaf Ebrahimi   for (int j = 0; j < 101; j++) {
163*ccdc9c3eSSadaf Ebrahimi     if (j == 100)
164*ccdc9c3eSSadaf Ebrahimi       j = Runemax;
165*ccdc9c3eSSadaf Ebrahimi     if (ShouldContain(t, j) != cc->Contains(j)) {
166*ccdc9c3eSSadaf Ebrahimi       Broke(desc, t, cc);
167*ccdc9c3eSSadaf Ebrahimi       printf("want contains(%d)=%d, got %d\n",
168*ccdc9c3eSSadaf Ebrahimi              j, ShouldContain(t, j), cc->Contains(j));
169*ccdc9c3eSSadaf Ebrahimi       return false;
170*ccdc9c3eSSadaf Ebrahimi     }
171*ccdc9c3eSSadaf Ebrahimi   }
172*ccdc9c3eSSadaf Ebrahimi 
173*ccdc9c3eSSadaf Ebrahimi   CharClass* ncc = Negate(cc);
174*ccdc9c3eSSadaf Ebrahimi   for (int j = 0; j < 101; j++) {
175*ccdc9c3eSSadaf Ebrahimi     if (j == 100)
176*ccdc9c3eSSadaf Ebrahimi       j = Runemax;
177*ccdc9c3eSSadaf Ebrahimi     if (ShouldContain(t, j) == ncc->Contains(j)) {
178*ccdc9c3eSSadaf Ebrahimi       Broke(desc, t, cc);
179*ccdc9c3eSSadaf Ebrahimi       Broke("ncc", NULL, ncc);
180*ccdc9c3eSSadaf Ebrahimi       printf("want ncc contains(%d)!=%d, got %d\n",
181*ccdc9c3eSSadaf Ebrahimi              j, ShouldContain(t, j), ncc->Contains(j));
182*ccdc9c3eSSadaf Ebrahimi       Delete(ncc);
183*ccdc9c3eSSadaf Ebrahimi       return false;
184*ccdc9c3eSSadaf Ebrahimi     }
185*ccdc9c3eSSadaf Ebrahimi     if (ncc->size() != Runemax+1 - cc->size()) {
186*ccdc9c3eSSadaf Ebrahimi       Broke(desc, t, cc);
187*ccdc9c3eSSadaf Ebrahimi       Broke("ncc", NULL, ncc);
188*ccdc9c3eSSadaf Ebrahimi       printf("ncc size should be %d is %d\n",
189*ccdc9c3eSSadaf Ebrahimi              Runemax+1 - cc->size(), ncc->size());
190*ccdc9c3eSSadaf Ebrahimi       Delete(ncc);
191*ccdc9c3eSSadaf Ebrahimi       return false;
192*ccdc9c3eSSadaf Ebrahimi     }
193*ccdc9c3eSSadaf Ebrahimi   }
194*ccdc9c3eSSadaf Ebrahimi   Delete(ncc);
195*ccdc9c3eSSadaf Ebrahimi   return true;
196*ccdc9c3eSSadaf Ebrahimi }
197*ccdc9c3eSSadaf Ebrahimi 
TEST(TestCharClassBuilder,Adds)198*ccdc9c3eSSadaf Ebrahimi TEST(TestCharClassBuilder, Adds) {
199*ccdc9c3eSSadaf Ebrahimi   int nfail = 0;
200*ccdc9c3eSSadaf Ebrahimi   for (int i = 0; i < arraysize(tests); i++) {
201*ccdc9c3eSSadaf Ebrahimi     CharClassBuilder ccb;
202*ccdc9c3eSSadaf Ebrahimi     CCTest* t = &tests[i];
203*ccdc9c3eSSadaf Ebrahimi     for (int j = 0; t->add[j].lo >= 0; j++)
204*ccdc9c3eSSadaf Ebrahimi       ccb.AddRange(t->add[j].lo, t->add[j].hi);
205*ccdc9c3eSSadaf Ebrahimi     if (t->remove >= 0)
206*ccdc9c3eSSadaf Ebrahimi       ccb.RemoveAbove(t->remove);
207*ccdc9c3eSSadaf Ebrahimi     if (!CorrectCC(&ccb, t, "before copy (CharClassBuilder)"))
208*ccdc9c3eSSadaf Ebrahimi       nfail++;
209*ccdc9c3eSSadaf Ebrahimi     CharClass* cc = ccb.GetCharClass();
210*ccdc9c3eSSadaf Ebrahimi     if (!CorrectCC(cc, t, "before copy (CharClass)"))
211*ccdc9c3eSSadaf Ebrahimi       nfail++;
212*ccdc9c3eSSadaf Ebrahimi     cc->Delete();
213*ccdc9c3eSSadaf Ebrahimi 
214*ccdc9c3eSSadaf Ebrahimi     CharClassBuilder *ccb1 = ccb.Copy();
215*ccdc9c3eSSadaf Ebrahimi     if (!CorrectCC(ccb1, t, "after copy (CharClassBuilder)"))
216*ccdc9c3eSSadaf Ebrahimi       nfail++;
217*ccdc9c3eSSadaf Ebrahimi     cc = ccb.GetCharClass();
218*ccdc9c3eSSadaf Ebrahimi     if (!CorrectCC(cc, t, "after copy (CharClass)"))
219*ccdc9c3eSSadaf Ebrahimi       nfail++;
220*ccdc9c3eSSadaf Ebrahimi     cc->Delete();
221*ccdc9c3eSSadaf Ebrahimi     delete ccb1;
222*ccdc9c3eSSadaf Ebrahimi   }
223*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ(nfail, 0);
224*ccdc9c3eSSadaf Ebrahimi }
225*ccdc9c3eSSadaf Ebrahimi 
226*ccdc9c3eSSadaf Ebrahimi }  // namespace re2
227