// xref: /aosp_15_r20/external/regex-re2/re2/testing/mimics_pcre_test.cc (revision ccdc9c3e24c519bfa4832a66aa2e83a52c19f295)
// Copyright 2008 The RE2 Authors.  All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include <stddef.h>

#include "util/test.h"
#include "util/logging.h"
#include "re2/prog.h"
#include "re2/regexp.h"

10*ccdc9c3eSSadaf Ebrahimi namespace re2 {
11*ccdc9c3eSSadaf Ebrahimi 
12*ccdc9c3eSSadaf Ebrahimi struct PCRETest {
13*ccdc9c3eSSadaf Ebrahimi   const char* regexp;
14*ccdc9c3eSSadaf Ebrahimi   bool should_match;
15*ccdc9c3eSSadaf Ebrahimi };
16*ccdc9c3eSSadaf Ebrahimi 
17*ccdc9c3eSSadaf Ebrahimi static PCRETest tests[] = {
18*ccdc9c3eSSadaf Ebrahimi   // Most things should behave exactly.
19*ccdc9c3eSSadaf Ebrahimi   { "abc",       true  },
20*ccdc9c3eSSadaf Ebrahimi   { "(a|b)c",    true  },
21*ccdc9c3eSSadaf Ebrahimi   { "(a*|b)c",   true  },
22*ccdc9c3eSSadaf Ebrahimi   { "(a|b*)c",   true  },
23*ccdc9c3eSSadaf Ebrahimi   { "a(b|c)d",   true  },
24*ccdc9c3eSSadaf Ebrahimi   { "a(()|())c", true  },
25*ccdc9c3eSSadaf Ebrahimi   { "ab*c",      true  },
26*ccdc9c3eSSadaf Ebrahimi   { "ab+c",      true  },
27*ccdc9c3eSSadaf Ebrahimi   { "a(b*|c*)d", true  },
28*ccdc9c3eSSadaf Ebrahimi   { "\\W",       true  },
29*ccdc9c3eSSadaf Ebrahimi   { "\\W{1,2}",  true  },
30*ccdc9c3eSSadaf Ebrahimi   { "\\d",       true  },
31*ccdc9c3eSSadaf Ebrahimi 
32*ccdc9c3eSSadaf Ebrahimi   // Check that repeated empty strings do not.
33*ccdc9c3eSSadaf Ebrahimi   { "(a*)*",     false },
34*ccdc9c3eSSadaf Ebrahimi   { "x(a*)*y",   false },
35*ccdc9c3eSSadaf Ebrahimi   { "(a*)+",     false },
36*ccdc9c3eSSadaf Ebrahimi   { "(a+)*",     true  },
37*ccdc9c3eSSadaf Ebrahimi   { "(a+)+",     true  },
38*ccdc9c3eSSadaf Ebrahimi   { "(a+)+",     true  },
39*ccdc9c3eSSadaf Ebrahimi 
40*ccdc9c3eSSadaf Ebrahimi   // \v is the only character class that shouldn't.
41*ccdc9c3eSSadaf Ebrahimi   { "\\b",       true  },
42*ccdc9c3eSSadaf Ebrahimi   { "\\v",       false },
43*ccdc9c3eSSadaf Ebrahimi   { "\\d",       true  },
44*ccdc9c3eSSadaf Ebrahimi 
45*ccdc9c3eSSadaf Ebrahimi   // The handling of ^ in multi-line mode is different, as is
46*ccdc9c3eSSadaf Ebrahimi   // the handling of $ in single-line mode.  (Both involve
47*ccdc9c3eSSadaf Ebrahimi   // boundary cases if the string ends with \n.)
48*ccdc9c3eSSadaf Ebrahimi   { "\\A",       true  },
49*ccdc9c3eSSadaf Ebrahimi   { "\\z",       true  },
50*ccdc9c3eSSadaf Ebrahimi   { "(?m)^",     false },
51*ccdc9c3eSSadaf Ebrahimi   { "(?m)$",     true  },
52*ccdc9c3eSSadaf Ebrahimi   { "(?-m)^",    true  },
53*ccdc9c3eSSadaf Ebrahimi   { "(?-m)$",    false },  // In PCRE, == \Z
54*ccdc9c3eSSadaf Ebrahimi   { "(?m)\\A",   true  },
55*ccdc9c3eSSadaf Ebrahimi   { "(?m)\\z",   true  },
56*ccdc9c3eSSadaf Ebrahimi   { "(?-m)\\A",  true  },
57*ccdc9c3eSSadaf Ebrahimi   { "(?-m)\\z",  true  },
58*ccdc9c3eSSadaf Ebrahimi };
59*ccdc9c3eSSadaf Ebrahimi 
TEST(MimicsPCRE,SimpleTests)60*ccdc9c3eSSadaf Ebrahimi TEST(MimicsPCRE, SimpleTests) {
61*ccdc9c3eSSadaf Ebrahimi   for (int i = 0; i < arraysize(tests); i++) {
62*ccdc9c3eSSadaf Ebrahimi     const PCRETest& t = tests[i];
63*ccdc9c3eSSadaf Ebrahimi     for (int j = 0; j < 2; j++) {
64*ccdc9c3eSSadaf Ebrahimi       Regexp::ParseFlags flags = Regexp::LikePerl;
65*ccdc9c3eSSadaf Ebrahimi       if (j == 0)
66*ccdc9c3eSSadaf Ebrahimi         flags = flags | Regexp::Latin1;
67*ccdc9c3eSSadaf Ebrahimi       Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
68*ccdc9c3eSSadaf Ebrahimi       ASSERT_TRUE(re != NULL) << " " << t.regexp;
69*ccdc9c3eSSadaf Ebrahimi       ASSERT_EQ(t.should_match, re->MimicsPCRE())
70*ccdc9c3eSSadaf Ebrahimi         << " " << t.regexp << " "
71*ccdc9c3eSSadaf Ebrahimi         << (j==0 ? "latin1" : "utf");
72*ccdc9c3eSSadaf Ebrahimi       re->Decref();
73*ccdc9c3eSSadaf Ebrahimi     }
74*ccdc9c3eSSadaf Ebrahimi   }
75*ccdc9c3eSSadaf Ebrahimi }
76*ccdc9c3eSSadaf Ebrahimi 
77*ccdc9c3eSSadaf Ebrahimi }  // namespace re2
78