xref: /aosp_15_r20/external/regex-re2/re2/testing/tester.h (revision ccdc9c3e24c519bfa4832a66aa2e83a52c19f295)
1*ccdc9c3eSSadaf Ebrahimi // Copyright 2008 The RE2 Authors.  All Rights Reserved.
2*ccdc9c3eSSadaf Ebrahimi // Use of this source code is governed by a BSD-style
3*ccdc9c3eSSadaf Ebrahimi // license that can be found in the LICENSE file.
4*ccdc9c3eSSadaf Ebrahimi 
5*ccdc9c3eSSadaf Ebrahimi #ifndef RE2_TESTING_TESTER_H_
6*ccdc9c3eSSadaf Ebrahimi #define RE2_TESTING_TESTER_H_
7*ccdc9c3eSSadaf Ebrahimi 
8*ccdc9c3eSSadaf Ebrahimi // Comparative tester for regular expression matching.
9*ccdc9c3eSSadaf Ebrahimi // Checks all implementations against each other.
10*ccdc9c3eSSadaf Ebrahimi 
11*ccdc9c3eSSadaf Ebrahimi #include <vector>
12*ccdc9c3eSSadaf Ebrahimi 
13*ccdc9c3eSSadaf Ebrahimi #include "re2/stringpiece.h"
14*ccdc9c3eSSadaf Ebrahimi #include "re2/prog.h"
15*ccdc9c3eSSadaf Ebrahimi #include "re2/regexp.h"
16*ccdc9c3eSSadaf Ebrahimi #include "re2/re2.h"
17*ccdc9c3eSSadaf Ebrahimi #include "util/pcre.h"
18*ccdc9c3eSSadaf Ebrahimi 
19*ccdc9c3eSSadaf Ebrahimi namespace re2 {
20*ccdc9c3eSSadaf Ebrahimi 
// Identifiers for every regexp engine the tester can exercise.
// Values are consecutive starting at 0, so they can be iterated
// from kEngineBacktrack up to (but not including) kEngineMax.
enum Engine {
  // Prog::UnsafeSearchBacktrack
  kEngineBacktrack = 0,
  // Prog::SearchNFA
  kEngineNFA,
  // Prog::SearchDFA, only ask whether it matched
  kEngineDFA,
  // Prog::SearchDFA, ask for match[0]
  kEngineDFA1,
  // Prog::SearchOnePass, if applicable
  kEngineOnePass,
  // Prog::SearchBitState
  kEngineBitState,
  // RE2, all submatches
  kEngineRE2,
  // RE2, only ask for match[0]
  kEngineRE2a,
  // RE2, only ask whether it matched
  kEngineRE2b,
  // PCRE (util/pcre.h)
  kEnginePCRE,

  // Sentinel: one past the last engine, i.e. the number of engines above.
  kEngineMax,
};
36*ccdc9c3eSSadaf Ebrahimi 
37*ccdc9c3eSSadaf Ebrahimi // Make normal math on the enum preserve the type.
38*ccdc9c3eSSadaf Ebrahimi // By default, C++ doesn't define ++ on enum, and e+1 has type int.
39*ccdc9c3eSSadaf Ebrahimi static inline void operator++(Engine& e, int unused) {
40*ccdc9c3eSSadaf Ebrahimi   e = static_cast<Engine>(e+1);
41*ccdc9c3eSSadaf Ebrahimi }
42*ccdc9c3eSSadaf Ebrahimi 
43*ccdc9c3eSSadaf Ebrahimi static inline Engine operator+(Engine e, int i) {
44*ccdc9c3eSSadaf Ebrahimi   return static_cast<Engine>(static_cast<int>(e)+i);
45*ccdc9c3eSSadaf Ebrahimi }
46*ccdc9c3eSSadaf Ebrahimi 
47*ccdc9c3eSSadaf Ebrahimi // A TestInstance caches per-regexp state for a given
48*ccdc9c3eSSadaf Ebrahimi // regular expression in a given configuration
49*ccdc9c3eSSadaf Ebrahimi // (UTF-8 vs Latin1, longest vs first match, etc.).
50*ccdc9c3eSSadaf Ebrahimi class TestInstance {
51*ccdc9c3eSSadaf Ebrahimi  public:
52*ccdc9c3eSSadaf Ebrahimi   struct Result;
53*ccdc9c3eSSadaf Ebrahimi 
54*ccdc9c3eSSadaf Ebrahimi   TestInstance(const StringPiece& regexp, Prog::MatchKind kind,
55*ccdc9c3eSSadaf Ebrahimi                Regexp::ParseFlags flags);
56*ccdc9c3eSSadaf Ebrahimi   ~TestInstance();
flags()57*ccdc9c3eSSadaf Ebrahimi   Regexp::ParseFlags flags() { return flags_; }
error()58*ccdc9c3eSSadaf Ebrahimi   bool error() { return error_; }
59*ccdc9c3eSSadaf Ebrahimi 
60*ccdc9c3eSSadaf Ebrahimi   // Runs a single test case: search in text, which is in context,
61*ccdc9c3eSSadaf Ebrahimi   // using the given anchoring.
62*ccdc9c3eSSadaf Ebrahimi   bool RunCase(const StringPiece& text, const StringPiece& context,
63*ccdc9c3eSSadaf Ebrahimi                Prog::Anchor anchor);
64*ccdc9c3eSSadaf Ebrahimi 
65*ccdc9c3eSSadaf Ebrahimi  private:
66*ccdc9c3eSSadaf Ebrahimi   // Runs a single search using the named engine type.
67*ccdc9c3eSSadaf Ebrahimi   void RunSearch(Engine type,
68*ccdc9c3eSSadaf Ebrahimi                  const StringPiece& text, const StringPiece& context,
69*ccdc9c3eSSadaf Ebrahimi                  Prog::Anchor anchor,
70*ccdc9c3eSSadaf Ebrahimi                  Result *result);
71*ccdc9c3eSSadaf Ebrahimi 
72*ccdc9c3eSSadaf Ebrahimi   void LogMatch(const char* prefix, Engine e, const StringPiece& text,
73*ccdc9c3eSSadaf Ebrahimi                 const StringPiece& context, Prog::Anchor anchor);
74*ccdc9c3eSSadaf Ebrahimi 
75*ccdc9c3eSSadaf Ebrahimi   const StringPiece regexp_str_;    // regexp being tested
76*ccdc9c3eSSadaf Ebrahimi   Prog::MatchKind kind_;            // kind of match
77*ccdc9c3eSSadaf Ebrahimi   Regexp::ParseFlags flags_;        // flags for parsing regexp_str_
78*ccdc9c3eSSadaf Ebrahimi   bool error_;                      // error during constructor?
79*ccdc9c3eSSadaf Ebrahimi 
80*ccdc9c3eSSadaf Ebrahimi   Regexp* regexp_;                  // parsed regexp
81*ccdc9c3eSSadaf Ebrahimi   int num_captures_;                // regexp_->NumCaptures() cached
82*ccdc9c3eSSadaf Ebrahimi   Prog* prog_;                      // compiled program
83*ccdc9c3eSSadaf Ebrahimi   Prog* rprog_;                     // compiled reverse program
84*ccdc9c3eSSadaf Ebrahimi   PCRE* re_;                        // PCRE implementation
85*ccdc9c3eSSadaf Ebrahimi   RE2* re2_;                        // RE2 implementation
86*ccdc9c3eSSadaf Ebrahimi 
87*ccdc9c3eSSadaf Ebrahimi   TestInstance(const TestInstance&) = delete;
88*ccdc9c3eSSadaf Ebrahimi   TestInstance& operator=(const TestInstance&) = delete;
89*ccdc9c3eSSadaf Ebrahimi };
90*ccdc9c3eSSadaf Ebrahimi 
91*ccdc9c3eSSadaf Ebrahimi // A group of TestInstances for all possible configurations.
92*ccdc9c3eSSadaf Ebrahimi class Tester {
93*ccdc9c3eSSadaf Ebrahimi  public:
94*ccdc9c3eSSadaf Ebrahimi   explicit Tester(const StringPiece& regexp);
95*ccdc9c3eSSadaf Ebrahimi   ~Tester();
96*ccdc9c3eSSadaf Ebrahimi 
error()97*ccdc9c3eSSadaf Ebrahimi   bool error() { return error_; }
98*ccdc9c3eSSadaf Ebrahimi 
99*ccdc9c3eSSadaf Ebrahimi   // Runs a single test case: search in text, which is in context,
100*ccdc9c3eSSadaf Ebrahimi   // using the given anchoring.
101*ccdc9c3eSSadaf Ebrahimi   bool TestCase(const StringPiece& text, const StringPiece& context,
102*ccdc9c3eSSadaf Ebrahimi                 Prog::Anchor anchor);
103*ccdc9c3eSSadaf Ebrahimi 
104*ccdc9c3eSSadaf Ebrahimi   // Run TestCase(text, text, anchor) for all anchoring modes.
105*ccdc9c3eSSadaf Ebrahimi   bool TestInput(const StringPiece& text);
106*ccdc9c3eSSadaf Ebrahimi 
107*ccdc9c3eSSadaf Ebrahimi   // Run TestCase(text, context, anchor) for all anchoring modes.
108*ccdc9c3eSSadaf Ebrahimi   bool TestInputInContext(const StringPiece& text, const StringPiece& context);
109*ccdc9c3eSSadaf Ebrahimi 
110*ccdc9c3eSSadaf Ebrahimi  private:
111*ccdc9c3eSSadaf Ebrahimi   bool error_;
112*ccdc9c3eSSadaf Ebrahimi   std::vector<TestInstance*> v_;
113*ccdc9c3eSSadaf Ebrahimi 
114*ccdc9c3eSSadaf Ebrahimi   Tester(const Tester&) = delete;
115*ccdc9c3eSSadaf Ebrahimi   Tester& operator=(const Tester&) = delete;
116*ccdc9c3eSSadaf Ebrahimi };
117*ccdc9c3eSSadaf Ebrahimi 
118*ccdc9c3eSSadaf Ebrahimi // Run all possible tests using regexp and text.
119*ccdc9c3eSSadaf Ebrahimi bool TestRegexpOnText(const StringPiece& regexp, const StringPiece& text);
120*ccdc9c3eSSadaf Ebrahimi 
121*ccdc9c3eSSadaf Ebrahimi }  // namespace re2
122*ccdc9c3eSSadaf Ebrahimi 
123*ccdc9c3eSSadaf Ebrahimi #endif  // RE2_TESTING_TESTER_H_
124