// Copyright 2008 The RE2 Authors. All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#ifndef RE2_TESTING_TESTER_H_
#define RE2_TESTING_TESTER_H_

// Comparative tester for regular expression matching.
// Checks all implementations against each other.

#include <vector>

#include "re2/stringpiece.h"
#include "re2/prog.h"
#include "re2/regexp.h"
#include "re2/re2.h"
#include "util/pcre.h"

namespace re2 {

// All the supported regexp engines.
// Enumerators are numbered consecutively starting at 0, so the ++ and +
// helpers below can be used to step from one engine to the next.
enum Engine {
  kEngineBacktrack = 0,  // Prog::UnsafeSearchBacktrack
  kEngineNFA,            // Prog::SearchNFA
  kEngineDFA,            // Prog::SearchDFA, only ask whether it matched
  kEngineDFA1,           // Prog::SearchDFA, ask for match[0]
  kEngineOnePass,        // Prog::SearchOnePass, if applicable
  kEngineBitState,       // Prog::SearchBitState
  kEngineRE2,            // RE2, all submatches
  kEngineRE2a,           // RE2, only ask for match[0]
  kEngineRE2b,           // RE2, only ask whether it matched
  kEnginePCRE,           // PCRE (util/pcre.h)

  kEngineMax,  // one past the last engine listed above
};

// Make normal math on the enum preserve the type.
// By default, C++ doesn't define ++ on enum, and e+1 has type int.

// Postfix increment: replaces e with the next enumerator value.
// Returns void, so `e++` cannot be used as a sub-expression.
// The int parameter only selects the postfix form; it is left unnamed
// to avoid unused-parameter warnings.
static inline void operator++(Engine& e, int /*unused*/) {
  e = static_cast<Engine>(static_cast<int>(e) + 1);
}

// Returns the Engine whose value is i greater than e.
// No range check is performed on the result.
static inline Engine operator+(Engine e, int i) {
  return static_cast<Engine>(static_cast<int>(e) + i);
}

// A TestInstance caches per-regexp state for a given
// regular expression in a given configuration
// (UTF-8 vs Latin1, longest vs first match, etc.).
50*ccdc9c3eSSadaf Ebrahimi class TestInstance { 51*ccdc9c3eSSadaf Ebrahimi public: 52*ccdc9c3eSSadaf Ebrahimi struct Result; 53*ccdc9c3eSSadaf Ebrahimi 54*ccdc9c3eSSadaf Ebrahimi TestInstance(const StringPiece& regexp, Prog::MatchKind kind, 55*ccdc9c3eSSadaf Ebrahimi Regexp::ParseFlags flags); 56*ccdc9c3eSSadaf Ebrahimi ~TestInstance(); flags()57*ccdc9c3eSSadaf Ebrahimi Regexp::ParseFlags flags() { return flags_; } error()58*ccdc9c3eSSadaf Ebrahimi bool error() { return error_; } 59*ccdc9c3eSSadaf Ebrahimi 60*ccdc9c3eSSadaf Ebrahimi // Runs a single test case: search in text, which is in context, 61*ccdc9c3eSSadaf Ebrahimi // using the given anchoring. 62*ccdc9c3eSSadaf Ebrahimi bool RunCase(const StringPiece& text, const StringPiece& context, 63*ccdc9c3eSSadaf Ebrahimi Prog::Anchor anchor); 64*ccdc9c3eSSadaf Ebrahimi 65*ccdc9c3eSSadaf Ebrahimi private: 66*ccdc9c3eSSadaf Ebrahimi // Runs a single search using the named engine type. 67*ccdc9c3eSSadaf Ebrahimi void RunSearch(Engine type, 68*ccdc9c3eSSadaf Ebrahimi const StringPiece& text, const StringPiece& context, 69*ccdc9c3eSSadaf Ebrahimi Prog::Anchor anchor, 70*ccdc9c3eSSadaf Ebrahimi Result *result); 71*ccdc9c3eSSadaf Ebrahimi 72*ccdc9c3eSSadaf Ebrahimi void LogMatch(const char* prefix, Engine e, const StringPiece& text, 73*ccdc9c3eSSadaf Ebrahimi const StringPiece& context, Prog::Anchor anchor); 74*ccdc9c3eSSadaf Ebrahimi 75*ccdc9c3eSSadaf Ebrahimi const StringPiece regexp_str_; // regexp being tested 76*ccdc9c3eSSadaf Ebrahimi Prog::MatchKind kind_; // kind of match 77*ccdc9c3eSSadaf Ebrahimi Regexp::ParseFlags flags_; // flags for parsing regexp_str_ 78*ccdc9c3eSSadaf Ebrahimi bool error_; // error during constructor? 
79*ccdc9c3eSSadaf Ebrahimi 80*ccdc9c3eSSadaf Ebrahimi Regexp* regexp_; // parsed regexp 81*ccdc9c3eSSadaf Ebrahimi int num_captures_; // regexp_->NumCaptures() cached 82*ccdc9c3eSSadaf Ebrahimi Prog* prog_; // compiled program 83*ccdc9c3eSSadaf Ebrahimi Prog* rprog_; // compiled reverse program 84*ccdc9c3eSSadaf Ebrahimi PCRE* re_; // PCRE implementation 85*ccdc9c3eSSadaf Ebrahimi RE2* re2_; // RE2 implementation 86*ccdc9c3eSSadaf Ebrahimi 87*ccdc9c3eSSadaf Ebrahimi TestInstance(const TestInstance&) = delete; 88*ccdc9c3eSSadaf Ebrahimi TestInstance& operator=(const TestInstance&) = delete; 89*ccdc9c3eSSadaf Ebrahimi }; 90*ccdc9c3eSSadaf Ebrahimi 91*ccdc9c3eSSadaf Ebrahimi // A group of TestInstances for all possible configurations. 92*ccdc9c3eSSadaf Ebrahimi class Tester { 93*ccdc9c3eSSadaf Ebrahimi public: 94*ccdc9c3eSSadaf Ebrahimi explicit Tester(const StringPiece& regexp); 95*ccdc9c3eSSadaf Ebrahimi ~Tester(); 96*ccdc9c3eSSadaf Ebrahimi error()97*ccdc9c3eSSadaf Ebrahimi bool error() { return error_; } 98*ccdc9c3eSSadaf Ebrahimi 99*ccdc9c3eSSadaf Ebrahimi // Runs a single test case: search in text, which is in context, 100*ccdc9c3eSSadaf Ebrahimi // using the given anchoring. 101*ccdc9c3eSSadaf Ebrahimi bool TestCase(const StringPiece& text, const StringPiece& context, 102*ccdc9c3eSSadaf Ebrahimi Prog::Anchor anchor); 103*ccdc9c3eSSadaf Ebrahimi 104*ccdc9c3eSSadaf Ebrahimi // Run TestCase(text, text, anchor) for all anchoring modes. 105*ccdc9c3eSSadaf Ebrahimi bool TestInput(const StringPiece& text); 106*ccdc9c3eSSadaf Ebrahimi 107*ccdc9c3eSSadaf Ebrahimi // Run TestCase(text, context, anchor) for all anchoring modes. 
108*ccdc9c3eSSadaf Ebrahimi bool TestInputInContext(const StringPiece& text, const StringPiece& context); 109*ccdc9c3eSSadaf Ebrahimi 110*ccdc9c3eSSadaf Ebrahimi private: 111*ccdc9c3eSSadaf Ebrahimi bool error_; 112*ccdc9c3eSSadaf Ebrahimi std::vector<TestInstance*> v_; 113*ccdc9c3eSSadaf Ebrahimi 114*ccdc9c3eSSadaf Ebrahimi Tester(const Tester&) = delete; 115*ccdc9c3eSSadaf Ebrahimi Tester& operator=(const Tester&) = delete; 116*ccdc9c3eSSadaf Ebrahimi }; 117*ccdc9c3eSSadaf Ebrahimi 118*ccdc9c3eSSadaf Ebrahimi // Run all possible tests using regexp and text. 119*ccdc9c3eSSadaf Ebrahimi bool TestRegexpOnText(const StringPiece& regexp, const StringPiece& text); 120*ccdc9c3eSSadaf Ebrahimi 121*ccdc9c3eSSadaf Ebrahimi } // namespace re2 122*ccdc9c3eSSadaf Ebrahimi 123*ccdc9c3eSSadaf Ebrahimi #endif // RE2_TESTING_TESTER_H_ 124