1*ccdc9c3eSSadaf Ebrahimi // Copyright 2008 The RE2 Authors. All Rights Reserved. 2*ccdc9c3eSSadaf Ebrahimi // Use of this source code is governed by a BSD-style 3*ccdc9c3eSSadaf Ebrahimi // license that can be found in the LICENSE file. 4*ccdc9c3eSSadaf Ebrahimi 5*ccdc9c3eSSadaf Ebrahimi #ifndef RE2_TESTING_REGEXP_GENERATOR_H_ 6*ccdc9c3eSSadaf Ebrahimi #define RE2_TESTING_REGEXP_GENERATOR_H_ 7*ccdc9c3eSSadaf Ebrahimi 8*ccdc9c3eSSadaf Ebrahimi // Regular expression generator: generates all possible 9*ccdc9c3eSSadaf Ebrahimi // regular expressions within given parameters (see below for details). 10*ccdc9c3eSSadaf Ebrahimi 11*ccdc9c3eSSadaf Ebrahimi #include <stdint.h> 12*ccdc9c3eSSadaf Ebrahimi #include <random> 13*ccdc9c3eSSadaf Ebrahimi #include <string> 14*ccdc9c3eSSadaf Ebrahimi #include <vector> 15*ccdc9c3eSSadaf Ebrahimi 16*ccdc9c3eSSadaf Ebrahimi #include "util/util.h" 17*ccdc9c3eSSadaf Ebrahimi #include "re2/stringpiece.h" 18*ccdc9c3eSSadaf Ebrahimi 19*ccdc9c3eSSadaf Ebrahimi namespace re2 { 20*ccdc9c3eSSadaf Ebrahimi 21*ccdc9c3eSSadaf Ebrahimi // Regular expression generator. 22*ccdc9c3eSSadaf Ebrahimi // 23*ccdc9c3eSSadaf Ebrahimi // Given a set of atom expressions like "a", "b", or "." 24*ccdc9c3eSSadaf Ebrahimi // and operators like "%s*", generates all possible regular expressions 25*ccdc9c3eSSadaf Ebrahimi // using at most maxbases base expressions and maxops operators. 26*ccdc9c3eSSadaf Ebrahimi // For each such expression re, calls HandleRegexp(re). 27*ccdc9c3eSSadaf Ebrahimi // 28*ccdc9c3eSSadaf Ebrahimi // Callers are expected to subclass RegexpGenerator and provide HandleRegexp. 29*ccdc9c3eSSadaf Ebrahimi // 30*ccdc9c3eSSadaf Ebrahimi class RegexpGenerator { 31*ccdc9c3eSSadaf Ebrahimi public: 32*ccdc9c3eSSadaf Ebrahimi RegexpGenerator(int maxatoms, int maxops, const std::vector<string>& atoms, 33*ccdc9c3eSSadaf Ebrahimi const std::vector<string>& ops); ~RegexpGenerator()34*ccdc9c3eSSadaf Ebrahimi virtual ~RegexpGenerator() {} 35*ccdc9c3eSSadaf Ebrahimi 36*ccdc9c3eSSadaf Ebrahimi // Generates all the regular expressions, calling HandleRegexp(re) for each. 37*ccdc9c3eSSadaf Ebrahimi void Generate(); 38*ccdc9c3eSSadaf Ebrahimi 39*ccdc9c3eSSadaf Ebrahimi // Generates n random regular expressions, calling HandleRegexp(re) for each. 40*ccdc9c3eSSadaf Ebrahimi void GenerateRandom(int32_t seed, int n); 41*ccdc9c3eSSadaf Ebrahimi 42*ccdc9c3eSSadaf Ebrahimi // Handles a regular expression. Must be provided by subclass. 43*ccdc9c3eSSadaf Ebrahimi virtual void HandleRegexp(const string& regexp) = 0; 44*ccdc9c3eSSadaf Ebrahimi 45*ccdc9c3eSSadaf Ebrahimi // The egrep regexp operators: * + ? | and concatenation. 46*ccdc9c3eSSadaf Ebrahimi static const std::vector<string>& EgrepOps(); 47*ccdc9c3eSSadaf Ebrahimi 48*ccdc9c3eSSadaf Ebrahimi private: 49*ccdc9c3eSSadaf Ebrahimi void RunPostfix(const std::vector<string>& post); 50*ccdc9c3eSSadaf Ebrahimi void GeneratePostfix(std::vector<string>* post, int nstk, int ops, int lits); 51*ccdc9c3eSSadaf Ebrahimi bool GenerateRandomPostfix(std::vector<string>* post, int nstk, int ops, 52*ccdc9c3eSSadaf Ebrahimi int lits); 53*ccdc9c3eSSadaf Ebrahimi 54*ccdc9c3eSSadaf Ebrahimi int maxatoms_; // Maximum number of atoms allowed in expr. 55*ccdc9c3eSSadaf Ebrahimi int maxops_; // Maximum number of ops allowed in expr. 56*ccdc9c3eSSadaf Ebrahimi std::vector<string> atoms_; // Possible atoms. 57*ccdc9c3eSSadaf Ebrahimi std::vector<string> ops_; // Possible ops. 58*ccdc9c3eSSadaf Ebrahimi std::minstd_rand0 rng_; // Random number generator. 59*ccdc9c3eSSadaf Ebrahimi 60*ccdc9c3eSSadaf Ebrahimi RegexpGenerator(const RegexpGenerator&) = delete; 61*ccdc9c3eSSadaf Ebrahimi RegexpGenerator& operator=(const RegexpGenerator&) = delete; 62*ccdc9c3eSSadaf Ebrahimi }; 63*ccdc9c3eSSadaf Ebrahimi 64*ccdc9c3eSSadaf Ebrahimi // Helpers for preparing arguments to RegexpGenerator constructor. 65*ccdc9c3eSSadaf Ebrahimi 66*ccdc9c3eSSadaf Ebrahimi // Returns one string for each character in s. 67*ccdc9c3eSSadaf Ebrahimi std::vector<string> Explode(const StringPiece& s); 68*ccdc9c3eSSadaf Ebrahimi 69*ccdc9c3eSSadaf Ebrahimi // Splits string everywhere sep is found, returning 70*ccdc9c3eSSadaf Ebrahimi // vector of pieces. 71*ccdc9c3eSSadaf Ebrahimi std::vector<string> Split(const StringPiece& sep, const StringPiece& s); 72*ccdc9c3eSSadaf Ebrahimi 73*ccdc9c3eSSadaf Ebrahimi } // namespace re2 74*ccdc9c3eSSadaf Ebrahimi 75*ccdc9c3eSSadaf Ebrahimi #endif // RE2_TESTING_REGEXP_GENERATOR_H_ 76