xref: /aosp_15_r20/external/regex-re2/re2/testing/regexp_generator.h (revision ccdc9c3e24c519bfa4832a66aa2e83a52c19f295)
1*ccdc9c3eSSadaf Ebrahimi // Copyright 2008 The RE2 Authors.  All Rights Reserved.
2*ccdc9c3eSSadaf Ebrahimi // Use of this source code is governed by a BSD-style
3*ccdc9c3eSSadaf Ebrahimi // license that can be found in the LICENSE file.
4*ccdc9c3eSSadaf Ebrahimi 
5*ccdc9c3eSSadaf Ebrahimi #ifndef RE2_TESTING_REGEXP_GENERATOR_H_
6*ccdc9c3eSSadaf Ebrahimi #define RE2_TESTING_REGEXP_GENERATOR_H_
7*ccdc9c3eSSadaf Ebrahimi 
8*ccdc9c3eSSadaf Ebrahimi // Regular expression generator: generates all possible
9*ccdc9c3eSSadaf Ebrahimi // regular expressions within given parameters (see below for details).
10*ccdc9c3eSSadaf Ebrahimi 
11*ccdc9c3eSSadaf Ebrahimi #include <stdint.h>
12*ccdc9c3eSSadaf Ebrahimi #include <random>
13*ccdc9c3eSSadaf Ebrahimi #include <string>
14*ccdc9c3eSSadaf Ebrahimi #include <vector>
15*ccdc9c3eSSadaf Ebrahimi 
16*ccdc9c3eSSadaf Ebrahimi #include "util/util.h"
17*ccdc9c3eSSadaf Ebrahimi #include "re2/stringpiece.h"
18*ccdc9c3eSSadaf Ebrahimi 
19*ccdc9c3eSSadaf Ebrahimi namespace re2 {
20*ccdc9c3eSSadaf Ebrahimi 
// Regular expression generator.
//
// Given a set of atom expressions like "a", "b", or "."
// and operators like "%s*", generates all possible regular expressions
// using at most maxatoms atoms and maxops operators.
// For each such expression re, calls HandleRegexp(re).
//
// Callers are expected to subclass RegexpGenerator and provide HandleRegexp.
// The class is non-copyable.
//
class RegexpGenerator {
 public:
  // maxatoms: maximum number of atoms allowed in a generated expression.
  // maxops:   maximum number of operators allowed in a generated expression.
  // atoms:    the possible atoms.
  // ops:      the possible operators (e.g. "%s*").
  RegexpGenerator(int maxatoms, int maxops,
                  const std::vector<std::string>& atoms,
                  const std::vector<std::string>& ops);

  // Virtual because the class is meant to be subclassed.
  virtual ~RegexpGenerator() = default;

  // Generates all the regular expressions, calling HandleRegexp(re) for each.
  void Generate();

  // Generates n random regular expressions, calling HandleRegexp(re) for each.
  // NOTE(review): seed presumably seeds the internal random number
  // generator -- confirm against the implementation.
  void GenerateRandom(int32_t seed, int n);

  // Handles a regular expression.  Must be provided by subclass.
  virtual void HandleRegexp(const std::string& regexp) = 0;

  // The egrep regexp operators: * + ? | and concatenation.
  static const std::vector<std::string>& EgrepOps();

 private:
  // Implementation helpers, defined out of line.
  // NOTE(review): the names suggest that post is an expression in postfix
  // order, nstk a stack depth, and ops/lits running operator/literal
  // counts -- confirm against the implementation.
  void RunPostfix(const std::vector<std::string>& post);
  void GeneratePostfix(std::vector<std::string>* post, int nstk, int ops,
                       int lits);
  bool GenerateRandomPostfix(std::vector<std::string>* post, int nstk, int ops,
                             int lits);

  int maxatoms_;                    // Maximum number of atoms allowed in expr.
  int maxops_;                      // Maximum number of ops allowed in expr.
  std::vector<std::string> atoms_;  // Possible atoms.
  std::vector<std::string> ops_;    // Possible ops.
  std::minstd_rand0 rng_;           // Random number generator.

  // Copying is disallowed.
  RegexpGenerator(const RegexpGenerator&) = delete;
  RegexpGenerator& operator=(const RegexpGenerator&) = delete;
};
63*ccdc9c3eSSadaf Ebrahimi 
64*ccdc9c3eSSadaf Ebrahimi // Helpers for preparing arguments to RegexpGenerator constructor.
65*ccdc9c3eSSadaf Ebrahimi 
66*ccdc9c3eSSadaf Ebrahimi // Returns one string for each character in s.
67*ccdc9c3eSSadaf Ebrahimi std::vector<string> Explode(const StringPiece& s);
68*ccdc9c3eSSadaf Ebrahimi 
69*ccdc9c3eSSadaf Ebrahimi // Splits string everywhere sep is found, returning
70*ccdc9c3eSSadaf Ebrahimi // vector of pieces.
71*ccdc9c3eSSadaf Ebrahimi std::vector<string> Split(const StringPiece& sep, const StringPiece& s);
72*ccdc9c3eSSadaf Ebrahimi 
73*ccdc9c3eSSadaf Ebrahimi }  // namespace re2
74*ccdc9c3eSSadaf Ebrahimi 
75*ccdc9c3eSSadaf Ebrahimi #endif  // RE2_TESTING_REGEXP_GENERATOR_H_
76