xref: /aosp_15_r20/external/cronet/third_party/re2/src/re2/prefilter.h (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2009 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 #ifndef RE2_PREFILTER_H_
6 #define RE2_PREFILTER_H_
7 
8 // Prefilter is the class used to extract string guards from regexps.
9 // Rather than using Prefilter class directly, use FilteredRE2.
10 // See filtered_re2.h
11 
12 #include <set>
13 #include <string>
14 #include <vector>
15 
16 #include "util/logging.h"
17 
18 namespace re2 {
19 
20 class RE2;
21 
22 class Regexp;
23 
24 class Prefilter {
25   // Instead of using Prefilter directly, use FilteredRE2; see filtered_re2.h
26  public:
27   enum Op {
28     ALL = 0,  // Everything matches
29     NONE,  // Nothing matches
30     ATOM,  // The string atom() must match
31     AND,   // All in subs() must match
32     OR,   // One of subs() must match
33   };
34 
35   explicit Prefilter(Op op);
36   ~Prefilter();
37 
op()38   Op op() { return op_; }
atom()39   const std::string& atom() const { return atom_; }
set_unique_id(int id)40   void set_unique_id(int id) { unique_id_ = id; }
unique_id()41   int unique_id() const { return unique_id_; }
42 
43   // The children of the Prefilter node.
subs()44   std::vector<Prefilter*>* subs() {
45     DCHECK(op_ == AND || op_ == OR);
46     return subs_;
47   }
48 
49   // Set the children vector. Prefilter takes ownership of subs and
50   // subs_ will be deleted when Prefilter is deleted.
set_subs(std::vector<Prefilter * > * subs)51   void set_subs(std::vector<Prefilter*>* subs) { subs_ = subs; }
52 
53   // Given a RE2, return a Prefilter. The caller takes ownership of
54   // the Prefilter and should deallocate it. Returns NULL if Prefilter
55   // cannot be formed.
56   static Prefilter* FromRE2(const RE2* re2);
57 
58   // Returns a readable debug string of the prefilter.
59   std::string DebugString() const;
60 
61  private:
62   template <typename H>
AbslHashValue(H h,const Prefilter & a)63   friend H AbslHashValue(H h, const Prefilter& a) {
64     h = H::combine(std::move(h), a.op_);
65     if (a.op_ == ATOM) {
66       h = H::combine(std::move(h), a.atom_);
67     } else if (a.op_ == AND || a.op_ == OR) {
68       h = H::combine(std::move(h), a.subs_->size());
69       for (size_t i = 0; i < a.subs_->size(); ++i) {
70         h = H::combine(std::move(h), (*a.subs_)[i]->unique_id_);
71       }
72     }
73     return h;
74   }
75 
76   friend bool operator==(const Prefilter& a, const Prefilter& b) {
77     if (&a == &b) {
78       return true;
79     }
80     if (a.op_ != b.op_) {
81       return false;
82     }
83     if (a.op_ == ATOM) {
84       if (a.atom_ != b.atom_) {
85         return false;
86       }
87     } else if (a.op_ == AND || a.op_ == OR) {
88       if (a.subs_->size() != b.subs_->size()) {
89         return false;
90       }
91       for (size_t i = 0; i < a.subs_->size(); ++i) {
92         if ((*a.subs_)[i]->unique_id_ != (*b.subs_)[i]->unique_id_) {
93           return false;
94         }
95       }
96     }
97     return true;
98   }
99 
100   // A comparator used to store exact strings. We compare by length,
101   // then lexicographically. This ordering makes it easier to reduce the
102   // set of strings in SimplifyStringSet.
103   struct LengthThenLex {
operatorLengthThenLex104     bool operator()(const std::string& a, const std::string& b) const {
105        return (a.size() < b.size()) || (a.size() == b.size() && a < b);
106     }
107   };
108 
109   class Info;
110 
111   using SSet = std::set<std::string, LengthThenLex>;
112   using SSIter = SSet::iterator;
113   using ConstSSIter = SSet::const_iterator;
114 
115   // Combines two prefilters together to create an AND. The passed
116   // Prefilters will be part of the returned Prefilter or deleted.
117   static Prefilter* And(Prefilter* a, Prefilter* b);
118 
119   // Combines two prefilters together to create an OR. The passed
120   // Prefilters will be part of the returned Prefilter or deleted.
121   static Prefilter* Or(Prefilter* a, Prefilter* b);
122 
123   // Generalized And/Or
124   static Prefilter* AndOr(Op op, Prefilter* a, Prefilter* b);
125 
126   static Prefilter* FromRegexp(Regexp* a);
127 
128   static Prefilter* FromString(const std::string& str);
129 
130   static Prefilter* OrStrings(SSet* ss);
131 
132   static Info* BuildInfo(Regexp* re);
133 
134   Prefilter* Simplify();
135 
136   // Removes redundant strings from the set. A string is redundant if
137   // any of the other strings appear as a substring. The empty string
138   // is a special case, which is ignored.
139   static void SimplifyStringSet(SSet* ss);
140 
141   // Adds the cross-product of a and b to dst.
142   // (For each string i in a and j in b, add i+j.)
143   static void CrossProduct(const SSet& a, const SSet& b, SSet* dst);
144 
145   // Kind of Prefilter.
146   Op op_;
147 
148   // Sub-matches for AND or OR Prefilter.
149   std::vector<Prefilter*>* subs_;
150 
151   // Actual string to match in leaf node.
152   std::string atom_;
153 
154   // If different prefilters have the same string atom, or if they are
155   // structurally the same (e.g., OR of same atom strings) they are
156   // considered the same unique nodes. This is the id for each unique
157   // node. This field is populated with a unique id for every node,
158   // and -1 for duplicate nodes.
159   int unique_id_;
160 
161   Prefilter(const Prefilter&) = delete;
162   Prefilter& operator=(const Prefilter&) = delete;
163 };
164 
165 }  // namespace re2
166 
167 #endif  // RE2_PREFILTER_H_
168