1 // Copyright 2009 The RE2 Authors. All Rights Reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 #ifndef RE2_PREFILTER_H_ 6 #define RE2_PREFILTER_H_ 7 8 // Prefilter is the class used to extract string guards from regexps. 9 // Rather than using Prefilter class directly, use FilteredRE2. 10 // See filtered_re2.h 11 12 #include <set> 13 #include <string> 14 #include <vector> 15 16 #include "util/logging.h" 17 18 namespace re2 { 19 20 class RE2; 21 22 class Regexp; 23 24 class Prefilter { 25 // Instead of using Prefilter directly, use FilteredRE2; see filtered_re2.h 26 public: 27 enum Op { 28 ALL = 0, // Everything matches 29 NONE, // Nothing matches 30 ATOM, // The string atom() must match 31 AND, // All in subs() must match 32 OR, // One of subs() must match 33 }; 34 35 explicit Prefilter(Op op); 36 ~Prefilter(); 37 op()38 Op op() { return op_; } atom()39 const std::string& atom() const { return atom_; } set_unique_id(int id)40 void set_unique_id(int id) { unique_id_ = id; } unique_id()41 int unique_id() const { return unique_id_; } 42 43 // The children of the Prefilter node. subs()44 std::vector<Prefilter*>* subs() { 45 DCHECK(op_ == AND || op_ == OR); 46 return subs_; 47 } 48 49 // Set the children vector. Prefilter takes ownership of subs and 50 // subs_ will be deleted when Prefilter is deleted. set_subs(std::vector<Prefilter * > * subs)51 void set_subs(std::vector<Prefilter*>* subs) { subs_ = subs; } 52 53 // Given a RE2, return a Prefilter. The caller takes ownership of 54 // the Prefilter and should deallocate it. Returns NULL if Prefilter 55 // cannot be formed. 56 static Prefilter* FromRE2(const RE2* re2); 57 58 // Returns a readable debug string of the prefilter. 59 std::string DebugString() const; 60 61 private: 62 template <typename H> AbslHashValue(H h,const Prefilter & a)63 friend H AbslHashValue(H h, const Prefilter& a) { 64 h = H::combine(std::move(h), a.op_); 65 if (a.op_ == ATOM) { 66 h = H::combine(std::move(h), a.atom_); 67 } else if (a.op_ == AND || a.op_ == OR) { 68 h = H::combine(std::move(h), a.subs_->size()); 69 for (size_t i = 0; i < a.subs_->size(); ++i) { 70 h = H::combine(std::move(h), (*a.subs_)[i]->unique_id_); 71 } 72 } 73 return h; 74 } 75 76 friend bool operator==(const Prefilter& a, const Prefilter& b) { 77 if (&a == &b) { 78 return true; 79 } 80 if (a.op_ != b.op_) { 81 return false; 82 } 83 if (a.op_ == ATOM) { 84 if (a.atom_ != b.atom_) { 85 return false; 86 } 87 } else if (a.op_ == AND || a.op_ == OR) { 88 if (a.subs_->size() != b.subs_->size()) { 89 return false; 90 } 91 for (size_t i = 0; i < a.subs_->size(); ++i) { 92 if ((*a.subs_)[i]->unique_id_ != (*b.subs_)[i]->unique_id_) { 93 return false; 94 } 95 } 96 } 97 return true; 98 } 99 100 // A comparator used to store exact strings. We compare by length, 101 // then lexicographically. This ordering makes it easier to reduce the 102 // set of strings in SimplifyStringSet. 103 struct LengthThenLex { operatorLengthThenLex104 bool operator()(const std::string& a, const std::string& b) const { 105 return (a.size() < b.size()) || (a.size() == b.size() && a < b); 106 } 107 }; 108 109 class Info; 110 111 using SSet = std::set<std::string, LengthThenLex>; 112 using SSIter = SSet::iterator; 113 using ConstSSIter = SSet::const_iterator; 114 115 // Combines two prefilters together to create an AND. The passed 116 // Prefilters will be part of the returned Prefilter or deleted. 117 static Prefilter* And(Prefilter* a, Prefilter* b); 118 119 // Combines two prefilters together to create an OR. The passed 120 // Prefilters will be part of the returned Prefilter or deleted. 121 static Prefilter* Or(Prefilter* a, Prefilter* b); 122 123 // Generalized And/Or 124 static Prefilter* AndOr(Op op, Prefilter* a, Prefilter* b); 125 126 static Prefilter* FromRegexp(Regexp* a); 127 128 static Prefilter* FromString(const std::string& str); 129 130 static Prefilter* OrStrings(SSet* ss); 131 132 static Info* BuildInfo(Regexp* re); 133 134 Prefilter* Simplify(); 135 136 // Removes redundant strings from the set. A string is redundant if 137 // any of the other strings appear as a substring. The empty string 138 // is a special case, which is ignored. 139 static void SimplifyStringSet(SSet* ss); 140 141 // Adds the cross-product of a and b to dst. 142 // (For each string i in a and j in b, add i+j.) 143 static void CrossProduct(const SSet& a, const SSet& b, SSet* dst); 144 145 // Kind of Prefilter. 146 Op op_; 147 148 // Sub-matches for AND or OR Prefilter. 149 std::vector<Prefilter*>* subs_; 150 151 // Actual string to match in leaf node. 152 std::string atom_; 153 154 // If different prefilters have the same string atom, or if they are 155 // structurally the same (e.g., OR of same atom strings) they are 156 // considered the same unique nodes. This is the id for each unique 157 // node. This field is populated with a unique id for every node, 158 // and -1 for duplicate nodes. 159 int unique_id_; 160 161 Prefilter(const Prefilter&) = delete; 162 Prefilter& operator=(const Prefilter&) = delete; 163 }; 164 165 } // namespace re2 166 167 #endif // RE2_PREFILTER_H_ 168