1 /* 2 Copyright (c) Marshall Clow 2010-2012. 3 4 Distributed under the Boost Software License, Version 1.0. (See accompanying 5 file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 7 For more information, see http://www.boost.org 8 */ 9 10 #ifndef BOOST_ALGORITHM_BOYER_MOORE_HORSPOOOL_SEARCH_HPP 11 #define BOOST_ALGORITHM_BOYER_MOORE_HORSPOOOL_SEARCH_HPP 12 13 #include <iterator> // for std::iterator_traits 14 15 #include <boost/config.hpp> 16 #include <boost/assert.hpp> 17 #include <boost/static_assert.hpp> 18 19 #include <boost/range/begin.hpp> 20 #include <boost/range/end.hpp> 21 22 #include <boost/utility/enable_if.hpp> 23 #include <boost/type_traits/is_same.hpp> 24 25 #include <boost/algorithm/searching/detail/bm_traits.hpp> 26 #include <boost/algorithm/searching/detail/debugging.hpp> 27 28 // #define BOOST_ALGORITHM_BOYER_MOORE_HORSPOOL_DEBUG_HPP 29 30 namespace boost { namespace algorithm { 31 32 /* 33 A templated version of the boyer-moore-horspool searching algorithm. 34 35 Requirements: 36 * Random access iterators 37 * The two iterator types (patIter and corpusIter) must 38 "point to" the same underlying type. 39 * Additional requirements may be imposed buy the skip table, such as: 40 ** Numeric type (array-based skip table) 41 ** Hashable type (map-based skip table) 42 43 http://www-igm.univ-mlv.fr/%7Elecroq/string/node18.html 44 45 */ 46 47 template <typename patIter, typename traits = detail::BM_traits<patIter> > 48 class boyer_moore_horspool { 49 typedef typename std::iterator_traits<patIter>::difference_type difference_type; 50 public: boyer_moore_horspool(patIter first,patIter last)51 boyer_moore_horspool ( patIter first, patIter last ) 52 : pat_first ( first ), pat_last ( last ), 53 k_pattern_length ( std::distance ( pat_first, pat_last )), 54 skip_ ( k_pattern_length, k_pattern_length ) { 55 56 // Build the skip table 57 std::size_t i = 0; 58 if ( first != last ) // empty pattern? 59 for ( patIter iter = first; iter != last-1; ++iter, ++i ) 60 skip_.insert ( *iter, k_pattern_length - 1 - i ); 61 #ifdef BOOST_ALGORITHM_BOYER_MOORE_HORSPOOL_DEBUG_HPP 62 skip_.PrintSkipTable (); 63 #endif 64 } 65 ~boyer_moore_horspool()66 ~boyer_moore_horspool () {} 67 68 /// \fn operator ( corpusIter corpus_first, corpusIter corpus_last) 69 /// \brief Searches the corpus for the pattern that was passed into the constructor 70 /// 71 /// \param corpus_first The start of the data to search (Random Access Iterator) 72 /// \param corpus_last One past the end of the data to search 73 /// 74 template <typename corpusIter> 75 std::pair<corpusIter, corpusIter> operator ()(corpusIter corpus_first,corpusIter corpus_last) const76 operator () ( corpusIter corpus_first, corpusIter corpus_last ) const { 77 BOOST_STATIC_ASSERT (( boost::is_same< 78 typename std::iterator_traits<patIter>::value_type, 79 typename std::iterator_traits<corpusIter>::value_type>::value )); 80 81 if ( corpus_first == corpus_last ) return std::make_pair(corpus_last, corpus_last); // if nothing to search, we didn't find it! 82 if ( pat_first == pat_last ) return std::make_pair(corpus_first, corpus_first); // empty pattern matches at start 83 84 const difference_type k_corpus_length = std::distance ( corpus_first, corpus_last ); 85 // If the pattern is larger than the corpus, we can't find it! 86 if ( k_corpus_length < k_pattern_length ) 87 return std::make_pair(corpus_last, corpus_last); 88 89 // Do the search 90 return this->do_search ( corpus_first, corpus_last ); 91 } 92 93 template <typename Range> 94 std::pair<typename boost::range_iterator<Range>::type, typename boost::range_iterator<Range>::type> operator ()(Range & r) const95 operator () ( Range &r ) const { 96 return (*this) (boost::begin(r), boost::end(r)); 97 } 98 99 private: 100 /// \cond DOXYGEN_HIDE 101 patIter pat_first, pat_last; 102 const difference_type k_pattern_length; 103 typename traits::skip_table_t skip_; 104 105 /// \fn do_search ( corpusIter corpus_first, corpusIter corpus_last ) 106 /// \brief Searches the corpus for the pattern that was passed into the constructor 107 /// 108 /// \param corpus_first The start of the data to search (Random Access Iterator) 109 /// \param corpus_last One past the end of the data to search 110 /// \param k_corpus_length The length of the corpus to search 111 /// 112 template <typename corpusIter> 113 std::pair<corpusIter, corpusIter> do_search(corpusIter corpus_first,corpusIter corpus_last) const114 do_search ( corpusIter corpus_first, corpusIter corpus_last ) const { 115 corpusIter curPos = corpus_first; 116 const corpusIter lastPos = corpus_last - k_pattern_length; 117 while ( curPos <= lastPos ) { 118 // Do we match right where we are? 119 std::size_t j = k_pattern_length - 1; 120 while ( pat_first [j] == curPos [j] ) { 121 // We matched - we're done! 122 if ( j == 0 ) 123 return std::make_pair(curPos, curPos + k_pattern_length); 124 j--; 125 } 126 127 curPos += skip_ [ curPos [ k_pattern_length - 1 ]]; 128 } 129 130 return std::make_pair(corpus_last, corpus_last); 131 } 132 // \endcond 133 }; 134 135 /* Two ranges as inputs gives us four possibilities; with 2,3,3,4 parameters 136 Use a bit of TMP to disambiguate the 3-argument templates */ 137 138 /// \fn boyer_moore_horspool_search ( corpusIter corpus_first, corpusIter corpus_last, 139 /// patIter pat_first, patIter pat_last ) 140 /// \brief Searches the corpus for the pattern. 141 /// 142 /// \param corpus_first The start of the data to search (Random Access Iterator) 143 /// \param corpus_last One past the end of the data to search 144 /// \param pat_first The start of the pattern to search for (Random Access Iterator) 145 /// \param pat_last One past the end of the data to search for 146 /// 147 template <typename patIter, typename corpusIter> boyer_moore_horspool_search(corpusIter corpus_first,corpusIter corpus_last,patIter pat_first,patIter pat_last)148 std::pair<corpusIter, corpusIter> boyer_moore_horspool_search ( 149 corpusIter corpus_first, corpusIter corpus_last, 150 patIter pat_first, patIter pat_last ) 151 { 152 boyer_moore_horspool<patIter> bmh ( pat_first, pat_last ); 153 return bmh ( corpus_first, corpus_last ); 154 } 155 156 template <typename PatternRange, typename corpusIter> boyer_moore_horspool_search(corpusIter corpus_first,corpusIter corpus_last,const PatternRange & pattern)157 std::pair<corpusIter, corpusIter> boyer_moore_horspool_search ( 158 corpusIter corpus_first, corpusIter corpus_last, const PatternRange &pattern ) 159 { 160 typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator; 161 boyer_moore_horspool<pattern_iterator> bmh ( boost::begin(pattern), boost::end (pattern)); 162 return bmh ( corpus_first, corpus_last ); 163 } 164 165 template <typename patIter, typename CorpusRange> 166 typename boost::disable_if_c< 167 boost::is_same<CorpusRange, patIter>::value, 168 std::pair<typename boost::range_iterator<CorpusRange>::type, typename boost::range_iterator<CorpusRange>::type> > 169 ::type boyer_moore_horspool_search(CorpusRange & corpus,patIter pat_first,patIter pat_last)170 boyer_moore_horspool_search ( CorpusRange &corpus, patIter pat_first, patIter pat_last ) 171 { 172 boyer_moore_horspool<patIter> bmh ( pat_first, pat_last ); 173 return bm (boost::begin (corpus), boost::end (corpus)); 174 } 175 176 template <typename PatternRange, typename CorpusRange> 177 std::pair<typename boost::range_iterator<CorpusRange>::type, typename boost::range_iterator<CorpusRange>::type> boyer_moore_horspool_search(CorpusRange & corpus,const PatternRange & pattern)178 boyer_moore_horspool_search ( CorpusRange &corpus, const PatternRange &pattern ) 179 { 180 typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator; 181 boyer_moore_horspool<pattern_iterator> bmh ( boost::begin(pattern), boost::end (pattern)); 182 return bmh (boost::begin (corpus), boost::end (corpus)); 183 } 184 185 186 // Creator functions -- take a pattern range, return an object 187 template <typename Range> 188 boost::algorithm::boyer_moore_horspool<typename boost::range_iterator<const Range>::type> make_boyer_moore_horspool(const Range & r)189 make_boyer_moore_horspool ( const Range &r ) { 190 return boost::algorithm::boyer_moore_horspool 191 <typename boost::range_iterator<const Range>::type> (boost::begin(r), boost::end(r)); 192 } 193 194 template <typename Range> 195 boost::algorithm::boyer_moore_horspool<typename boost::range_iterator<Range>::type> make_boyer_moore_horspool(Range & r)196 make_boyer_moore_horspool ( Range &r ) { 197 return boost::algorithm::boyer_moore_horspool 198 <typename boost::range_iterator<Range>::type> (boost::begin(r), boost::end(r)); 199 } 200 201 }} 202 203 #endif // BOOST_ALGORITHM_BOYER_MOORE_HORSPOOOL_SEARCH_HPP 204