1 // Copyright (c) 2001-2011 Hartmut Kaiser 2 // 3 // Distributed under the Boost Software License, Version 1.0. (See accompanying 4 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 5 6 #if !defined(BOOST_SPIRIT_LEXERTL_ITERATOR_TOKENISER_MARCH_22_2007_0859AM) 7 #define BOOST_SPIRIT_LEXERTL_ITERATOR_TOKENISER_MARCH_22_2007_0859AM 8 9 #if defined(_MSC_VER) 10 #pragma once 11 #endif 12 13 #include <boost/spirit/home/support/detail/lexer/state_machine.hpp> 14 #include <boost/spirit/home/support/detail/lexer/consts.hpp> 15 #include <boost/spirit/home/support/detail/lexer/size_t.hpp> 16 #include <boost/spirit/home/support/detail/lexer/char_traits.hpp> 17 #include <iterator> // for std::iterator_traits 18 #include <vector> 19 20 namespace boost { namespace spirit { namespace lex { namespace lexertl 21 { 22 /////////////////////////////////////////////////////////////////////////// 23 template<typename Iterator> 24 class basic_iterator_tokeniser 25 { 26 public: 27 typedef std::vector<std::size_t> size_t_vector; 28 typedef typename std::iterator_traits<Iterator>::value_type char_type; 29 next(boost::lexer::basic_state_machine<char_type> const & state_machine_,std::size_t & dfa_state_,bool & bol_,Iterator & start_token_,Iterator const & end_,std::size_t & unique_id_)30 static std::size_t next ( 31 boost::lexer::basic_state_machine<char_type> const& state_machine_ 32 , std::size_t &dfa_state_, bool& bol_, Iterator &start_token_ 33 , Iterator const& end_, std::size_t& unique_id_) 34 { 35 if (start_token_ == end_) 36 { 37 unique_id_ = boost::lexer::npos; 38 return 0; 39 } 40 41 bool bol = bol_; 42 boost::lexer::detail::internals const& internals_ = 43 state_machine_.data(); 44 45 again: 46 std::size_t const* lookup_ = &internals_._lookup[dfa_state_]-> 47 front (); 48 std::size_t dfa_alphabet_ = internals_._dfa_alphabet[dfa_state_]; 49 std::size_t const* dfa_ = &internals_._dfa[dfa_state_]->front (); 50 51 std::size_t const* ptr_ = dfa_ + dfa_alphabet_; 52 Iterator curr_ = start_token_; 53 bool end_state_ = *ptr_ != 0; 54 std::size_t id_ = *(ptr_ + boost::lexer::id_index); 55 std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index); 56 std::size_t end_start_state_ = dfa_state_; 57 bool end_bol_ = bol_; 58 Iterator end_token_ = start_token_; 59 60 while (curr_ != end_) 61 { 62 std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index]; 63 std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index]; 64 65 if (BOL_state_ && bol) 66 { 67 ptr_ = &dfa_[BOL_state_ * dfa_alphabet_]; 68 } 69 else if (EOL_state_ && *curr_ == '\n') 70 { 71 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_]; 72 } 73 else 74 { 75 typedef typename 76 std::iterator_traits<Iterator>::value_type 77 value_type; 78 typedef typename 79 boost::lexer::char_traits<value_type>::index_type 80 index_type; 81 82 index_type index = 83 boost::lexer::char_traits<value_type>::call(*curr_++); 84 bol = (index == '\n') ? true : false; 85 std::size_t const state_ = ptr_[ 86 lookup_[static_cast<std::size_t>(index)]]; 87 88 if (state_ == 0) 89 { 90 break; 91 } 92 93 ptr_ = &dfa_[state_ * dfa_alphabet_]; 94 } 95 96 if (*ptr_) 97 { 98 end_state_ = true; 99 id_ = *(ptr_ + boost::lexer::id_index); 100 uid_ = *(ptr_ + boost::lexer::unique_id_index); 101 end_start_state_ = *(ptr_ + boost::lexer::state_index); 102 end_bol_ = bol; 103 end_token_ = curr_; 104 } 105 } 106 107 std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index]; 108 109 if (EOL_state_ && curr_ == end_) 110 { 111 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_]; 112 113 if (*ptr_) 114 { 115 end_state_ = true; 116 id_ = *(ptr_ + boost::lexer::id_index); 117 uid_ = *(ptr_ + boost::lexer::unique_id_index); 118 end_start_state_ = *(ptr_ + boost::lexer::state_index); 119 end_bol_ = bol; 120 end_token_ = curr_; 121 } 122 } 123 124 if (end_state_) { 125 // return longest match 126 dfa_state_ = end_start_state_; 127 start_token_ = end_token_; 128 129 if (id_ == 0) 130 { 131 bol = end_bol_; 132 goto again; 133 } 134 else 135 { 136 bol_ = end_bol_; 137 } 138 } 139 else { 140 bol_ = (*start_token_ == '\n') ? true : false; 141 id_ = boost::lexer::npos; 142 uid_ = boost::lexer::npos; 143 } 144 145 unique_id_ = uid_; 146 return id_; 147 } 148 149 /////////////////////////////////////////////////////////////////////// next(boost::lexer::basic_state_machine<char_type> const & state_machine_,bool & bol_,Iterator & start_token_,Iterator const & end_,std::size_t & unique_id_)150 static std::size_t next ( 151 boost::lexer::basic_state_machine<char_type> const& state_machine_ 152 , bool& bol_, Iterator &start_token_, Iterator const& end_ 153 , std::size_t& unique_id_) 154 { 155 if (start_token_ == end_) 156 { 157 unique_id_ = boost::lexer::npos; 158 return 0; 159 } 160 161 bool bol = bol_; 162 std::size_t const* lookup_ = &state_machine_.data()._lookup[0]->front(); 163 std::size_t dfa_alphabet_ = state_machine_.data()._dfa_alphabet[0]; 164 std::size_t const* dfa_ = &state_machine_.data()._dfa[0]->front (); 165 std::size_t const* ptr_ = dfa_ + dfa_alphabet_; 166 167 Iterator curr_ = start_token_; 168 bool end_state_ = *ptr_ != 0; 169 std::size_t id_ = *(ptr_ + boost::lexer::id_index); 170 std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index); 171 bool end_bol_ = bol_; 172 Iterator end_token_ = start_token_; 173 174 while (curr_ != end_) 175 { 176 std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index]; 177 std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index]; 178 179 if (BOL_state_ && bol) 180 { 181 ptr_ = &dfa_[BOL_state_ * dfa_alphabet_]; 182 } 183 else if (EOL_state_ && *curr_ == '\n') 184 { 185 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_]; 186 } 187 else 188 { 189 typedef typename 190 std::iterator_traits<Iterator>::value_type 191 value_type; 192 typedef typename 193 boost::lexer::char_traits<value_type>::index_type 194 index_type; 195 196 index_type index = 197 boost::lexer::char_traits<value_type>::call(*curr_++); 198 bol = (index == '\n') ? true : false; 199 std::size_t const state_ = ptr_[ 200 lookup_[static_cast<std::size_t>(index)]]; 201 202 if (state_ == 0) 203 { 204 break; 205 } 206 207 ptr_ = &dfa_[state_ * dfa_alphabet_]; 208 } 209 210 if (*ptr_) 211 { 212 end_state_ = true; 213 id_ = *(ptr_ + boost::lexer::id_index); 214 uid_ = *(ptr_ + boost::lexer::unique_id_index); 215 end_bol_ = bol; 216 end_token_ = curr_; 217 } 218 } 219 220 std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index]; 221 222 if (EOL_state_ && curr_ == end_) 223 { 224 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_]; 225 226 if (*ptr_) 227 { 228 end_state_ = true; 229 id_ = *(ptr_ + boost::lexer::id_index); 230 uid_ = *(ptr_ + boost::lexer::unique_id_index); 231 end_bol_ = bol; 232 end_token_ = curr_; 233 } 234 } 235 236 if (end_state_) { 237 // return longest match 238 bol_ = end_bol_; 239 start_token_ = end_token_; 240 } 241 else { 242 bol_ = *start_token_ == '\n'; 243 id_ = boost::lexer::npos; 244 uid_ = boost::lexer::npos; 245 } 246 247 unique_id_ = uid_; 248 return id_; 249 } 250 }; 251 252 }}}} 253 254 #endif 255