1 // state_machine.hpp 2 // Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) 3 // 4 // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_STATE_MACHINE_HPP 7 #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_STATE_MACHINE_HPP 8 9 #include <algorithm> 10 #include "conversion/char_state_machine.hpp" 11 #include "consts.hpp" 12 #include <deque> 13 #include "internals.hpp" 14 #include <map> 15 #include "containers/ptr_vector.hpp" 16 #include "size_t.hpp" 17 #include <string> 18 19 namespace boost 20 { 21 namespace lexer 22 { 23 template<typename CharT> 24 class basic_state_machine 25 { 26 public: 27 typedef CharT char_type; 28 29 class iterator 30 { 31 public: 32 friend class basic_state_machine; 33 34 struct data 35 { 36 // Current iterator info 37 std::size_t dfa; 38 std::size_t states; 39 std::size_t state; 40 std::size_t transitions; 41 std::size_t transition; 42 43 // Current state info 44 bool end_state; 45 std::size_t id; 46 std::size_t unique_id; 47 std::size_t goto_dfa; 48 std::size_t bol_index; 49 std::size_t eol_index; 50 51 // Current transition info 52 basic_string_token<CharT> token; 53 std::size_t goto_state; 54 databoost::lexer::basic_state_machine::iterator::data55 data () : 56 dfa (npos), 57 states (0), 58 state (npos), 59 transitions (0), 60 transition (npos), 61 end_state (false), 62 id (npos), 63 unique_id (npos), 64 goto_dfa (npos), 65 bol_index (npos), 66 eol_index (npos), 67 goto_state (npos) 68 { 69 } 70 operator ==boost::lexer::basic_state_machine::iterator::data71 bool operator == (const data &rhs_) const 72 { 73 return dfa == rhs_.dfa && 74 states == rhs_.states && 75 state == rhs_.state && 76 transitions == rhs_.transitions && 77 transition == rhs_.transition && 78 end_state == rhs_.end_state && 79 id == rhs_.id && 80 unique_id == rhs_.unique_id && 81 goto_dfa == rhs_.goto_dfa && 82 bol_index == rhs_.bol_index && 83 eol_index == rhs_.eol_index && 84 token == rhs_.token && 85 goto_state == rhs_.goto_state; 86 } 87 }; 88 iterator()89 iterator () : 90 _sm (0), 91 _dfas (0), 92 _dfa (npos), 93 _states (0), 94 _state (npos), 95 _transitions (0), 96 _transition (npos) 97 { 98 } 99 operator ==(const iterator & rhs_) const100 bool operator == (const iterator &rhs_) const 101 { 102 return _dfas == rhs_._dfas && _dfa == rhs_._dfa && 103 _states == rhs_._states && _state == rhs_._state && 104 _transitions == rhs_._transitions && 105 _transition == rhs_._transition; 106 } 107 operator !=(const iterator & rhs_) const108 bool operator != (const iterator &rhs_) const 109 { 110 return !(*this == rhs_); 111 } 112 operator *()113 data &operator * () 114 { 115 return _data; 116 } 117 operator ->()118 data *operator -> () 119 { 120 return &_data; 121 } 122 123 // Let compiler generate operator = (). 124 125 // prefix version operator ++()126 iterator &operator ++ () 127 { 128 next (); 129 return *this; 130 } 131 132 // postfix version operator ++(int)133 iterator operator ++ (int) 134 { 135 iterator iter_ = *this; 136 137 next (); 138 return iter_; 139 } 140 clear()141 void clear () 142 { 143 _dfas = _states = _transitions = 0; 144 _dfa = _state = _transition = npos; 145 } 146 147 private: 148 basic_state_machine *_sm; 149 data _data; 150 std::size_t _dfas; 151 std::size_t _dfa; 152 std::size_t _states; 153 std::size_t _state; 154 std::size_t _transitions; 155 std::size_t _transition; 156 typename detail::basic_char_state_machine<CharT>::state:: 157 size_t_string_token_map::const_iterator _token_iter; 158 typename detail::basic_char_state_machine<CharT>::state:: 159 size_t_string_token_map::const_iterator _token_end; 160 next()161 void next () 162 { 163 bool reset_state_ = false; 164 165 if (_transition >= _transitions) 166 { 167 _transition = _data.transition = 0; 168 _data.state = ++_state; 169 reset_state_ = true; 170 171 if (_state >= _states) 172 { 173 ++_dfa; 174 175 if (_dfa >= _dfas) 176 { 177 clear (); 178 reset_state_ = false; 179 } 180 else 181 { 182 _states = _data.states = 183 _sm->_csm._sm_vector[_dfa].size (); 184 _state = _data.state = 0; 185 } 186 } 187 } 188 else 189 { 190 _data.transition = _transition; 191 } 192 193 if (reset_state_) 194 { 195 const typename detail::basic_char_state_machine<CharT>:: 196 state *ptr_ = &_sm->_csm._sm_vector[_dfa][_state]; 197 198 _transitions = _data.transitions = ptr_->_transitions.size (); 199 _data.end_state = ptr_->_end_state; 200 _data.id = ptr_->_id; 201 _data.unique_id = ptr_->_unique_id; 202 _data.goto_dfa = ptr_->_state; 203 _data.bol_index = ptr_->_bol_index; 204 _data.eol_index = ptr_->_eol_index; 205 _token_iter = ptr_->_transitions.begin (); 206 _token_end = ptr_->_transitions.end (); 207 } 208 209 if (_token_iter != _token_end) 210 { 211 _data.token = _token_iter->second; 212 _data.goto_state = _token_iter->first; 213 ++_token_iter; 214 ++_transition; 215 } 216 else 217 { 218 _data.token.clear (); 219 _data.goto_state = npos; 220 } 221 } 222 }; 223 224 friend class iterator; 225 basic_state_machine()226 basic_state_machine () 227 { 228 } 229 clear()230 void clear () 231 { 232 _internals.clear (); 233 _csm.clear (); 234 } 235 empty() const236 bool empty () const 237 { 238 // Don't include _csm in this test, as irrelevant to state. 239 return _internals._lookup->empty () && 240 _internals._dfa_alphabet.empty () && 241 _internals._dfa->empty (); 242 } 243 size() const244 std::size_t size () const 245 { 246 return _internals._dfa->size (); 247 } 248 operator ==(const basic_state_machine & rhs_) const249 bool operator == (const basic_state_machine &rhs_) const 250 { 251 // Don't include _csm in this test, as irrelevant to state. 252 return _internals._lookup == rhs_._internals._lookup && 253 _internals._dfa_alphabet == rhs_._internals._dfa_alphabet && 254 _internals._dfa == rhs_._internals._dfa && 255 _internals._seen_BOL_assertion == 256 rhs_._internals._seen_BOL_assertion && 257 _internals._seen_EOL_assertion == 258 rhs_._internals._seen_EOL_assertion; 259 } 260 begin() const261 iterator begin () const 262 { 263 iterator iter_; 264 265 iter_._sm = const_cast<basic_state_machine *>(this); 266 check_for_csm (); 267 268 if (!_csm.empty ()) 269 { 270 const typename detail::basic_char_state_machine<CharT>:: 271 state_vector *ptr_ = &_csm._sm_vector.front (); 272 273 iter_._dfas = _csm._sm_vector.size (); 274 iter_._states = iter_._data.states = ptr_->size (); 275 iter_._transitions = iter_._data.transitions = 276 ptr_->front ()._transitions.size (); 277 iter_._dfa = iter_._data.dfa = 0; 278 iter_._state = iter_._data.state = 0; 279 iter_._transition = 0; 280 iter_._data.end_state = ptr_->front ()._end_state; 281 iter_._data.id = ptr_->front ()._id; 282 iter_._data.unique_id = ptr_->front ()._unique_id; 283 iter_._data.goto_dfa = ptr_->front ()._state; 284 iter_._data.bol_index = ptr_->front ()._bol_index; 285 iter_._data.eol_index = ptr_->front ()._eol_index; 286 iter_._token_iter = ptr_->front ()._transitions.begin (); 287 iter_._token_end = ptr_->front ()._transitions.end (); 288 289 // Deal with case where there is only a bol or eol 290 // but no other transitions. 291 if (iter_._transitions) 292 { 293 ++iter_; 294 } 295 } 296 297 return iter_; 298 } 299 end() const300 iterator end () const 301 { 302 iterator iter_; 303 304 iter_._sm = const_cast<basic_state_machine *>(this); 305 return iter_; 306 } 307 swap(basic_state_machine & sm_)308 void swap (basic_state_machine &sm_) 309 { 310 _internals.swap (sm_._internals); 311 _csm.swap (sm_._csm); 312 } 313 data() const314 const detail::internals &data () const 315 { 316 return _internals; 317 } 318 319 private: 320 detail::internals _internals; 321 mutable detail::basic_char_state_machine<CharT> _csm; 322 check_for_csm() const323 void check_for_csm () const 324 { 325 if (_csm.empty ()) 326 { 327 human_readable (_csm); 328 } 329 } 330 human_readable(detail::basic_char_state_machine<CharT> & sm_) const331 void human_readable (detail::basic_char_state_machine<CharT> &sm_) const 332 { 333 const std::size_t max_ = sizeof (CharT) == 1 ? 334 num_chars : num_wchar_ts; 335 const std::size_t start_states_ = _internals._dfa->size (); 336 337 sm_.clear (); 338 sm_._sm_vector.resize (start_states_); 339 340 for (std::size_t start_state_index_ = 0; 341 start_state_index_ < start_states_; ++start_state_index_) 342 { 343 const detail::internals::size_t_vector *lu_ = 344 _internals._lookup[start_state_index_]; 345 const std::size_t alphabet_ = 346 _internals._dfa_alphabet[start_state_index_] - dfa_offset; 347 std::vector<std::basic_string<CharT> > chars_ (alphabet_); 348 const std::size_t states_ = _internals._dfa[start_state_index_]-> 349 size () / (alphabet_ + dfa_offset); 350 const std::size_t *read_ptr_ = &_internals. 351 _dfa[start_state_index_]->front () + alphabet_ + dfa_offset; 352 353 sm_._sm_vector[start_state_index_].resize (states_ - 1); 354 355 for (std::size_t alpha_index_ = 0; alpha_index_ < max_; 356 ++alpha_index_) 357 { 358 const std::size_t col_ = lu_->at (alpha_index_); 359 360 if (col_ != static_cast<std::size_t>(dead_state_index)) 361 { 362 chars_[col_ - dfa_offset] += static_cast<CharT> 363 (alpha_index_); 364 } 365 } 366 367 for (std::size_t state_index_ = 1; state_index_ < states_; 368 ++state_index_) 369 { 370 typename detail::basic_char_state_machine<CharT>::state 371 *state_ = &sm_._sm_vector[start_state_index_] 372 [state_index_ - 1]; 373 374 state_->_end_state = *read_ptr_ != 0; 375 state_->_id = *(read_ptr_ + id_index); 376 state_->_unique_id = *(read_ptr_ + unique_id_index); 377 state_->_state = *(read_ptr_ + state_index); 378 state_->_bol_index = *(read_ptr_ + bol_index) - 1; 379 state_->_eol_index = *(read_ptr_ + eol_index) - 1; 380 read_ptr_ += dfa_offset; 381 382 for (std::size_t col_index_ = 0; col_index_ < alphabet_; 383 ++col_index_, ++read_ptr_) 384 { 385 const std::size_t transition_ = *read_ptr_; 386 387 if (transition_ != 0) 388 { 389 const std::size_t i_ = transition_ - 1; 390 typename detail::basic_char_state_machine<CharT>:: 391 state::size_t_string_token_map::iterator iter_ = 392 state_->_transitions.find (i_); 393 394 if (iter_ == state_->_transitions.end ()) 395 { 396 basic_string_token<CharT> token_ 397 (false, chars_[col_index_]); 398 typename detail::basic_char_state_machine<CharT>:: 399 state::size_t_string_token_pair pair_ 400 (i_, token_); 401 402 state_->_transitions.insert (pair_); 403 } 404 else 405 { 406 iter_->second._charset += chars_[col_index_]; 407 } 408 } 409 } 410 411 for (typename detail::basic_char_state_machine<CharT>::state:: 412 size_t_string_token_map::iterator iter_ = 413 state_->_transitions.begin (), 414 end_ = state_->_transitions.end (); 415 iter_ != end_; ++iter_) 416 { 417 std::sort (iter_->second._charset.begin (), 418 iter_->second._charset.end ()); 419 iter_->second.normalise (); 420 } 421 } 422 } 423 } 424 }; 425 426 typedef basic_state_machine<char> state_machine; 427 typedef basic_state_machine<wchar_t> wstate_machine; 428 } 429 } 430 431 #endif 432