1 // Copyright (c) 2001-2011 Hartmut Kaiser 2 // 3 // Distributed under the Boost Software License, Version 1.0. (See accompanying 4 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 5 6 #if !defined(BOOST_SPIRIT_LEX_LEXER_STATIC_FUNCTOR_DATA_FEB_10_2008_0755PM) 7 #define BOOST_SPIRIT_LEX_LEXER_STATIC_FUNCTOR_DATA_FEB_10_2008_0755PM 8 9 #if defined(_MSC_VER) 10 #pragma once 11 #endif 12 13 #include <boost/spirit/home/support/detail/lexer/generator.hpp> 14 #include <boost/spirit/home/support/detail/lexer/rules.hpp> 15 #include <boost/spirit/home/support/detail/lexer/state_machine.hpp> 16 #include <boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp> 17 #include <boost/spirit/home/lex/lexer/lexertl/semantic_action_data.hpp> 18 #include <boost/spirit/home/lex/lexer/lexertl/wrap_action.hpp> 19 #include <boost/spirit/home/support/assert_msg.hpp> 20 #include <boost/mpl/bool.hpp> 21 #include <iterator> // for std::iterator_traits 22 23 namespace boost { namespace spirit { namespace lex { namespace lexertl 24 { 25 namespace detail 26 { 27 /////////////////////////////////////////////////////////////////////// 28 template <typename Char> zstr_compare(Char const * s1,Char const * s2)29 inline bool zstr_compare(Char const* s1, Char const* s2) 30 { 31 for (; *s1 || *s2; ++s1, ++s2) 32 if (*s1 != *s2) 33 return false; 34 return true; 35 } 36 37 template <typename Char, typename F> get_state_id(Char const * state,F f,std::size_t numstates)38 inline std::size_t get_state_id(Char const* state, F f 39 , std::size_t numstates) 40 { 41 for (std::size_t i = 0; i < numstates; ++i) 42 { 43 if (zstr_compare(f(i), state)) 44 return i; 45 } 46 return boost::lexer::npos; 47 } 48 49 /////////////////////////////////////////////////////////////////////// 50 template <typename Iterator, typename HasActors, typename HasState 51 , typename TokenValue> 52 class static_data; // no default specialization 53 54 /////////////////////////////////////////////////////////////////////// 55 // doesn't support no state and no actors 56 template <typename Iterator, typename TokenValue> 57 class static_data<Iterator, mpl::false_, mpl::false_, TokenValue> 58 { 59 protected: 60 typedef typename 61 std::iterator_traits<Iterator>::value_type 62 char_type; 63 64 public: 65 typedef Iterator base_iterator_type; 66 typedef iterator_range<Iterator> token_value_type; 67 typedef token_value_type get_value_type; 68 typedef std::size_t state_type; 69 typedef char_type const* state_name_type; 70 typedef unused_type semantic_actions_type; 71 typedef detail::wrap_action<unused_type, Iterator, static_data 72 , std::size_t> wrap_action_type; 73 74 typedef std::size_t (*next_token_functor)(std::size_t&, 75 bool&, Iterator&, Iterator const&, std::size_t&); 76 typedef char_type const* (*get_state_name_type)(std::size_t); 77 78 // initialize the shared data 79 template <typename IterData> static_data(IterData const & data,Iterator & first,Iterator const & last)80 static_data (IterData const& data, Iterator& first 81 , Iterator const& last) 82 : first_(first), last_(last) 83 , next_token_(data.next_) 84 , get_state_name_(data.get_state_name_) 85 , bol_(data.bol_) {} 86 87 // The following functions are used by the implementation of the 88 // placeholder '_state'. 89 template <typename Char> set_state_name(Char const *)90 void set_state_name (Char const*) 91 { 92 // If you see a compile time assertion below you're probably 93 // using a token type not supporting lexer states (the 3rd 94 // template parameter of the token is mpl::false_), but your 95 // code uses state changes anyways. 96 BOOST_SPIRIT_ASSERT_FAIL(Char, 97 tried_to_set_state_of_stateless_token, ()); 98 } get_state_name() const99 char_type const* get_state_name() const 100 { 101 return get_state_name_(0); 102 } get_state_id(char_type const *) const103 std::size_t get_state_id(char_type const*) const 104 { 105 return 0; 106 } 107 108 // The function get_eoi() is used by the implementation of the 109 // placeholder '_eoi'. get_eoi() const110 Iterator const& get_eoi() const { return last_; } 111 112 // The function less() is used by the implementation of the support 113 // function lex::less(). Its functionality is equivalent to flex' 114 // function yyless(): it returns an iterator positioned to the 115 // nth input character beyond the current start iterator (i.e. by 116 // assigning the return value to the placeholder '_end' it is 117 // possible to return all but the first n characters of the current 118 // token back to the input stream. 119 // 120 // This function does nothing as long as no semantic actions are 121 // used. less(Iterator const & it,int)122 Iterator const& less(Iterator const& it, int) 123 { 124 // The following assertion fires most likely because you are 125 // using lexer semantic actions without using the actor_lexer 126 // as the base class for your token definition class. 127 BOOST_ASSERT(false && 128 "Are you using lexer semantic actions without using the " 129 "actor_lexer base?"); 130 return it; 131 } 132 133 // The function more() is used by the implementation of the support 134 // function lex::more(). Its functionality is equivalent to flex' 135 // function yymore(): it tells the lexer that the next time it 136 // matches a rule, the corresponding token should be appended onto 137 // the current token value rather than replacing it. 138 // 139 // These functions do nothing as long as no semantic actions are 140 // used. more()141 void more() 142 { 143 // The following assertion fires most likely because you are 144 // using lexer semantic actions without using the actor_lexer 145 // as the base class for your token definition class. 146 BOOST_ASSERT(false && 147 "Are you using lexer semantic actions without using the " 148 "actor_lexer base?"); 149 } adjust_start()150 bool adjust_start() { return false; } revert_adjust_start()151 void revert_adjust_start() {} 152 153 // The function lookahead() is used by the implementation of the 154 // support function lex::lookahead. It can be used to implement 155 // lookahead for lexer engines not supporting constructs like flex' 156 // a/b (match a, but only when followed by b): 157 // 158 // This function does nothing as long as no semantic actions are 159 // used. lookahead(std::size_t,std::size_t=std::size_t (~0))160 bool lookahead(std::size_t, std::size_t /*state*/ = std::size_t(~0)) 161 { 162 // The following assertion fires most likely because you are 163 // using lexer semantic actions without using the actor_lexer 164 // as the base class for your token definition class. 165 BOOST_ASSERT(false && 166 "Are you using lexer semantic actions without using the " 167 "actor_lexer base?"); 168 return false; 169 } 170 171 // the functions next, invoke_actions, and get_state are used by 172 // the functor implementation below 173 174 // The function next() tries to match the next token from the 175 // underlying input sequence. next(Iterator & end,std::size_t & unique_id,bool & prev_bol)176 std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol) 177 { 178 prev_bol = bol_; 179 180 std::size_t state = 0; 181 return next_token_(state, bol_, end, last_, unique_id); 182 } 183 184 // nothing to invoke, so this is empty invoke_actions(std::size_t,std::size_t,std::size_t,Iterator const &)185 BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t 186 , std::size_t, std::size_t, Iterator const&) 187 { 188 return pass_flags::pass_normal; // always accept 189 } 190 get_state() const191 std::size_t get_state() const { return 0; } set_state(std::size_t)192 void set_state(std::size_t) {} 193 set_end(Iterator const &)194 void set_end(Iterator const&) {} 195 get_first()196 Iterator& get_first() { return first_; } get_first() const197 Iterator const& get_first() const { return first_; } get_last() const198 Iterator const& get_last() const { return last_; } 199 get_value() const200 iterator_range<Iterator> get_value() const 201 { 202 return iterator_range<Iterator>(first_, last_); 203 } has_value() const204 bool has_value() const { return false; } reset_value()205 void reset_value() {} 206 reset_bol(bool bol)207 void reset_bol(bool bol) { bol_ = bol; } 208 209 protected: 210 Iterator& first_; 211 Iterator last_; 212 213 next_token_functor next_token_; 214 get_state_name_type get_state_name_; 215 216 bool bol_; // helper storing whether last character was \n 217 218 // silence MSVC warning C4512: assignment operator could not be generated 219 BOOST_DELETED_FUNCTION(static_data& operator= (static_data const&)) 220 }; 221 222 /////////////////////////////////////////////////////////////////////// 223 // doesn't support lexer semantic actions, but supports state 224 template <typename Iterator, typename TokenValue> 225 class static_data<Iterator, mpl::false_, mpl::true_, TokenValue> 226 : public static_data<Iterator, mpl::false_, mpl::false_, TokenValue> 227 { 228 protected: 229 typedef static_data<Iterator, mpl::false_, mpl::false_, TokenValue> base_type; 230 typedef typename base_type::char_type char_type; 231 232 public: 233 typedef Iterator base_iterator_type; 234 typedef iterator_range<Iterator> token_value_type; 235 typedef token_value_type get_value_type; 236 typedef typename base_type::state_type state_type; 237 typedef typename base_type::state_name_type state_name_type; 238 typedef typename base_type::semantic_actions_type 239 semantic_actions_type; 240 241 // initialize the shared data 242 template <typename IterData> static_data(IterData const & data,Iterator & first,Iterator const & last)243 static_data (IterData const& data, Iterator& first 244 , Iterator const& last) 245 : base_type(data, first, last), state_(0) 246 , num_states_(data.num_states_) {} 247 248 // The following functions are used by the implementation of the 249 // placeholder '_state'. set_state_name(char_type const * new_state)250 void set_state_name (char_type const* new_state) 251 { 252 std::size_t state_id = lexertl::detail::get_state_id(new_state 253 , this->get_state_name_, num_states_); 254 255 // if the following assertion fires you've probably been using 256 // a lexer state name which was not defined in your token 257 // definition 258 BOOST_ASSERT(state_id != boost::lexer::npos); 259 260 if (state_id != boost::lexer::npos) 261 state_ = state_id; 262 } get_state_name() const263 char_type const* get_state_name() const 264 { 265 return this->get_state_name_(state_); 266 } get_state_id(char_type const * state) const267 std::size_t get_state_id(char_type const* state) const 268 { 269 return lexertl::detail::get_state_id(state 270 , this->get_state_name_, num_states_); 271 } 272 273 // the functions next() and get_state() are used by the functor 274 // implementation below 275 276 // The function next() tries to match the next token from the 277 // underlying input sequence. next(Iterator & end,std::size_t & unique_id,bool & prev_bol)278 std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol) 279 { 280 prev_bol = this->bol_; 281 return this->next_token_(state_, this->bol_, end, this->last_ 282 , unique_id); 283 } 284 get_state()285 std::size_t& get_state() { return state_; } set_state(std::size_t state)286 void set_state(std::size_t state) { state_ = state; } 287 288 protected: 289 std::size_t state_; 290 std::size_t num_states_; 291 292 // silence MSVC warning C4512: assignment operator could not be generated 293 BOOST_DELETED_FUNCTION(static_data& operator= (static_data const&)) 294 }; 295 296 /////////////////////////////////////////////////////////////////////// 297 // does support actors, but may have no state 298 template <typename Iterator, typename HasState, typename TokenValue> 299 class static_data<Iterator, mpl::true_, HasState, TokenValue> 300 : public static_data<Iterator, mpl::false_, HasState, TokenValue> 301 { 302 public: 303 typedef semantic_actions<Iterator, HasState, static_data> 304 semantic_actions_type; 305 306 protected: 307 typedef static_data<Iterator, mpl::false_, HasState, TokenValue> 308 base_type; 309 typedef typename base_type::char_type char_type; 310 typedef typename semantic_actions_type::functor_wrapper_type 311 functor_wrapper_type; 312 313 public: 314 typedef Iterator base_iterator_type; 315 typedef TokenValue token_value_type; 316 typedef TokenValue const& get_value_type; 317 typedef typename base_type::state_type state_type; 318 typedef typename base_type::state_name_type state_name_type; 319 320 typedef detail::wrap_action<functor_wrapper_type 321 , Iterator, static_data, std::size_t> wrap_action_type; 322 323 template <typename IterData> static_data(IterData const & data,Iterator & first,Iterator const & last)324 static_data (IterData const& data, Iterator& first 325 , Iterator const& last) 326 : base_type(data, first, last) 327 , actions_(data.actions_), hold_() 328 , value_(iterator_range<Iterator>(first, last)) 329 , has_value_(false) 330 , has_hold_(false) 331 {} 332 333 // invoke attached semantic actions, if defined invoke_actions(std::size_t state,std::size_t & id,std::size_t unique_id,Iterator & end)334 BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state 335 , std::size_t& id, std::size_t unique_id, Iterator& end) 336 { 337 return actions_.invoke_actions(state, id, unique_id, end, *this); 338 } 339 340 // The function less() is used by the implementation of the support 341 // function lex::less(). Its functionality is equivalent to flex' 342 // function yyless(): it returns an iterator positioned to the 343 // nth input character beyond the current start iterator (i.e. by 344 // assigning the return value to the placeholder '_end' it is 345 // possible to return all but the first n characters of the current 346 // token back to the input stream). less(Iterator & it,int n)347 Iterator const& less(Iterator& it, int n) 348 { 349 it = this->get_first(); 350 std::advance(it, n); 351 return it; 352 } 353 354 // The function more() is used by the implementation of the support 355 // function lex::more(). Its functionality is equivalent to flex' 356 // function yymore(): it tells the lexer that the next time it 357 // matches a rule, the corresponding token should be appended onto 358 // the current token value rather than replacing it. more()359 void more() 360 { 361 hold_ = this->get_first(); 362 has_hold_ = true; 363 } 364 365 // The function lookahead() is used by the implementation of the 366 // support function lex::lookahead. It can be used to implement 367 // lookahead for lexer engines not supporting constructs like flex' 368 // a/b (match a, but only when followed by b) lookahead(std::size_t id,std::size_t state=std::size_t (~0))369 bool lookahead(std::size_t id, std::size_t state = std::size_t(~0)) 370 { 371 Iterator end = end_; 372 std::size_t unique_id = boost::lexer::npos; 373 bool bol = this->bol_; 374 375 if (std::size_t(~0) == state) 376 state = this->state_; 377 378 return id == this->next_token_( 379 state, bol, end, this->get_eoi(), unique_id); 380 } 381 382 // The adjust_start() and revert_adjust_start() are helper 383 // functions needed to implement the functionality required for 384 // lex::more(). It is called from the functor body below. adjust_start()385 bool adjust_start() 386 { 387 if (!has_hold_) 388 return false; 389 390 std::swap(this->get_first(), hold_); 391 has_hold_ = false; 392 return true; 393 } revert_adjust_start()394 void revert_adjust_start() 395 { 396 // this will be called only if adjust_start above returned true 397 std::swap(this->get_first(), hold_); 398 has_hold_ = true; 399 } 400 get_value() const401 TokenValue const& get_value() const 402 { 403 if (!has_value_) { 404 value_ = iterator_range<Iterator>(this->get_first(), end_); 405 has_value_ = true; 406 } 407 return value_; 408 } 409 template <typename Value> set_value(Value const & val)410 void set_value(Value const& val) 411 { 412 value_ = val; 413 has_value_ = true; 414 } set_end(Iterator const & it)415 void set_end(Iterator const& it) 416 { 417 end_ = it; 418 } has_value() const419 bool has_value() const { return has_value_; } reset_value()420 void reset_value() { has_value_ = false; } 421 422 protected: 423 semantic_actions_type const& actions_; 424 Iterator hold_; // iterator needed to support lex::more() 425 Iterator end_; // iterator pointing to end of matched token 426 mutable TokenValue value_; // token value to use 427 mutable bool has_value_; // 'true' if value_ is valid 428 bool has_hold_; // 'true' if hold_ is valid 429 430 // silence MSVC warning C4512: assignment operator could not be generated 431 BOOST_DELETED_FUNCTION(static_data& operator= (static_data const&)) 432 }; 433 434 /////////////////////////////////////////////////////////////////////// 435 // does support lexer semantic actions, may support state, is used for 436 // position_token exposing exactly one type 437 template <typename Iterator, typename HasState, typename TokenValue> 438 class static_data<Iterator, mpl::true_, HasState, boost::optional<TokenValue> > 439 : public static_data<Iterator, mpl::false_, HasState, TokenValue> 440 { 441 public: 442 typedef semantic_actions<Iterator, HasState, static_data> 443 semantic_actions_type; 444 445 protected: 446 typedef static_data<Iterator, mpl::false_, HasState, TokenValue> 447 base_type; 448 typedef typename base_type::char_type char_type; 449 typedef typename semantic_actions_type::functor_wrapper_type 450 functor_wrapper_type; 451 452 public: 453 typedef Iterator base_iterator_type; 454 typedef boost::optional<TokenValue> token_value_type; 455 typedef boost::optional<TokenValue> const& get_value_type; 456 typedef typename base_type::state_type state_type; 457 typedef typename base_type::state_name_type state_name_type; 458 459 typedef detail::wrap_action<functor_wrapper_type 460 , Iterator, static_data, std::size_t> wrap_action_type; 461 462 template <typename IterData> static_data(IterData const & data_,Iterator & first,Iterator const & last)463 static_data (IterData const& data_, Iterator& first, Iterator const& last) 464 : base_type(data_, first, last) 465 , actions_(data_.actions_), hold_() 466 , has_value_(false), has_hold_(false) 467 { 468 spirit::traits::assign_to(first, last, value_); 469 has_value_ = true; 470 } 471 472 // invoke attached semantic actions, if defined invoke_actions(std::size_t state,std::size_t & id,std::size_t unique_id,Iterator & end)473 BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state 474 , std::size_t& id, std::size_t unique_id, Iterator& end) 475 { 476 return actions_.invoke_actions(state, id, unique_id, end, *this); 477 } 478 479 // The function less() is used by the implementation of the support 480 // function lex::less(). Its functionality is equivalent to flex' 481 // function yyless(): it returns an iterator positioned to the 482 // nth input character beyond the current start iterator (i.e. by 483 // assigning the return value to the placeholder '_end' it is 484 // possible to return all but the first n characters of the current 485 // token back to the input stream). less(Iterator & it,int n)486 Iterator const& less(Iterator& it, int n) 487 { 488 it = this->get_first(); 489 std::advance(it, n); 490 return it; 491 } 492 493 // The function more() is used by the implementation of the support 494 // function lex::more(). Its functionality is equivalent to flex' 495 // function yymore(): it tells the lexer that the next time it 496 // matches a rule, the corresponding token should be appended onto 497 // the current token value rather than replacing it. more()498 void more() 499 { 500 hold_ = this->get_first(); 501 has_hold_ = true; 502 } 503 504 // The function lookahead() is used by the implementation of the 505 // support function lex::lookahead. It can be used to implement 506 // lookahead for lexer engines not supporting constructs like flex' 507 // a/b (match a, but only when followed by b) lookahead(std::size_t id,std::size_t state=std::size_t (~0))508 bool lookahead(std::size_t id, std::size_t state = std::size_t(~0)) 509 { 510 Iterator end = end_; 511 std::size_t unique_id = boost::lexer::npos; 512 bool bol = this->bol_; 513 514 if (std::size_t(~0) == state) 515 state = this->state_; 516 517 return id == this->next_token_( 518 state, bol, end, this->get_eoi(), unique_id); 519 } 520 521 // The adjust_start() and revert_adjust_start() are helper 522 // functions needed to implement the functionality required for 523 // lex::more(). It is called from the functor body below. adjust_start()524 bool adjust_start() 525 { 526 if (!has_hold_) 527 return false; 528 529 std::swap(this->get_first(), hold_); 530 has_hold_ = false; 531 return true; 532 } revert_adjust_start()533 void revert_adjust_start() 534 { 535 // this will be called only if adjust_start above returned true 536 std::swap(this->get_first(), hold_); 537 has_hold_ = true; 538 } 539 get_value() const540 TokenValue const& get_value() const 541 { 542 if (!has_value_) { 543 spirit::traits::assign_to(this->get_first(), end_, value_); 544 has_value_ = true; 545 } 546 return value_; 547 } 548 template <typename Value> set_value(Value const & val)549 void set_value(Value const& val) 550 { 551 value_ = val; 552 has_value_ = true; 553 } set_end(Iterator const & it)554 void set_end(Iterator const& it) 555 { 556 end_ = it; 557 } has_value() const558 bool has_value() const { return has_value_; } reset_value()559 void reset_value() { has_value_ = false; } 560 561 protected: 562 semantic_actions_type const& actions_; 563 Iterator hold_; // iterator needed to support lex::more() 564 Iterator end_; // iterator pointing to end of matched token 565 mutable token_value_type value_; // token value to use 566 mutable bool has_value_; // 'true' if value_ is valid 567 bool has_hold_; // 'true' if hold_ is valid 568 569 // silence MSVC warning C4512: assignment operator could not be generated 570 BOOST_DELETED_FUNCTION(static_data& operator= (static_data const&)) 571 }; 572 } 573 }}}} 574 575 #endif 576