1 // Copyright (c) 2001-2011 Hartmut Kaiser 2 // 3 // Distributed under the Boost Software License, Version 1.0. (See accompanying 4 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 5 6 #if !defined(BOOST_SPIRIT_LEX_LEXER_FUNCTOR_DATA_JUN_10_2009_0954AM) 7 #define BOOST_SPIRIT_LEX_LEXER_FUNCTOR_DATA_JUN_10_2009_0954AM 8 9 #if defined(_MSC_VER) 10 #pragma once 11 #endif 12 13 #include <boost/spirit/home/qi/detail/assign_to.hpp> 14 #include <boost/spirit/home/support/detail/lexer/generator.hpp> 15 #include <boost/spirit/home/support/detail/lexer/rules.hpp> 16 #include <boost/spirit/home/support/detail/lexer/state_machine.hpp> 17 #include <boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp> 18 #include <boost/spirit/home/lex/lexer/lexertl/semantic_action_data.hpp> 19 #include <boost/spirit/home/lex/lexer/lexertl/wrap_action.hpp> 20 #include <boost/spirit/home/support/assert_msg.hpp> 21 #include <boost/mpl/bool.hpp> 22 #include <boost/optional.hpp> 23 #include <iterator> // for std::iterator_traits 24 25 namespace boost { namespace spirit { namespace lex { namespace lexertl 26 { 27 namespace detail 28 { 29 /////////////////////////////////////////////////////////////////////// 30 template <typename Iterator, typename HasActors, typename HasState 31 , typename TokenValue> 32 class data; // no default specialization 33 34 /////////////////////////////////////////////////////////////////////// 35 // neither supports state, nor actors 36 template <typename Iterator, typename TokenValue> 37 class data<Iterator, mpl::false_, mpl::false_, TokenValue> 38 { 39 protected: 40 typedef typename 41 std::iterator_traits<Iterator>::value_type 42 char_type; 43 44 public: 45 typedef Iterator base_iterator_type; 46 typedef iterator_range<Iterator> token_value_type; 47 typedef token_value_type get_value_type; 48 typedef std::size_t state_type; 49 typedef char_type const* state_name_type; 50 typedef unused_type semantic_actions_type; 51 typedef detail::wrap_action<unused_type, Iterator, data, std::size_t> 52 wrap_action_type; 53 54 typedef unused_type next_token_functor; 55 typedef unused_type get_state_name_type; 56 57 // initialize the shared data 58 template <typename IterData> data(IterData const & data_,Iterator & first,Iterator const & last)59 data (IterData const& data_, Iterator& first, Iterator const& last) 60 : first_(first), last_(last) 61 , state_machine_(data_.state_machine_) 62 , rules_(data_.rules_) 63 , bol_(data_.state_machine_.data()._seen_BOL_assertion) {} 64 65 // The following functions are used by the implementation of the 66 // placeholder '_state'. 67 template <typename Char> set_state_name(Char const *)68 void set_state_name (Char const*) 69 { 70 // If you see a compile time assertion below you're probably 71 // using a token type not supporting lexer states (the 3rd 72 // template parameter of the token is mpl::false_), but your 73 // code uses state changes anyways. 74 BOOST_SPIRIT_ASSERT_FAIL(Char, 75 tried_to_set_state_of_stateless_token, ()); 76 } get_state_name() const77 char_type const* get_state_name() const { return rules_.initial(); } get_state_id(char_type const *) const78 std::size_t get_state_id (char_type const*) const 79 { 80 return 0; 81 } 82 83 // The function get_eoi() is used by the implementation of the 84 // placeholder '_eoi'. get_eoi() const85 Iterator const& get_eoi() const { return last_; } 86 87 // The function less() is used by the implementation of the support 88 // function lex::less(). Its functionality is equivalent to flex' 89 // function yyless(): it returns an iterator positioned to the 90 // nth input character beyond the current start iterator (i.e. by 91 // assigning the return value to the placeholder '_end' it is 92 // possible to return all but the first n characters of the current 93 // token back to the input stream. 94 // 95 // This function does nothing as long as no semantic actions are 96 // used. less(Iterator const & it,int)97 Iterator const& less(Iterator const& it, int) 98 { 99 // The following assertion fires most likely because you are 100 // using lexer semantic actions without using the actor_lexer 101 // as the base class for your token definition class. 102 BOOST_ASSERT(false && 103 "Are you using lexer semantic actions without using the " 104 "actor_lexer base?"); 105 return it; 106 } 107 108 // The function more() is used by the implementation of the support 109 // function lex::more(). Its functionality is equivalent to flex' 110 // function yymore(): it tells the lexer that the next time it 111 // matches a rule, the corresponding token should be appended onto 112 // the current token value rather than replacing it. 113 // 114 // These functions do nothing as long as no semantic actions are 115 // used. more()116 void more() 117 { 118 // The following assertion fires most likely because you are 119 // using lexer semantic actions without using the actor_lexer 120 // as the base class for your token definition class. 121 BOOST_ASSERT(false && 122 "Are you using lexer semantic actions without using the " 123 "actor_lexer base?"); 124 } adjust_start()125 bool adjust_start() { return false; } revert_adjust_start()126 void revert_adjust_start() {} 127 128 // The function lookahead() is used by the implementation of the 129 // support function lex::lookahead. It can be used to implement 130 // lookahead for lexer engines not supporting constructs like flex' 131 // a/b (match a, but only when followed by b): 132 // 133 // This function does nothing as long as no semantic actions are 134 // used. lookahead(std::size_t,std::size_t=std::size_t (~0))135 bool lookahead(std::size_t, std::size_t /*state*/ = std::size_t(~0)) 136 { 137 // The following assertion fires most likely because you are 138 // using lexer semantic actions without using the actor_lexer 139 // as the base class for your token definition class. 140 BOOST_ASSERT(false && 141 "Are you using lexer semantic actions without using the " 142 "actor_lexer base?"); 143 return false; 144 } 145 146 // the functions next, invoke_actions, and get_state are used by 147 // the functor implementation below 148 149 // The function next() tries to match the next token from the 150 // underlying input sequence. next(Iterator & end,std::size_t & unique_id,bool & prev_bol)151 std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol) 152 { 153 prev_bol = bol_; 154 155 typedef basic_iterator_tokeniser<Iterator> tokenizer; 156 return tokenizer::next(state_machine_, bol_, end, last_ 157 , unique_id); 158 } 159 160 // nothing to invoke, so this is empty invoke_actions(std::size_t,std::size_t,std::size_t,Iterator const &)161 BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t 162 , std::size_t, std::size_t, Iterator const&) 163 { 164 return pass_flags::pass_normal; // always accept 165 } 166 get_state() const167 std::size_t get_state() const { return 0; } set_state(std::size_t)168 void set_state(std::size_t) {} 169 set_end(Iterator const &)170 void set_end(Iterator const& /*it*/) {} 171 get_first()172 Iterator& get_first() { return first_; } get_first() const173 Iterator const& get_first() const { return first_; } get_last() const174 Iterator const& get_last() const { return last_; } 175 get_value() const176 iterator_range<Iterator> get_value() const 177 { 178 return iterator_range<Iterator>(first_, last_); 179 } has_value() const180 bool has_value() const { return false; } reset_value()181 void reset_value() {} 182 reset_bol(bool bol)183 void reset_bol(bool bol) { bol_ = bol; } 184 185 protected: 186 Iterator& first_; 187 Iterator last_; 188 189 boost::lexer::basic_state_machine<char_type> const& state_machine_; 190 boost::lexer::basic_rules<char_type> const& rules_; 191 192 bool bol_; // helper storing whether last character was \n 193 194 // silence MSVC warning C4512: assignment operator could not be generated 195 BOOST_DELETED_FUNCTION(data& operator= (data const&)) 196 }; 197 198 /////////////////////////////////////////////////////////////////////// 199 // doesn't support lexer semantic actions, but supports state 200 template <typename Iterator, typename TokenValue> 201 class data<Iterator, mpl::false_, mpl::true_, TokenValue> 202 : public data<Iterator, mpl::false_, mpl::false_, TokenValue> 203 { 204 protected: 205 typedef data<Iterator, mpl::false_, mpl::false_, TokenValue> base_type; 206 typedef typename base_type::char_type char_type; 207 208 public: 209 typedef Iterator base_iterator_type; 210 typedef iterator_range<Iterator> token_value_type; 211 typedef token_value_type get_value_type; 212 typedef typename base_type::state_type state_type; 213 typedef typename base_type::state_name_type state_name_type; 214 typedef typename base_type::semantic_actions_type 215 semantic_actions_type; 216 217 // initialize the shared data 218 template <typename IterData> data(IterData const & data_,Iterator & first,Iterator const & last)219 data (IterData const& data_, Iterator& first, Iterator const& last) 220 : base_type(data_, first, last) 221 , state_(0) {} 222 223 // The following functions are used by the implementation of the 224 // placeholder '_state'. set_state_name(char_type const * new_state)225 void set_state_name (char_type const* new_state) 226 { 227 std::size_t state_id = this->rules_.state(new_state); 228 229 // If the following assertion fires you've probably been using 230 // a lexer state name which was not defined in your token 231 // definition. 232 BOOST_ASSERT(state_id != boost::lexer::npos); 233 234 if (state_id != boost::lexer::npos) 235 state_ = state_id; 236 } get_state_name() const237 char_type const* get_state_name() const 238 { 239 return this->rules_.state(state_); 240 } get_state_id(char_type const * state) const241 std::size_t get_state_id (char_type const* state) const 242 { 243 return this->rules_.state(state); 244 } 245 246 // the functions next() and get_state() are used by the functor 247 // implementation below 248 249 // The function next() tries to match the next token from the 250 // underlying input sequence. next(Iterator & end,std::size_t & unique_id,bool & prev_bol)251 std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol) 252 { 253 prev_bol = this->bol_; 254 255 typedef basic_iterator_tokeniser<Iterator> tokenizer; 256 return tokenizer::next(this->state_machine_, state_, 257 this->bol_, end, this->get_eoi(), unique_id); 258 } 259 get_state()260 std::size_t& get_state() { return state_; } set_state(std::size_t state)261 void set_state(std::size_t state) { state_ = state; } 262 263 protected: 264 std::size_t state_; 265 266 // silence MSVC warning C4512: assignment operator could not be generated 267 BOOST_DELETED_FUNCTION(data& operator= (data const&)) 268 }; 269 270 /////////////////////////////////////////////////////////////////////// 271 // does support lexer semantic actions, may support state 272 template <typename Iterator, typename HasState, typename TokenValue> 273 class data<Iterator, mpl::true_, HasState, TokenValue> 274 : public data<Iterator, mpl::false_, HasState, TokenValue> 275 { 276 public: 277 typedef semantic_actions<Iterator, HasState, data> 278 semantic_actions_type; 279 280 protected: 281 typedef data<Iterator, mpl::false_, HasState, TokenValue> base_type; 282 typedef typename base_type::char_type char_type; 283 typedef typename semantic_actions_type::functor_wrapper_type 284 functor_wrapper_type; 285 286 public: 287 typedef Iterator base_iterator_type; 288 typedef TokenValue token_value_type; 289 typedef TokenValue const& get_value_type; 290 typedef typename base_type::state_type state_type; 291 typedef typename base_type::state_name_type state_name_type; 292 293 typedef detail::wrap_action<functor_wrapper_type 294 , Iterator, data, std::size_t> wrap_action_type; 295 296 template <typename IterData> data(IterData const & data_,Iterator & first,Iterator const & last)297 data (IterData const& data_, Iterator& first, Iterator const& last) 298 : base_type(data_, first, last) 299 , actions_(data_.actions_), hold_(), end_() 300 , value_(iterator_range<Iterator>(last, last)) 301 , has_value_(false), has_hold_(false) {} 302 303 // invoke attached semantic actions, if defined invoke_actions(std::size_t state,std::size_t & id,std::size_t unique_id,Iterator & end)304 BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state 305 , std::size_t& id, std::size_t unique_id, Iterator& end) 306 { 307 return actions_.invoke_actions(state, id, unique_id, end, *this); 308 } 309 310 // The function less() is used by the implementation of the support 311 // function lex::less(). Its functionality is equivalent to flex' 312 // function yyless(): it returns an iterator positioned to the 313 // nth input character beyond the current start iterator (i.e. by 314 // assigning the return value to the placeholder '_end' it is 315 // possible to return all but the first n characters of the current 316 // token back to the input stream). less(Iterator & it,int n)317 Iterator const& less(Iterator& it, int n) 318 { 319 it = this->get_first(); 320 std::advance(it, n); 321 return it; 322 } 323 324 // The function more() is used by the implementation of the support 325 // function lex::more(). Its functionality is equivalent to flex' 326 // function yymore(): it tells the lexer that the next time it 327 // matches a rule, the corresponding token should be appended onto 328 // the current token value rather than replacing it. more()329 void more() 330 { 331 hold_ = this->get_first(); 332 has_hold_ = true; 333 } 334 335 // The function lookahead() is used by the implementation of the 336 // support function lex::lookahead. It can be used to implement 337 // lookahead for lexer engines not supporting constructs like flex' 338 // a/b (match a, but only when followed by b) lookahead(std::size_t id,std::size_t state=std::size_t (~0))339 bool lookahead(std::size_t id, std::size_t state = std::size_t(~0)) 340 { 341 Iterator end = end_; 342 std::size_t unique_id = boost::lexer::npos; 343 bool bol = this->bol_; 344 345 if (std::size_t(~0) == state) 346 state = this->state_; 347 348 typedef basic_iterator_tokeniser<Iterator> tokenizer; 349 return id == tokenizer::next(this->state_machine_, state, 350 bol, end, this->get_eoi(), unique_id); 351 } 352 353 // The adjust_start() and revert_adjust_start() are helper 354 // functions needed to implement the functionality required for 355 // lex::more(). It is called from the functor body below. adjust_start()356 bool adjust_start() 357 { 358 if (!has_hold_) 359 return false; 360 361 std::swap(this->get_first(), hold_); 362 has_hold_ = false; 363 return true; 364 } revert_adjust_start()365 void revert_adjust_start() 366 { 367 // this will be called only if adjust_start above returned true 368 std::swap(this->get_first(), hold_); 369 has_hold_ = true; 370 } 371 get_value() const372 TokenValue const& get_value() const 373 { 374 if (!has_value_) { 375 value_ = iterator_range<Iterator>(this->get_first(), end_); 376 has_value_ = true; 377 } 378 return value_; 379 } 380 template <typename Value> set_value(Value const & val)381 void set_value(Value const& val) 382 { 383 value_ = val; 384 has_value_ = true; 385 } set_end(Iterator const & it)386 void set_end(Iterator const& it) 387 { 388 end_ = it; 389 } has_value() const390 bool has_value() const { return has_value_; } reset_value()391 void reset_value() { has_value_ = false; } 392 393 protected: 394 semantic_actions_type const& actions_; 395 Iterator hold_; // iterator needed to support lex::more() 396 Iterator end_; // iterator pointing to end of matched token 397 mutable TokenValue value_; // token value to use 398 mutable bool has_value_; // 'true' if value_ is valid 399 bool has_hold_; // 'true' if hold_ is valid 400 401 // silence MSVC warning C4512: assignment operator could not be generated 402 BOOST_DELETED_FUNCTION(data& operator= (data const&)) 403 }; 404 405 /////////////////////////////////////////////////////////////////////// 406 // does support lexer semantic actions, may support state, is used for 407 // position_token exposing exactly one type 408 template <typename Iterator, typename HasState, typename TokenValue> 409 class data<Iterator, mpl::true_, HasState, boost::optional<TokenValue> > 410 : public data<Iterator, mpl::false_, HasState, TokenValue> 411 { 412 public: 413 typedef semantic_actions<Iterator, HasState, data> 414 semantic_actions_type; 415 416 protected: 417 typedef data<Iterator, mpl::false_, HasState, TokenValue> base_type; 418 typedef typename base_type::char_type char_type; 419 typedef typename semantic_actions_type::functor_wrapper_type 420 functor_wrapper_type; 421 422 public: 423 typedef Iterator base_iterator_type; 424 typedef boost::optional<TokenValue> token_value_type; 425 typedef boost::optional<TokenValue> const& get_value_type; 426 typedef typename base_type::state_type state_type; 427 typedef typename base_type::state_name_type state_name_type; 428 429 typedef detail::wrap_action<functor_wrapper_type 430 , Iterator, data, std::size_t> wrap_action_type; 431 432 template <typename IterData> data(IterData const & data_,Iterator & first,Iterator const & last)433 data (IterData const& data_, Iterator& first, Iterator const& last) 434 : base_type(data_, first, last) 435 , actions_(data_.actions_), hold_() 436 , has_value_(false), has_hold_(false) 437 { 438 spirit::traits::assign_to(first, last, value_); 439 has_value_ = true; 440 } 441 442 // invoke attached semantic actions, if defined invoke_actions(std::size_t state,std::size_t & id,std::size_t unique_id,Iterator & end)443 BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state 444 , std::size_t& id, std::size_t unique_id, Iterator& end) 445 { 446 return actions_.invoke_actions(state, id, unique_id, end, *this); 447 } 448 449 // The function less() is used by the implementation of the support 450 // function lex::less(). Its functionality is equivalent to flex' 451 // function yyless(): it returns an iterator positioned to the 452 // nth input character beyond the current start iterator (i.e. by 453 // assigning the return value to the placeholder '_end' it is 454 // possible to return all but the first n characters of the current 455 // token back to the input stream). less(Iterator & it,int n)456 Iterator const& less(Iterator& it, int n) 457 { 458 it = this->get_first(); 459 std::advance(it, n); 460 return it; 461 } 462 463 // The function more() is used by the implementation of the support 464 // function lex::more(). Its functionality is equivalent to flex' 465 // function yymore(): it tells the lexer that the next time it 466 // matches a rule, the corresponding token should be appended onto 467 // the current token value rather than replacing it. more()468 void more() 469 { 470 hold_ = this->get_first(); 471 has_hold_ = true; 472 } 473 474 // The function lookahead() is used by the implementation of the 475 // support function lex::lookahead. It can be used to implement 476 // lookahead for lexer engines not supporting constructs like flex' 477 // a/b (match a, but only when followed by b) lookahead(std::size_t id,std::size_t state=std::size_t (~0))478 bool lookahead(std::size_t id, std::size_t state = std::size_t(~0)) 479 { 480 Iterator end = end_; 481 std::size_t unique_id = boost::lexer::npos; 482 bool bol = this->bol_; 483 484 if (std::size_t(~0) == state) 485 state = this->state_; 486 487 typedef basic_iterator_tokeniser<Iterator> tokenizer; 488 return id == tokenizer::next(this->state_machine_, state, 489 bol, end, this->get_eoi(), unique_id); 490 } 491 492 // The adjust_start() and revert_adjust_start() are helper 493 // functions needed to implement the functionality required for 494 // lex::more(). It is called from the functor body below. adjust_start()495 bool adjust_start() 496 { 497 if (!has_hold_) 498 return false; 499 500 std::swap(this->get_first(), hold_); 501 has_hold_ = false; 502 return true; 503 } revert_adjust_start()504 void revert_adjust_start() 505 { 506 // this will be called only if adjust_start above returned true 507 std::swap(this->get_first(), hold_); 508 has_hold_ = true; 509 } 510 get_value() const511 token_value_type const& get_value() const 512 { 513 if (!has_value_) { 514 spirit::traits::assign_to(this->get_first(), end_, value_); 515 has_value_ = true; 516 } 517 return value_; 518 } 519 template <typename Value> set_value(Value const & val)520 void set_value(Value const& val) 521 { 522 value_ = val; 523 has_value_ = true; 524 } set_end(Iterator const & it)525 void set_end(Iterator const& it) 526 { 527 end_ = it; 528 } has_value() const529 bool has_value() const { return has_value_; } reset_value()530 void reset_value() { has_value_ = false; } 531 532 protected: 533 semantic_actions_type const& actions_; 534 Iterator hold_; // iterator needed to support lex::more() 535 Iterator end_; // iterator pointing to end of matched token 536 mutable token_value_type value_; // token value to use 537 mutable bool has_value_; // 'true' if value_ is valid 538 bool has_hold_; // 'true' if hold_ is valid 539 540 // silence MSVC warning C4512: assignment operator could not be generated 541 BOOST_DELETED_FUNCTION(data& operator= (data const&)) 542 }; 543 } 544 }}}} 545 546 #endif 547 548