1 //  Copyright (c) 2001-2011 Hartmut Kaiser
2 //
3 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
4 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5 
6 #if !defined(BOOST_SPIRIT_LEX_LEXER_FUNCTOR_DATA_JUN_10_2009_0954AM)
7 #define BOOST_SPIRIT_LEX_LEXER_FUNCTOR_DATA_JUN_10_2009_0954AM
8 
9 #if defined(_MSC_VER)
10 #pragma once
11 #endif
12 
13 #include <boost/spirit/home/qi/detail/assign_to.hpp>
14 #include <boost/spirit/home/support/detail/lexer/generator.hpp>
15 #include <boost/spirit/home/support/detail/lexer/rules.hpp>
16 #include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
17 #include <boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp>
18 #include <boost/spirit/home/lex/lexer/lexertl/semantic_action_data.hpp>
19 #include <boost/spirit/home/lex/lexer/lexertl/wrap_action.hpp>
20 #include <boost/spirit/home/support/assert_msg.hpp>
21 #include <boost/mpl/bool.hpp>
22 #include <boost/optional.hpp>
23 #include <iterator> // for std::iterator_traits
24 
25 namespace boost { namespace spirit { namespace lex { namespace lexertl
26 {
27     namespace detail
28     {
29         ///////////////////////////////////////////////////////////////////////
30         template <typename Iterator, typename HasActors, typename HasState
31           , typename TokenValue>
32         class data;    // no default specialization
33 
34         ///////////////////////////////////////////////////////////////////////
35         //  neither supports state, nor actors
36         template <typename Iterator, typename TokenValue>
37         class data<Iterator, mpl::false_, mpl::false_, TokenValue>
38         {
39         protected:
40             typedef typename
41                 std::iterator_traits<Iterator>::value_type
42             char_type;
43 
44         public:
45             typedef Iterator base_iterator_type;
46             typedef iterator_range<Iterator> token_value_type;
47             typedef token_value_type get_value_type;
48             typedef std::size_t state_type;
49             typedef char_type const* state_name_type;
50             typedef unused_type semantic_actions_type;
51             typedef detail::wrap_action<unused_type, Iterator, data, std::size_t>
52                 wrap_action_type;
53 
54             typedef unused_type next_token_functor;
55             typedef unused_type get_state_name_type;
56 
57             // initialize the shared data
58             template <typename IterData>
data(IterData const & data_,Iterator & first,Iterator const & last)59             data (IterData const& data_, Iterator& first, Iterator const& last)
60               : first_(first), last_(last)
61               , state_machine_(data_.state_machine_)
62               , rules_(data_.rules_)
63               , bol_(data_.state_machine_.data()._seen_BOL_assertion) {}
64 
65             // The following functions are used by the implementation of the
66             // placeholder '_state'.
67             template <typename Char>
set_state_name(Char const *)68             void set_state_name (Char const*)
69             {
70                 // If you see a compile time assertion below you're probably
71                 // using a token type not supporting lexer states (the 3rd
72                 // template parameter of the token is mpl::false_), but your
73                 // code uses state changes anyways.
74                 BOOST_SPIRIT_ASSERT_FAIL(Char,
75                     tried_to_set_state_of_stateless_token, ());
76             }
get_state_name() const77             char_type const* get_state_name() const { return rules_.initial(); }
get_state_id(char_type const *) const78             std::size_t get_state_id (char_type const*) const
79             {
80                 return 0;
81             }
82 
83             // The function get_eoi() is used by the implementation of the
84             // placeholder '_eoi'.
get_eoi() const85             Iterator const& get_eoi() const { return last_; }
86 
87             // The function less() is used by the implementation of the support
88             // function lex::less(). Its functionality is equivalent to flex'
89             // function yyless(): it returns an iterator positioned to the
90             // nth input character beyond the current start iterator (i.e. by
91             // assigning the return value to the placeholder '_end' it is
92             // possible to return all but the first n characters of the current
93             // token back to the input stream.
94             //
95             // This function does nothing as long as no semantic actions are
96             // used.
less(Iterator const & it,int)97             Iterator const& less(Iterator const& it, int)
98             {
99                 // The following assertion fires most likely because you are
100                 // using lexer semantic actions without using the actor_lexer
101                 // as the base class for your token definition class.
102                 BOOST_ASSERT(false &&
103                     "Are you using lexer semantic actions without using the "
104                     "actor_lexer base?");
105                 return it;
106             }
107 
108             // The function more() is used by the implementation of the support
109             // function lex::more(). Its functionality is equivalent to flex'
110             // function yymore(): it tells the lexer that the next time it
111             // matches a rule, the corresponding token should be appended onto
112             // the current token value rather than replacing it.
113             //
114             // These functions do nothing as long as no semantic actions are
115             // used.
more()116             void more()
117             {
118                 // The following assertion fires most likely because you are
119                 // using lexer semantic actions without using the actor_lexer
120                 // as the base class for your token definition class.
121                 BOOST_ASSERT(false &&
122                     "Are you using lexer semantic actions without using the "
123                     "actor_lexer base?");
124             }
adjust_start()125             bool adjust_start() { return false; }
revert_adjust_start()126             void revert_adjust_start() {}
127 
128             // The function lookahead() is used by the implementation of the
129             // support function lex::lookahead. It can be used to implement
130             // lookahead for lexer engines not supporting constructs like flex'
131             // a/b  (match a, but only when followed by b):
132             //
133             // This function does nothing as long as no semantic actions are
134             // used.
lookahead(std::size_t,std::size_t=std::size_t (~0))135             bool lookahead(std::size_t, std::size_t /*state*/ = std::size_t(~0))
136             {
137                 // The following assertion fires most likely because you are
138                 // using lexer semantic actions without using the actor_lexer
139                 // as the base class for your token definition class.
140                 BOOST_ASSERT(false &&
141                     "Are you using lexer semantic actions without using the "
142                     "actor_lexer base?");
143                 return false;
144             }
145 
146             // the functions next, invoke_actions, and get_state are used by
147             // the functor implementation below
148 
149             // The function next() tries to match the next token from the
150             // underlying input sequence.
next(Iterator & end,std::size_t & unique_id,bool & prev_bol)151             std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol)
152             {
153                 prev_bol = bol_;
154 
155                 typedef basic_iterator_tokeniser<Iterator> tokenizer;
156                 return tokenizer::next(state_machine_, bol_, end, last_
157                   , unique_id);
158             }
159 
160             // nothing to invoke, so this is empty
invoke_actions(std::size_t,std::size_t,std::size_t,Iterator const &)161             BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t
162               , std::size_t, std::size_t, Iterator const&)
163             {
164                 return pass_flags::pass_normal;    // always accept
165             }
166 
get_state() const167             std::size_t get_state() const { return 0; }
set_state(std::size_t)168             void set_state(std::size_t) {}
169 
set_end(Iterator const &)170             void set_end(Iterator const& /*it*/) {}
171 
get_first()172             Iterator& get_first() { return first_; }
get_first() const173             Iterator const& get_first() const { return first_; }
get_last() const174             Iterator const& get_last() const { return last_; }
175 
get_value() const176             iterator_range<Iterator> get_value() const
177             {
178                 return iterator_range<Iterator>(first_, last_);
179             }
has_value() const180             bool has_value() const { return false; }
reset_value()181             void reset_value() {}
182 
reset_bol(bool bol)183             void reset_bol(bool bol) { bol_ = bol; }
184 
185         protected:
186             Iterator& first_;
187             Iterator last_;
188 
189             boost::lexer::basic_state_machine<char_type> const& state_machine_;
190             boost::lexer::basic_rules<char_type> const& rules_;
191 
192             bool bol_;      // helper storing whether last character was \n
193 
194             // silence MSVC warning C4512: assignment operator could not be generated
195             BOOST_DELETED_FUNCTION(data& operator= (data const&))
196         };
197 
198         ///////////////////////////////////////////////////////////////////////
199         //  doesn't support lexer semantic actions, but supports state
200         template <typename Iterator, typename TokenValue>
201         class data<Iterator, mpl::false_, mpl::true_, TokenValue>
202           : public data<Iterator, mpl::false_, mpl::false_, TokenValue>
203         {
204         protected:
205             typedef data<Iterator, mpl::false_, mpl::false_, TokenValue> base_type;
206             typedef typename base_type::char_type char_type;
207 
208         public:
209             typedef Iterator base_iterator_type;
210             typedef iterator_range<Iterator> token_value_type;
211             typedef token_value_type get_value_type;
212             typedef typename base_type::state_type state_type;
213             typedef typename base_type::state_name_type state_name_type;
214             typedef typename base_type::semantic_actions_type
215                 semantic_actions_type;
216 
217             // initialize the shared data
218             template <typename IterData>
data(IterData const & data_,Iterator & first,Iterator const & last)219             data (IterData const& data_, Iterator& first, Iterator const& last)
220               : base_type(data_, first, last)
221               , state_(0) {}
222 
223             // The following functions are used by the implementation of the
224             // placeholder '_state'.
set_state_name(char_type const * new_state)225             void set_state_name (char_type const* new_state)
226             {
227                 std::size_t state_id = this->rules_.state(new_state);
228 
229                 // If the following assertion fires you've probably been using
230                 // a lexer state name which was not defined in your token
231                 // definition.
232                 BOOST_ASSERT(state_id != boost::lexer::npos);
233 
234                 if (state_id != boost::lexer::npos)
235                     state_ = state_id;
236             }
get_state_name() const237             char_type const* get_state_name() const
238             {
239                 return this->rules_.state(state_);
240             }
get_state_id(char_type const * state) const241             std::size_t get_state_id (char_type const* state) const
242             {
243                 return this->rules_.state(state);
244             }
245 
246             // the functions next() and get_state() are used by the functor
247             // implementation below
248 
249             // The function next() tries to match the next token from the
250             // underlying input sequence.
next(Iterator & end,std::size_t & unique_id,bool & prev_bol)251             std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol)
252             {
253                 prev_bol = this->bol_;
254 
255                 typedef basic_iterator_tokeniser<Iterator> tokenizer;
256                 return tokenizer::next(this->state_machine_, state_,
257                     this->bol_, end, this->get_eoi(), unique_id);
258             }
259 
get_state()260             std::size_t& get_state() { return state_; }
set_state(std::size_t state)261             void set_state(std::size_t state) { state_ = state; }
262 
263         protected:
264             std::size_t state_;
265 
266             // silence MSVC warning C4512: assignment operator could not be generated
267             BOOST_DELETED_FUNCTION(data& operator= (data const&))
268         };
269 
270         ///////////////////////////////////////////////////////////////////////
271         //  does support lexer semantic actions, may support state
272         template <typename Iterator, typename HasState, typename TokenValue>
273         class data<Iterator, mpl::true_, HasState, TokenValue>
274           : public data<Iterator, mpl::false_, HasState, TokenValue>
275         {
276         public:
277             typedef semantic_actions<Iterator, HasState, data>
278                 semantic_actions_type;
279 
280         protected:
281             typedef data<Iterator, mpl::false_, HasState, TokenValue> base_type;
282             typedef typename base_type::char_type char_type;
283             typedef typename semantic_actions_type::functor_wrapper_type
284                 functor_wrapper_type;
285 
286         public:
287             typedef Iterator base_iterator_type;
288             typedef TokenValue token_value_type;
289             typedef TokenValue const& get_value_type;
290             typedef typename base_type::state_type state_type;
291             typedef typename base_type::state_name_type state_name_type;
292 
293             typedef detail::wrap_action<functor_wrapper_type
294               , Iterator, data, std::size_t> wrap_action_type;
295 
296             template <typename IterData>
data(IterData const & data_,Iterator & first,Iterator const & last)297             data (IterData const& data_, Iterator& first, Iterator const& last)
298               : base_type(data_, first, last)
299               , actions_(data_.actions_), hold_(), end_()
300               , value_(iterator_range<Iterator>(last, last))
301               , has_value_(false), has_hold_(false) {}
302 
303             // invoke attached semantic actions, if defined
invoke_actions(std::size_t state,std::size_t & id,std::size_t unique_id,Iterator & end)304             BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state
305               , std::size_t& id, std::size_t unique_id, Iterator& end)
306             {
307                 return actions_.invoke_actions(state, id, unique_id, end, *this);
308             }
309 
310             // The function less() is used by the implementation of the support
311             // function lex::less(). Its functionality is equivalent to flex'
312             // function yyless(): it returns an iterator positioned to the
313             // nth input character beyond the current start iterator (i.e. by
314             // assigning the return value to the placeholder '_end' it is
315             // possible to return all but the first n characters of the current
316             // token back to the input stream).
less(Iterator & it,int n)317             Iterator const& less(Iterator& it, int n)
318             {
319                 it = this->get_first();
320                 std::advance(it, n);
321                 return it;
322             }
323 
324             // The function more() is used by the implementation of the support
325             // function lex::more(). Its functionality is equivalent to flex'
326             // function yymore(): it tells the lexer that the next time it
327             // matches a rule, the corresponding token should be appended onto
328             // the current token value rather than replacing it.
more()329             void more()
330             {
331                 hold_ = this->get_first();
332                 has_hold_ = true;
333             }
334 
335             // The function lookahead() is used by the implementation of the
336             // support function lex::lookahead. It can be used to implement
337             // lookahead for lexer engines not supporting constructs like flex'
338             // a/b  (match a, but only when followed by b)
lookahead(std::size_t id,std::size_t state=std::size_t (~0))339             bool lookahead(std::size_t id, std::size_t state = std::size_t(~0))
340             {
341                 Iterator end = end_;
342                 std::size_t unique_id = boost::lexer::npos;
343                 bool bol = this->bol_;
344 
345                 if (std::size_t(~0) == state)
346                     state = this->state_;
347 
348                 typedef basic_iterator_tokeniser<Iterator> tokenizer;
349                 return id == tokenizer::next(this->state_machine_, state,
350                     bol, end, this->get_eoi(), unique_id);
351             }
352 
353             // The adjust_start() and revert_adjust_start() are helper
354             // functions needed to implement the functionality required for
355             // lex::more(). It is called from the functor body below.
adjust_start()356             bool adjust_start()
357             {
358                 if (!has_hold_)
359                     return false;
360 
361                 std::swap(this->get_first(), hold_);
362                 has_hold_ = false;
363                 return true;
364             }
revert_adjust_start()365             void revert_adjust_start()
366             {
367                 // this will be called only if adjust_start above returned true
368                 std::swap(this->get_first(), hold_);
369                 has_hold_ = true;
370             }
371 
get_value() const372             TokenValue const& get_value() const
373             {
374                 if (!has_value_) {
375                     value_ = iterator_range<Iterator>(this->get_first(), end_);
376                     has_value_ = true;
377                 }
378                 return value_;
379             }
380             template <typename Value>
set_value(Value const & val)381             void set_value(Value const& val)
382             {
383                 value_ = val;
384                 has_value_ = true;
385             }
set_end(Iterator const & it)386             void set_end(Iterator const& it)
387             {
388                 end_ = it;
389             }
has_value() const390             bool has_value() const { return has_value_; }
reset_value()391             void reset_value() { has_value_ = false; }
392 
393         protected:
394             semantic_actions_type const& actions_;
395             Iterator hold_;     // iterator needed to support lex::more()
396             Iterator end_;      // iterator pointing to end of matched token
397             mutable TokenValue value_;  // token value to use
398             mutable bool has_value_;    // 'true' if value_ is valid
399             bool has_hold_;     // 'true' if hold_ is valid
400 
401             // silence MSVC warning C4512: assignment operator could not be generated
402             BOOST_DELETED_FUNCTION(data& operator= (data const&))
403         };
404 
405         ///////////////////////////////////////////////////////////////////////
406         //  does support lexer semantic actions, may support state, is used for
407         //  position_token exposing exactly one type
408         template <typename Iterator, typename HasState, typename TokenValue>
409         class data<Iterator, mpl::true_, HasState, boost::optional<TokenValue> >
410           : public data<Iterator, mpl::false_, HasState, TokenValue>
411         {
412         public:
413             typedef semantic_actions<Iterator, HasState, data>
414                 semantic_actions_type;
415 
416         protected:
417             typedef data<Iterator, mpl::false_, HasState, TokenValue> base_type;
418             typedef typename base_type::char_type char_type;
419             typedef typename semantic_actions_type::functor_wrapper_type
420                 functor_wrapper_type;
421 
422         public:
423             typedef Iterator base_iterator_type;
424             typedef boost::optional<TokenValue> token_value_type;
425             typedef boost::optional<TokenValue> const& get_value_type;
426             typedef typename base_type::state_type state_type;
427             typedef typename base_type::state_name_type state_name_type;
428 
429             typedef detail::wrap_action<functor_wrapper_type
430               , Iterator, data, std::size_t> wrap_action_type;
431 
432             template <typename IterData>
data(IterData const & data_,Iterator & first,Iterator const & last)433             data (IterData const& data_, Iterator& first, Iterator const& last)
434               : base_type(data_, first, last)
435               , actions_(data_.actions_), hold_()
436               , has_value_(false), has_hold_(false)
437             {
438                 spirit::traits::assign_to(first, last, value_);
439                 has_value_ = true;
440             }
441 
442             // invoke attached semantic actions, if defined
invoke_actions(std::size_t state,std::size_t & id,std::size_t unique_id,Iterator & end)443             BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state
444               , std::size_t& id, std::size_t unique_id, Iterator& end)
445             {
446                 return actions_.invoke_actions(state, id, unique_id, end, *this);
447             }
448 
449             // The function less() is used by the implementation of the support
450             // function lex::less(). Its functionality is equivalent to flex'
451             // function yyless(): it returns an iterator positioned to the
452             // nth input character beyond the current start iterator (i.e. by
453             // assigning the return value to the placeholder '_end' it is
454             // possible to return all but the first n characters of the current
455             // token back to the input stream).
less(Iterator & it,int n)456             Iterator const& less(Iterator& it, int n)
457             {
458                 it = this->get_first();
459                 std::advance(it, n);
460                 return it;
461             }
462 
463             // The function more() is used by the implementation of the support
464             // function lex::more(). Its functionality is equivalent to flex'
465             // function yymore(): it tells the lexer that the next time it
466             // matches a rule, the corresponding token should be appended onto
467             // the current token value rather than replacing it.
more()468             void more()
469             {
470                 hold_ = this->get_first();
471                 has_hold_ = true;
472             }
473 
474             // The function lookahead() is used by the implementation of the
475             // support function lex::lookahead. It can be used to implement
476             // lookahead for lexer engines not supporting constructs like flex'
477             // a/b  (match a, but only when followed by b)
lookahead(std::size_t id,std::size_t state=std::size_t (~0))478             bool lookahead(std::size_t id, std::size_t state = std::size_t(~0))
479             {
480                 Iterator end = end_;
481                 std::size_t unique_id = boost::lexer::npos;
482                 bool bol = this->bol_;
483 
484                 if (std::size_t(~0) == state)
485                     state = this->state_;
486 
487                 typedef basic_iterator_tokeniser<Iterator> tokenizer;
488                 return id == tokenizer::next(this->state_machine_, state,
489                     bol, end, this->get_eoi(), unique_id);
490             }
491 
492             // The adjust_start() and revert_adjust_start() are helper
493             // functions needed to implement the functionality required for
494             // lex::more(). It is called from the functor body below.
adjust_start()495             bool adjust_start()
496             {
497                 if (!has_hold_)
498                     return false;
499 
500                 std::swap(this->get_first(), hold_);
501                 has_hold_ = false;
502                 return true;
503             }
revert_adjust_start()504             void revert_adjust_start()
505             {
506                 // this will be called only if adjust_start above returned true
507                 std::swap(this->get_first(), hold_);
508                 has_hold_ = true;
509             }
510 
get_value() const511             token_value_type const& get_value() const
512             {
513                 if (!has_value_) {
514                     spirit::traits::assign_to(this->get_first(), end_, value_);
515                     has_value_ = true;
516                 }
517                 return value_;
518             }
519             template <typename Value>
set_value(Value const & val)520             void set_value(Value const& val)
521             {
522                 value_ = val;
523                 has_value_ = true;
524             }
set_end(Iterator const & it)525             void set_end(Iterator const& it)
526             {
527                 end_ = it;
528             }
has_value() const529             bool has_value() const { return has_value_; }
reset_value()530             void reset_value() { has_value_ = false; }
531 
532         protected:
533             semantic_actions_type const& actions_;
534             Iterator hold_;     // iterator needed to support lex::more()
535             Iterator end_;      // iterator pointing to end of matched token
536             mutable token_value_type value_;  // token value to use
537             mutable bool has_value_;    // 'true' if value_ is valid
538             bool has_hold_;     // 'true' if hold_ is valid
539 
540             // silence MSVC warning C4512: assignment operator could not be generated
541             BOOST_DELETED_FUNCTION(data& operator= (data const&))
542         };
543     }
544 }}}}
545 
546 #endif
547 
548