1 //  Copyright (c) 2001-2011 Hartmut Kaiser
2 //
3 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
4 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5 
6 #if !defined(BOOST_SPIRIT_LEX_LEXER_STATIC_FUNCTOR_DATA_FEB_10_2008_0755PM)
7 #define BOOST_SPIRIT_LEX_LEXER_STATIC_FUNCTOR_DATA_FEB_10_2008_0755PM
8 
9 #if defined(_MSC_VER)
10 #pragma once
11 #endif
12 
13 #include <boost/spirit/home/support/detail/lexer/generator.hpp>
14 #include <boost/spirit/home/support/detail/lexer/rules.hpp>
15 #include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
16 #include <boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp>
17 #include <boost/spirit/home/lex/lexer/lexertl/semantic_action_data.hpp>
18 #include <boost/spirit/home/lex/lexer/lexertl/wrap_action.hpp>
19 #include <boost/spirit/home/support/assert_msg.hpp>
20 #include <boost/mpl/bool.hpp>
21 #include <iterator> // for std::iterator_traits
22 
23 namespace boost { namespace spirit { namespace lex { namespace lexertl
24 {
25     namespace detail
26     {
27         ///////////////////////////////////////////////////////////////////////
// Compare two zero-terminated character sequences for equality.
// Returns true only if both sequences hold the same characters and
// reach their terminating zero at the same position.
template <typename Char>
inline bool zstr_compare(Char const* s1, Char const* s2)
{
    while (*s1 == *s2)
    {
        // equal characters: if this one is the terminator, both
        // sequences ended together
        if (!*s1)
            return true;
        ++s1;
        ++s2;
    }
    // first mismatch (this includes one sequence ending early)
    return false;
}
36 
37         template <typename Char, typename F>
get_state_id(Char const * state,F f,std::size_t numstates)38         inline std::size_t get_state_id(Char const* state, F f
39           , std::size_t numstates)
40         {
41             for (std::size_t i = 0; i < numstates; ++i)
42             {
43                 if (zstr_compare(f(i), state))
44                     return i;
45             }
46             return boost::lexer::npos;
47         }
48 
49         ///////////////////////////////////////////////////////////////////////
// Primary template: only declared, never defined. The usable variants
// are the partial specializations on HasActors/HasState/TokenValue.
template <
    typename Iterator, typename HasActors, typename HasState
  , typename TokenValue
>
class static_data;
53 
54         ///////////////////////////////////////////////////////////////////////
55         //  doesn't support no state and no actors
56         template <typename Iterator, typename TokenValue>
57         class static_data<Iterator, mpl::false_, mpl::false_, TokenValue>
58         {
59         protected:
60             typedef typename
61                 std::iterator_traits<Iterator>::value_type
62             char_type;
63 
64         public:
65             typedef Iterator base_iterator_type;
66             typedef iterator_range<Iterator> token_value_type;
67             typedef token_value_type get_value_type;
68             typedef std::size_t state_type;
69             typedef char_type const* state_name_type;
70             typedef unused_type semantic_actions_type;
71             typedef detail::wrap_action<unused_type, Iterator, static_data
72               , std::size_t> wrap_action_type;
73 
74             typedef std::size_t (*next_token_functor)(std::size_t&,
75                 bool&, Iterator&, Iterator const&, std::size_t&);
76             typedef char_type const* (*get_state_name_type)(std::size_t);
77 
78             // initialize the shared data
79             template <typename IterData>
static_data(IterData const & data,Iterator & first,Iterator const & last)80             static_data (IterData const& data, Iterator& first
81                   , Iterator const& last)
82               : first_(first), last_(last)
83               , next_token_(data.next_)
84               , get_state_name_(data.get_state_name_)
85               , bol_(data.bol_) {}
86 
87             // The following functions are used by the implementation of the
88             // placeholder '_state'.
89             template <typename Char>
set_state_name(Char const *)90             void set_state_name (Char const*)
91             {
92                 // If you see a compile time assertion below you're probably
93                 // using a token type not supporting lexer states (the 3rd
94                 // template parameter of the token is mpl::false_), but your
95                 // code uses state changes anyways.
96                 BOOST_SPIRIT_ASSERT_FAIL(Char,
97                     tried_to_set_state_of_stateless_token, ());
98             }
get_state_name() const99             char_type const* get_state_name() const
100             {
101                 return get_state_name_(0);
102             }
get_state_id(char_type const *) const103             std::size_t get_state_id(char_type const*) const
104             {
105                 return 0;
106             }
107 
108             // The function get_eoi() is used by the implementation of the
109             // placeholder '_eoi'.
get_eoi() const110             Iterator const& get_eoi() const { return last_; }
111 
112             // The function less() is used by the implementation of the support
113             // function lex::less(). Its functionality is equivalent to flex'
114             // function yyless(): it returns an iterator positioned to the
115             // nth input character beyond the current start iterator (i.e. by
116             // assigning the return value to the placeholder '_end' it is
117             // possible to return all but the first n characters of the current
118             // token back to the input stream.
119             //
120             // This function does nothing as long as no semantic actions are
121             // used.
less(Iterator const & it,int)122             Iterator const& less(Iterator const& it, int)
123             {
124                 // The following assertion fires most likely because you are
125                 // using lexer semantic actions without using the actor_lexer
126                 // as the base class for your token definition class.
127                 BOOST_ASSERT(false &&
128                     "Are you using lexer semantic actions without using the "
129                     "actor_lexer base?");
130                 return it;
131             }
132 
133             // The function more() is used by the implementation of the support
134             // function lex::more(). Its functionality is equivalent to flex'
135             // function yymore(): it tells the lexer that the next time it
136             // matches a rule, the corresponding token should be appended onto
137             // the current token value rather than replacing it.
138             //
139             // These functions do nothing as long as no semantic actions are
140             // used.
more()141             void more()
142             {
143                 // The following assertion fires most likely because you are
144                 // using lexer semantic actions without using the actor_lexer
145                 // as the base class for your token definition class.
146                 BOOST_ASSERT(false &&
147                     "Are you using lexer semantic actions without using the "
148                     "actor_lexer base?");
149             }
adjust_start()150             bool adjust_start() { return false; }
revert_adjust_start()151             void revert_adjust_start() {}
152 
153             // The function lookahead() is used by the implementation of the
154             // support function lex::lookahead. It can be used to implement
155             // lookahead for lexer engines not supporting constructs like flex'
156             // a/b  (match a, but only when followed by b):
157             //
158             // This function does nothing as long as no semantic actions are
159             // used.
lookahead(std::size_t,std::size_t=std::size_t (~0))160             bool lookahead(std::size_t, std::size_t /*state*/ = std::size_t(~0))
161             {
162                 // The following assertion fires most likely because you are
163                 // using lexer semantic actions without using the actor_lexer
164                 // as the base class for your token definition class.
165                 BOOST_ASSERT(false &&
166                     "Are you using lexer semantic actions without using the "
167                     "actor_lexer base?");
168                 return false;
169             }
170 
171             // the functions next, invoke_actions, and get_state are used by
172             // the functor implementation below
173 
174             // The function next() tries to match the next token from the
175             // underlying input sequence.
next(Iterator & end,std::size_t & unique_id,bool & prev_bol)176             std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol)
177             {
178                 prev_bol = bol_;
179 
180                 std::size_t state = 0;
181                 return next_token_(state, bol_, end, last_, unique_id);
182             }
183 
184             // nothing to invoke, so this is empty
invoke_actions(std::size_t,std::size_t,std::size_t,Iterator const &)185             BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t
186               , std::size_t, std::size_t, Iterator const&)
187             {
188                 return pass_flags::pass_normal;    // always accept
189             }
190 
get_state() const191             std::size_t get_state() const { return 0; }
set_state(std::size_t)192             void set_state(std::size_t) {}
193 
set_end(Iterator const &)194             void set_end(Iterator const&) {}
195 
get_first()196             Iterator& get_first() { return first_; }
get_first() const197             Iterator const& get_first() const { return first_; }
get_last() const198             Iterator const& get_last() const { return last_; }
199 
get_value() const200             iterator_range<Iterator> get_value() const
201             {
202                 return iterator_range<Iterator>(first_, last_);
203             }
has_value() const204             bool has_value() const { return false; }
reset_value()205             void reset_value() {}
206 
reset_bol(bool bol)207             void reset_bol(bool bol) { bol_ = bol; }
208 
209         protected:
210             Iterator& first_;
211             Iterator last_;
212 
213             next_token_functor next_token_;
214             get_state_name_type get_state_name_;
215 
216             bool bol_;      // helper storing whether last character was \n
217 
218             // silence MSVC warning C4512: assignment operator could not be generated
219             BOOST_DELETED_FUNCTION(static_data& operator= (static_data const&))
220         };
221 
222         ///////////////////////////////////////////////////////////////////////
223         //  doesn't support lexer semantic actions, but supports state
224         template <typename Iterator, typename TokenValue>
225         class static_data<Iterator, mpl::false_, mpl::true_, TokenValue>
226           : public static_data<Iterator, mpl::false_, mpl::false_, TokenValue>
227         {
228         protected:
229             typedef static_data<Iterator, mpl::false_, mpl::false_, TokenValue> base_type;
230             typedef typename base_type::char_type char_type;
231 
232         public:
233             typedef Iterator base_iterator_type;
234             typedef iterator_range<Iterator> token_value_type;
235             typedef token_value_type get_value_type;
236             typedef typename base_type::state_type state_type;
237             typedef typename base_type::state_name_type state_name_type;
238             typedef typename base_type::semantic_actions_type
239                 semantic_actions_type;
240 
241             // initialize the shared data
242             template <typename IterData>
static_data(IterData const & data,Iterator & first,Iterator const & last)243             static_data (IterData const& data, Iterator& first
244                   , Iterator const& last)
245               : base_type(data, first, last), state_(0)
246               , num_states_(data.num_states_) {}
247 
248             // The following functions are used by the implementation of the
249             // placeholder '_state'.
set_state_name(char_type const * new_state)250             void set_state_name (char_type const* new_state)
251             {
252                 std::size_t state_id = lexertl::detail::get_state_id(new_state
253                   , this->get_state_name_, num_states_);
254 
255                 // if the following assertion fires you've probably been using
256                 // a lexer state name which was not defined in your token
257                 // definition
258                 BOOST_ASSERT(state_id != boost::lexer::npos);
259 
260                 if (state_id != boost::lexer::npos)
261                     state_ = state_id;
262             }
get_state_name() const263             char_type const* get_state_name() const
264             {
265                 return this->get_state_name_(state_);
266             }
get_state_id(char_type const * state) const267             std::size_t get_state_id(char_type const* state) const
268             {
269                 return lexertl::detail::get_state_id(state
270                   , this->get_state_name_, num_states_);
271             }
272 
273             // the functions next() and get_state() are used by the functor
274             // implementation below
275 
276             // The function next() tries to match the next token from the
277             // underlying input sequence.
next(Iterator & end,std::size_t & unique_id,bool & prev_bol)278             std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol)
279             {
280                 prev_bol = this->bol_;
281                 return this->next_token_(state_, this->bol_, end, this->last_
282                   , unique_id);
283             }
284 
get_state()285             std::size_t& get_state() { return state_; }
set_state(std::size_t state)286             void set_state(std::size_t state) { state_ = state; }
287 
288         protected:
289             std::size_t state_;
290             std::size_t num_states_;
291 
292             // silence MSVC warning C4512: assignment operator could not be generated
293             BOOST_DELETED_FUNCTION(static_data& operator= (static_data const&))
294         };
295 
296         ///////////////////////////////////////////////////////////////////////
297         //  does support actors, but may have no state
298         template <typename Iterator, typename HasState, typename TokenValue>
299         class static_data<Iterator, mpl::true_, HasState, TokenValue>
300           : public static_data<Iterator, mpl::false_, HasState, TokenValue>
301         {
302         public:
303             typedef semantic_actions<Iterator, HasState, static_data>
304                 semantic_actions_type;
305 
306         protected:
307             typedef static_data<Iterator, mpl::false_, HasState, TokenValue>
308                 base_type;
309             typedef typename base_type::char_type char_type;
310             typedef typename semantic_actions_type::functor_wrapper_type
311                 functor_wrapper_type;
312 
313         public:
314             typedef Iterator base_iterator_type;
315             typedef TokenValue token_value_type;
316             typedef TokenValue const& get_value_type;
317             typedef typename base_type::state_type state_type;
318             typedef typename base_type::state_name_type state_name_type;
319 
320             typedef detail::wrap_action<functor_wrapper_type
321               , Iterator, static_data, std::size_t> wrap_action_type;
322 
323             template <typename IterData>
static_data(IterData const & data,Iterator & first,Iterator const & last)324             static_data (IterData const& data, Iterator& first
325                   , Iterator const& last)
326               : base_type(data, first, last)
327               , actions_(data.actions_), hold_()
328               , value_(iterator_range<Iterator>(first, last))
329               , has_value_(false)
330               , has_hold_(false)
331             {}
332 
333             // invoke attached semantic actions, if defined
invoke_actions(std::size_t state,std::size_t & id,std::size_t unique_id,Iterator & end)334             BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state
335               , std::size_t& id, std::size_t unique_id, Iterator& end)
336             {
337                 return actions_.invoke_actions(state, id, unique_id, end, *this);
338             }
339 
340             // The function less() is used by the implementation of the support
341             // function lex::less(). Its functionality is equivalent to flex'
342             // function yyless(): it returns an iterator positioned to the
343             // nth input character beyond the current start iterator (i.e. by
344             // assigning the return value to the placeholder '_end' it is
345             // possible to return all but the first n characters of the current
346             // token back to the input stream).
less(Iterator & it,int n)347             Iterator const& less(Iterator& it, int n)
348             {
349                 it = this->get_first();
350                 std::advance(it, n);
351                 return it;
352             }
353 
354             // The function more() is used by the implementation of the support
355             // function lex::more(). Its functionality is equivalent to flex'
356             // function yymore(): it tells the lexer that the next time it
357             // matches a rule, the corresponding token should be appended onto
358             // the current token value rather than replacing it.
more()359             void more()
360             {
361                 hold_ = this->get_first();
362                 has_hold_ = true;
363             }
364 
365             // The function lookahead() is used by the implementation of the
366             // support function lex::lookahead. It can be used to implement
367             // lookahead for lexer engines not supporting constructs like flex'
368             // a/b  (match a, but only when followed by b)
lookahead(std::size_t id,std::size_t state=std::size_t (~0))369             bool lookahead(std::size_t id, std::size_t state = std::size_t(~0))
370             {
371                 Iterator end = end_;
372                 std::size_t unique_id = boost::lexer::npos;
373                 bool bol = this->bol_;
374 
375                 if (std::size_t(~0) == state)
376                     state = this->state_;
377 
378                 return id == this->next_token_(
379                     state, bol, end, this->get_eoi(), unique_id);
380             }
381 
382             // The adjust_start() and revert_adjust_start() are helper
383             // functions needed to implement the functionality required for
384             // lex::more(). It is called from the functor body below.
adjust_start()385             bool adjust_start()
386             {
387                 if (!has_hold_)
388                     return false;
389 
390                 std::swap(this->get_first(), hold_);
391                 has_hold_ = false;
392                 return true;
393             }
revert_adjust_start()394             void revert_adjust_start()
395             {
396                 // this will be called only if adjust_start above returned true
397                 std::swap(this->get_first(), hold_);
398                 has_hold_ = true;
399             }
400 
get_value() const401             TokenValue const& get_value() const
402             {
403                 if (!has_value_) {
404                     value_ = iterator_range<Iterator>(this->get_first(), end_);
405                     has_value_ = true;
406                 }
407                 return value_;
408             }
409             template <typename Value>
set_value(Value const & val)410             void set_value(Value const& val)
411             {
412                 value_ = val;
413                 has_value_ = true;
414             }
set_end(Iterator const & it)415             void set_end(Iterator const& it)
416             {
417                 end_ = it;
418             }
has_value() const419             bool has_value() const { return has_value_; }
reset_value()420             void reset_value() { has_value_ = false; }
421 
422         protected:
423             semantic_actions_type const& actions_;
424             Iterator hold_;     // iterator needed to support lex::more()
425             Iterator end_;      // iterator pointing to end of matched token
426             mutable TokenValue value_;  // token value to use
427             mutable bool has_value_;    // 'true' if value_ is valid
428             bool has_hold_;     // 'true' if hold_ is valid
429 
430             // silence MSVC warning C4512: assignment operator could not be generated
431             BOOST_DELETED_FUNCTION(static_data& operator= (static_data const&))
432         };
433 
434         ///////////////////////////////////////////////////////////////////////
435         //  does support lexer semantic actions, may support state, is used for
436         //  position_token exposing exactly one type
437         template <typename Iterator, typename HasState, typename TokenValue>
438         class static_data<Iterator, mpl::true_, HasState, boost::optional<TokenValue> >
439           : public static_data<Iterator, mpl::false_, HasState, TokenValue>
440         {
441         public:
442             typedef semantic_actions<Iterator, HasState, static_data>
443                 semantic_actions_type;
444 
445         protected:
446             typedef static_data<Iterator, mpl::false_, HasState, TokenValue>
447                 base_type;
448             typedef typename base_type::char_type char_type;
449             typedef typename semantic_actions_type::functor_wrapper_type
450                 functor_wrapper_type;
451 
452         public:
453             typedef Iterator base_iterator_type;
454             typedef boost::optional<TokenValue> token_value_type;
455             typedef boost::optional<TokenValue> const& get_value_type;
456             typedef typename base_type::state_type state_type;
457             typedef typename base_type::state_name_type state_name_type;
458 
459             typedef detail::wrap_action<functor_wrapper_type
460               , Iterator, static_data, std::size_t> wrap_action_type;
461 
462             template <typename IterData>
static_data(IterData const & data_,Iterator & first,Iterator const & last)463             static_data (IterData const& data_, Iterator& first, Iterator const& last)
464               : base_type(data_, first, last)
465               , actions_(data_.actions_), hold_()
466               , has_value_(false), has_hold_(false)
467             {
468                 spirit::traits::assign_to(first, last, value_);
469                 has_value_ = true;
470             }
471 
472             // invoke attached semantic actions, if defined
invoke_actions(std::size_t state,std::size_t & id,std::size_t unique_id,Iterator & end)473             BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state
474               , std::size_t& id, std::size_t unique_id, Iterator& end)
475             {
476                 return actions_.invoke_actions(state, id, unique_id, end, *this);
477             }
478 
479             // The function less() is used by the implementation of the support
480             // function lex::less(). Its functionality is equivalent to flex'
481             // function yyless(): it returns an iterator positioned to the
482             // nth input character beyond the current start iterator (i.e. by
483             // assigning the return value to the placeholder '_end' it is
484             // possible to return all but the first n characters of the current
485             // token back to the input stream).
less(Iterator & it,int n)486             Iterator const& less(Iterator& it, int n)
487             {
488                 it = this->get_first();
489                 std::advance(it, n);
490                 return it;
491             }
492 
493             // The function more() is used by the implementation of the support
494             // function lex::more(). Its functionality is equivalent to flex'
495             // function yymore(): it tells the lexer that the next time it
496             // matches a rule, the corresponding token should be appended onto
497             // the current token value rather than replacing it.
more()498             void more()
499             {
500                 hold_ = this->get_first();
501                 has_hold_ = true;
502             }
503 
504             // The function lookahead() is used by the implementation of the
505             // support function lex::lookahead. It can be used to implement
506             // lookahead for lexer engines not supporting constructs like flex'
507             // a/b  (match a, but only when followed by b)
lookahead(std::size_t id,std::size_t state=std::size_t (~0))508             bool lookahead(std::size_t id, std::size_t state = std::size_t(~0))
509             {
510                 Iterator end = end_;
511                 std::size_t unique_id = boost::lexer::npos;
512                 bool bol = this->bol_;
513 
514                 if (std::size_t(~0) == state)
515                     state = this->state_;
516 
517                 return id == this->next_token_(
518                     state, bol, end, this->get_eoi(), unique_id);
519             }
520 
521             // The adjust_start() and revert_adjust_start() are helper
522             // functions needed to implement the functionality required for
523             // lex::more(). It is called from the functor body below.
adjust_start()524             bool adjust_start()
525             {
526                 if (!has_hold_)
527                     return false;
528 
529                 std::swap(this->get_first(), hold_);
530                 has_hold_ = false;
531                 return true;
532             }
revert_adjust_start()533             void revert_adjust_start()
534             {
535                 // this will be called only if adjust_start above returned true
536                 std::swap(this->get_first(), hold_);
537                 has_hold_ = true;
538             }
539 
get_value() const540             TokenValue const& get_value() const
541             {
542                 if (!has_value_) {
543                     spirit::traits::assign_to(this->get_first(), end_, value_);
544                     has_value_ = true;
545                 }
546                 return value_;
547             }
548             template <typename Value>
set_value(Value const & val)549             void set_value(Value const& val)
550             {
551                 value_ = val;
552                 has_value_ = true;
553             }
set_end(Iterator const & it)554             void set_end(Iterator const& it)
555             {
556                 end_ = it;
557             }
has_value() const558             bool has_value() const { return has_value_; }
reset_value()559             void reset_value() { has_value_ = false; }
560 
561         protected:
562             semantic_actions_type const& actions_;
563             Iterator hold_;     // iterator needed to support lex::more()
564             Iterator end_;      // iterator pointing to end of matched token
565             mutable token_value_type value_;  // token value to use
566             mutable bool has_value_;    // 'true' if value_ is valid
567             bool has_hold_;     // 'true' if hold_ is valid
568 
569             // silence MSVC warning C4512: assignment operator could not be generated
570             BOOST_DELETED_FUNCTION(static_data& operator= (static_data const&))
571         };
572     }
573 }}}}
574 
575 #endif
576