1 //  Copyright (c) 2001-2011 Hartmut Kaiser
2 //
3 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
4 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5 
6 #if !defined(BOOST_SPIRIT_LEX_LEXER_FUNCTOR_NOV_18_2007_1112PM)
7 #define BOOST_SPIRIT_LEX_LEXER_FUNCTOR_NOV_18_2007_1112PM
8 
9 #if defined(_MSC_VER)
10 #pragma once
11 #endif
12 
13 #include <boost/mpl/bool.hpp>
14 #include <boost/detail/workaround.hpp>
15 #include <boost/spirit/home/lex/lexer/pass_flags.hpp>
16 #include <boost/assert.hpp>
17 #include <iterator> // for std::iterator_traits
18 
19 #if 0 != __COMO_VERSION__ || !BOOST_WORKAROUND(BOOST_MSVC, <= 1310)
20 #define BOOST_SPIRIT_STATIC_EOF 1
21 #define BOOST_SPIRIT_EOF_PREFIX static
22 #else
23 #define BOOST_SPIRIT_EOF_PREFIX
24 #endif
25 
26 namespace boost { namespace spirit { namespace lex { namespace lexertl
27 {
28     ///////////////////////////////////////////////////////////////////////////
29     //
    //  functor is a template usable as the functor object for the
    //  multi_pass iterator, which allows wrapping a lexertl based dfa into
    //  an iterator based interface.
33     //
34     //    Token:      the type of the tokens produced by this functor
35     //                this needs to expose a constructor with the following
36     //                prototype:
37     //
38     //                Token(std::size_t id, std::size_t state,
39     //                      Iterator start, Iterator end)
40     //
    //                where 'id' is the token id, 'state' is the lexer state
    //                this token has been matched in, and 'start' and 'end'
    //                mark the start and the end of the token with respect
    //                to the underlying character stream.
45     //    FunctorData:
46     //                this is expected to encapsulate the shared part of the
47     //                functor (see lex/lexer/lexertl/functor_data.hpp for an
48     //                example and documentation).
49     //    Iterator:   the type of the underlying iterator
50     //    SupportsActors:
51     //                this is expected to be a mpl::bool_, if mpl::true_ the
52     //                functor invokes functors which (optionally) have
53     //                been attached to the token definitions.
    //    SupportsState:
55     //                this is expected to be a mpl::bool_, if mpl::true_ the
56     //                functor supports different lexer states,
57     //                otherwise no lexer state is supported.
58     //
59     ///////////////////////////////////////////////////////////////////////////
60     template <typename Token
61       , template <typename, typename, typename, typename> class FunctorData
62       , typename Iterator = typename Token::iterator_type
63       , typename SupportsActors = mpl::false_
64       , typename SupportsState = typename Token::has_state>
65     class functor
66     {
67     public:
68         typedef typename
69             std::iterator_traits<Iterator>::value_type
70         char_type;
71 
72     private:
73         // Needed by compilers not implementing the resolution to DR45. For
74         // reference, see
75         // http://www.open-std.org/JTC1/SC22/WG21/docs/cwg_defects.html#45.
76         typedef typename Token::token_value_type token_value_type;
77         friend class FunctorData<Iterator, SupportsActors, SupportsState
78           , token_value_type>;
79 
80         // Helper template allowing to assign a value on exit
81         template <typename T>
82         struct assign_on_exit
83         {
assign_on_exitboost::spirit::lex::lexertl::functor::assign_on_exit84             assign_on_exit(T& dst, T const& src)
85               : dst_(dst), src_(src) {}
86 
~assign_on_exitboost::spirit::lex::lexertl::functor::assign_on_exit87             ~assign_on_exit()
88             {
89                 dst_ = src_;
90             }
91 
92             T& dst_;
93             T const& src_;
94 
95             // silence MSVC warning C4512: assignment operator could not be generated
96             BOOST_DELETED_FUNCTION(assign_on_exit& operator= (assign_on_exit const&))
97         };
98 
99     public:
functor()100         functor() {}
101 
102 #if BOOST_WORKAROUND(BOOST_MSVC, <= 1310)
103         // somehow VC7.1 needs this (meaningless) assignment operator
operator =(functor const & rhs)104         functor& operator=(functor const& rhs)
105         {
106             return *this;
107         }
108 #endif
109 
110         ///////////////////////////////////////////////////////////////////////
111         // interface to the iterator_policies::split_functor_input policy
112         typedef Token result_type;
113         typedef functor unique;
114         typedef FunctorData<Iterator, SupportsActors, SupportsState
115           , token_value_type> shared;
116 
117         BOOST_SPIRIT_EOF_PREFIX result_type const eof;
118 
119         ///////////////////////////////////////////////////////////////////////
120         typedef Iterator iterator_type;
121         typedef typename shared::semantic_actions_type semantic_actions_type;
122         typedef typename shared::next_token_functor next_token_functor;
123         typedef typename shared::get_state_name_type get_state_name_type;
124 
125         // this is needed to wrap the semantic actions in a proper way
126         typedef typename shared::wrap_action_type wrap_action_type;
127 
128         ///////////////////////////////////////////////////////////////////////
129         template <typename MultiPass>
get_next(MultiPass & mp,result_type & result)130         static result_type& get_next(MultiPass& mp, result_type& result)
131         {
132             typedef typename result_type::id_type id_type;
133 
134             shared& data = mp.shared()->ftor;
135             for(;;)
136             {
137                 if (data.get_first() == data.get_last())
138 #if defined(BOOST_SPIRIT_STATIC_EOF)
139                     return result = eof;
140 #else
141                     return result = mp.ftor.eof;
142 #endif
143 
144                 data.reset_value();
145                 Iterator end = data.get_first();
146                 std::size_t unique_id = boost::lexer::npos;
147                 bool prev_bol = false;
148 
149                 // lexer matching might change state
150                 std::size_t state = data.get_state();
151                 std::size_t id = data.next(end, unique_id, prev_bol);
152 
153                 if (boost::lexer::npos == id) {   // no match
154 #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
155                     std::string next;
156                     Iterator it = data.get_first();
157                     for (std::size_t i = 0; i < 10 && it != data.get_last(); ++it, ++i)
158                         next += *it;
159 
160                     std::cerr << "Not matched, in state: " << state
161                               << ", lookahead: >" << next << "<" << std::endl;
162 #endif
163                     return result = result_type(0);
164                 }
165                 else if (0 == id) {         // EOF reached
166 #if defined(BOOST_SPIRIT_STATIC_EOF)
167                     return result = eof;
168 #else
169                     return result = mp.ftor.eof;
170 #endif
171                 }
172 
173 #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
174                 {
175                     std::string next;
176                     Iterator it = end;
177                     for (std::size_t i = 0; i < 10 && it != data.get_last(); ++it, ++i)
178                         next += *it;
179 
180                     std::cerr << "Matched: " << id << ", in state: "
181                               << state << ", string: >"
182                               << std::basic_string<char_type>(data.get_first(), end) << "<"
183                               << ", lookahead: >" << next << "<" << std::endl;
184                     if (data.get_state() != state) {
185                         std::cerr << "Switched to state: "
186                                   << data.get_state() << std::endl;
187                     }
188                 }
189 #endif
190                 // account for a possibly pending lex::more(), i.e. moving
191                 // data.first_ back to the start of the previously matched token.
192                 bool adjusted = data.adjust_start();
193 
194                 // set the end of the matched input sequence in the token data
195                 data.set_end(end);
196 
197                 // invoke attached semantic actions, if defined, might change
198                 // state, id, data.first_, and/or end
199                 BOOST_SCOPED_ENUM(pass_flags) pass =
200                     data.invoke_actions(state, id, unique_id, end);
201 
202                 if (data.has_value()) {
203                     // return matched token using the token value as set before
204                     // using data.set_value(), advancing 'data.first_' past the
205                     // matched sequence
206                     assign_on_exit<Iterator> on_exit(data.get_first(), end);
207                     return result = result_type(id_type(id), state, data.get_value());
208                 }
209                 else if (pass_flags::pass_normal == pass) {
210                     // return matched token, advancing 'data.first_' past the
211                     // matched sequence
212                     assign_on_exit<Iterator> on_exit(data.get_first(), end);
213                     return result = result_type(id_type(id), state, data.get_first(), end);
214                 }
215                 else if (pass_flags::pass_fail == pass) {
216 #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
217                     std::cerr << "Matching forced to fail" << std::endl;
218 #endif
219                     // if the data.first_ got adjusted above, revert this adjustment
220                     if (adjusted)
221                         data.revert_adjust_start();
222 
223                     // one of the semantic actions signaled no-match
224                     data.reset_bol(prev_bol);
225                     if (state != data.get_state())
226                         continue;       // retry matching if state has changed
227 
228                     // if the state is unchanged repeating the match wouldn't
229                     // move the input forward, causing an infinite loop
230                     return result = result_type(0);
231                 }
232 
233 #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
234                 std::cerr << "Token ignored, continuing matching" << std::endl;
235 #endif
236             // if this token needs to be ignored, just repeat the matching,
237             // while starting right after the current match
238                 data.get_first() = end;
239             }
240         }
241 
242         // set_state are propagated up to the iterator interface, allowing to
243         // manipulate the current lexer state through any of the exposed
244         // iterators.
245         template <typename MultiPass>
set_state(MultiPass & mp,std::size_t state)246         static std::size_t set_state(MultiPass& mp, std::size_t state)
247         {
248             std::size_t oldstate = mp.shared()->ftor.get_state();
249             mp.shared()->ftor.set_state(state);
250 
251 #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
252             std::cerr << "Switching state from: " << oldstate
253                       << " to: " << state
254                       << std::endl;
255 #endif
256             return oldstate;
257         }
258 
259         template <typename MultiPass>
get_state(MultiPass & mp)260         static std::size_t get_state(MultiPass& mp)
261         {
262             return mp.shared()->ftor.get_state();
263         }
264 
265         template <typename MultiPass>
266         static std::size_t
map_state(MultiPass const & mp,char_type const * statename)267         map_state(MultiPass const& mp, char_type const* statename)
268         {
269             return mp.shared()->ftor.get_state_id(statename);
270         }
271 
272         // we don't need this, but it must be there
273         template <typename MultiPass>
destroy(MultiPass const &)274         static void destroy(MultiPass const&) {}
275     };
276 
#if defined(BOOST_SPIRIT_STATIC_EOF)
    ///////////////////////////////////////////////////////////////////////////
    //  Definition of the static eof token: a default constructed token
    //  (functor<...>::result_type is a typedef for Token)
    ///////////////////////////////////////////////////////////////////////////
    template <typename Token
      , template <typename, typename, typename, typename> class FunctorData
      , typename Iterator, typename SupportsActors, typename SupportsState>
    Token const
        functor<Token, FunctorData, Iterator, SupportsActors, SupportsState>::eof =
            Token();
#endif
289 
290 }}}}
291 
292 #undef BOOST_SPIRIT_EOF_PREFIX
293 #undef BOOST_SPIRIT_STATIC_EOF
294 
295 #endif
296