1 //  Copyright (c) 2001-2011 Hartmut Kaiser
2 //
3 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
4 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5 
6 #if !defined(BOOST_SPIRIT_LEX_STATIC_LEXER_FEB_10_2008_0753PM)
7 #define BOOST_SPIRIT_LEX_STATIC_LEXER_FEB_10_2008_0753PM
8 
9 #if defined(_MSC_VER)
10 #pragma once
11 #endif
12 
13 #include <boost/spirit/home/lex/lexer/lexertl/token.hpp>
14 #include <boost/spirit/home/lex/lexer/lexertl/functor.hpp>
15 #include <boost/spirit/home/lex/lexer/lexertl/static_functor_data.hpp>
16 #include <boost/spirit/home/lex/lexer/lexertl/iterator.hpp>
17 #include <boost/spirit/home/lex/lexer/lexertl/static_version.hpp>
18 #if defined(BOOST_SPIRIT_DEBUG)
19 #include <boost/spirit/home/support/detail/lexer/debug.hpp>
20 #endif
21 #include <iterator> // for std::iterator_traits
22 
23 namespace boost { namespace spirit { namespace lex { namespace lexertl
24 {
25     ///////////////////////////////////////////////////////////////////////////
26     //  forward declaration
27     ///////////////////////////////////////////////////////////////////////////
28     namespace static_
29     {
30         struct lexer;
31     }
32 
33     ///////////////////////////////////////////////////////////////////////////
34     //
35     //  Every lexer type to be used as a lexer for Spirit has to conform to
36     //  the following public interface:
37     //
38     //    typedefs:
39     //        iterator_type   The type of the iterator exposed by this lexer.
40     //        token_type      The type of the tokens returned from the exposed
41     //                        iterators.
42     //
43     //    functions:
44     //        default constructor
45     //                        Since lexers are instantiated as base classes
46     //                        only it might be a good idea to make this
47     //                        constructor protected.
48     //        begin, end      Return a pair of iterators, when dereferenced
49     //                        returning the sequence of tokens recognized in
50     //                        the input stream given as the parameters to the
51     //                        begin() function.
52     //        add_token       Should add the definition of a token to be
53     //                        recognized by this lexer.
54     //        clear           Should delete all current token definitions
55     //                        associated with the given state of this lexer
56     //                        object.
57     //
58     //    template parameters:
59     //        Token           The type of the tokens to be returned from the
60     //                        exposed token iterator.
61     //        LexerTables     See explanations below.
62     //        Iterator        The type of the iterator used to access the
63     //                        underlying character stream.
64     //        Functor         The type of the InputPolicy to use to instantiate
65     //                        the multi_pass iterator type to be used as the
66     //                        token iterator (returned from begin()/end()).
67     //
68     //    Additionally, this implementation of a static lexer has a template
69     //    parameter LexerTables allowing to customize the static lexer tables
70     //    to be used. The LexerTables is expected to be a type exposing
71     //    the following functions:
72     //
    //        static std::size_t const state_count()
    //
    //                This function needs to return the number of lexer states
    //                whose names can be queried through the state_name()
    //                function.
    //
    //        static char const* state_name(std::size_t idx)
    //
    //                This function needs to return the name of the lexer
    //                state with the given index (index 0 is used as the
    //                initial state). There need to be as many state names as
    //                the state_count() function returns.
84     //
85     //        template<typename Iterator>
86     //        std::size_t next(std::size_t &start_state_, Iterator const& start_
87     //          , Iterator &start_token_, Iterator const& end_
88     //          , std::size_t& unique_id_);
89     //
90     //                This function is expected to return the next matched
91     //                token from the underlying input stream.
92     //
93     ///////////////////////////////////////////////////////////////////////////
94 
95     ///////////////////////////////////////////////////////////////////////////
96     //
    //  The static_lexer class is an implementation of a Spirit.Lex
    //  lexer on top of Ben Hanson's lexertl library (For more information
    //  about lexertl go here: http://www.benhanson.net/lexertl.html).
100     //
101     //  This class is designed to be used in conjunction with a generated,
102     //  static lexer. For more information see the documentation (The Static
103     //  Lexer Model).
104     //
105     //  This class is supposed to be used as the first and only template
106     //  parameter while instantiating instances of a lex::lexer class.
107     //
108     ///////////////////////////////////////////////////////////////////////////
109     template <typename Token = token<>
110       , typename LexerTables = static_::lexer
111       , typename Iterator = typename Token::iterator_type
112       , typename Functor = functor<Token, detail::static_data, Iterator> >
113     class static_lexer
114     {
115     private:
true_boost::spirit::lex::lexertl::static_lexer::dummy116         struct dummy { void true_() {} };
117         typedef void (dummy::*safe_bool)();
118 
119     public:
120         // object is always valid
operator safe_bool() const121         operator safe_bool() const { return &dummy::true_; }
122 
123         typedef typename std::iterator_traits<Iterator>::value_type char_type;
124         typedef std::basic_string<char_type> string_type;
125 
126         //  Every lexer type to be used as a lexer for Spirit has to conform to
127         //  a public interface
128         typedef Token token_type;
129         typedef typename Token::id_type id_type;
130         typedef iterator<Functor> iterator_type;
131 
132     private:
133         // this type is purely used for the iterator_type construction below
134         struct iterator_data_type
135         {
136             typedef typename Functor::next_token_functor next_token_functor;
137             typedef typename Functor::semantic_actions_type semantic_actions_type;
138             typedef typename Functor::get_state_name_type get_state_name_type;
139 
iterator_data_typeboost::spirit::lex::lexertl::static_lexer::iterator_data_type140             iterator_data_type(next_token_functor next
141                   , semantic_actions_type const& actions
142                   , get_state_name_type get_state_name, std::size_t num_states
143                   , bool bol)
144               : next_(next), actions_(actions), get_state_name_(get_state_name)
145               , num_states_(num_states), bol_(bol)
146             {}
147 
148             next_token_functor next_;
149             semantic_actions_type const& actions_;
150             get_state_name_type get_state_name_;
151             std::size_t num_states_;
152             bool bol_;
153 
154             // silence MSVC warning C4512: assignment operator could not be generated
155             BOOST_DELETED_FUNCTION(iterator_data_type& operator= (iterator_data_type const&))
156         };
157 
158         typedef LexerTables tables_type;
159 
160         // The following static assertion fires if the referenced static lexer
161         // tables are generated by a different static lexer version as used for
162         // the current compilation unit. Please regenerate your static lexer
163         // tables before trying to create a static_lexer<> instance.
164         BOOST_SPIRIT_ASSERT_MSG(
165             tables_type::static_version == SPIRIT_STATIC_LEXER_VERSION
166           , incompatible_static_lexer_version, (LexerTables));
167 
168     public:
169         //  Return the start iterator usable for iterating over the generated
170         //  tokens, the generated function next_token(...) is called to match
171         //  the next token from the input.
172         template <typename Iterator_>
begin(Iterator_ & first,Iterator_ const & last,char_type const * initial_state=0) const173         iterator_type begin(Iterator_& first, Iterator_ const& last
174           , char_type const* initial_state = 0) const
175         {
176             iterator_data_type iterator_data(
177                     &tables_type::template next<Iterator_>, actions_
178                   , &tables_type::state_name, tables_type::state_count()
179                   , tables_type::supports_bol
180                 );
181             return iterator_type(iterator_data, first, last, initial_state);
182         }
183 
184         //  Return the end iterator usable to stop iterating over the generated
185         //  tokens.
end() const186         iterator_type end() const
187         {
188             return iterator_type();
189         }
190 
191     protected:
192         //  Lexer instances can be created by means of a derived class only.
static_lexer(unsigned int)193         static_lexer(unsigned int) : unique_id_(0) {}
194 
195     public:
196         // interface for token definition management
add_token(char_type const *,char_type,std::size_t,char_type const *)197         std::size_t add_token (char_type const*, char_type, std::size_t
198           , char_type const*)
199         {
200             return unique_id_++;
201         }
add_token(char_type const *,string_type const &,std::size_t,char_type const *)202         std::size_t add_token (char_type const*, string_type const&
203           , std::size_t, char_type const*)
204         {
205             return unique_id_++;
206         }
207 
208         // interface for pattern definition management
add_pattern(char_type const *,string_type const &,string_type const &)209         void add_pattern (char_type const*, string_type const&
210           , string_type const&) {}
211 
clear(char_type const *)212         void clear(char_type const*) {}
213 
add_state(char_type const * state)214         std::size_t add_state(char_type const* state)
215         {
216             return detail::get_state_id(state, &tables_type::state_name
217               , tables_type::state_count());
218         }
initial_state() const219         string_type initial_state() const
220         {
221             return tables_type::state_name(0);
222         }
223 
224         // register a semantic action with the given id
225         template <typename F>
add_action(id_type unique_id,std::size_t state,F act)226         void add_action(id_type unique_id, std::size_t state, F act)
227         {
228             typedef typename Functor::wrap_action_type wrapper_type;
229             actions_.add_action(unique_id, state, wrapper_type::call(act));
230         }
231 
init_dfa(bool=false) const232         bool init_dfa(bool /*minimize*/ = false) const { return true; }
233 
234     private:
235         typename Functor::semantic_actions_type actions_;
236         std::size_t unique_id_;
237     };
238 
239     ///////////////////////////////////////////////////////////////////////////
240     //
241     //  The static_actor_lexer class is another implementation of a
242     //  Spirit.Lex lexer on top of Ben Hanson's lexertl library as outlined
243     //  above (For more information about lexertl go here:
244     //  http://www.benhanson.net/lexertl.html).
245     //
246     //  Just as the static_lexer class it is meant to be used with
247     //  a statically generated lexer as outlined above.
248     //
249     //  The only difference to the static_lexer class above is that
250     //  token_def definitions may have semantic (lexer) actions attached while
251     //  being defined:
252     //
253     //      int w;
254     //      token_def<> word = "[^ \t\n]+";
255     //      self = word[++ref(w)];        // see example: word_count_lexer
256     //
257     //  This class is supposed to be used as the first and only template
258     //  parameter while instantiating instances of a lex::lexer class.
259     //
260     ///////////////////////////////////////////////////////////////////////////
261     template <typename Token = token<>
262       , typename LexerTables = static_::lexer
263       , typename Iterator = typename Token::iterator_type
264       , typename Functor
265           = functor<Token, detail::static_data, Iterator, mpl::true_> >
266     class static_actor_lexer
267       : public static_lexer<Token, LexerTables, Iterator, Functor>
268     {
269     protected:
270         // Lexer instances can be created by means of a derived class only.
static_actor_lexer(unsigned int flags)271         static_actor_lexer(unsigned int flags)
272           : static_lexer<Token, LexerTables, Iterator, Functor>(flags)
273         {}
274     };
275 
276 }}}}
277 
278 #endif
279