1 // Copyright (c) 2001-2011 Hartmut Kaiser 2 // 3 // Distributed under the Boost Software License, Version 1.0. (See accompanying 4 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 5 6 #if !defined(BOOST_SPIRIT_LEX_LEXER_FUNCTOR_NOV_18_2007_1112PM) 7 #define BOOST_SPIRIT_LEX_LEXER_FUNCTOR_NOV_18_2007_1112PM 8 9 #if defined(_MSC_VER) 10 #pragma once 11 #endif 12 13 #include <boost/mpl/bool.hpp> 14 #include <boost/detail/workaround.hpp> 15 #include <boost/spirit/home/lex/lexer/pass_flags.hpp> 16 #include <boost/assert.hpp> 17 #include <iterator> // for std::iterator_traits 18 19 #if 0 != __COMO_VERSION__ || !BOOST_WORKAROUND(BOOST_MSVC, <= 1310) 20 #define BOOST_SPIRIT_STATIC_EOF 1 21 #define BOOST_SPIRIT_EOF_PREFIX static 22 #else 23 #define BOOST_SPIRIT_EOF_PREFIX 24 #endif 25 26 namespace boost { namespace spirit { namespace lex { namespace lexertl 27 { 28 /////////////////////////////////////////////////////////////////////////// 29 // 30 // functor is a template usable as the functor object for the 31 // multi_pass iterator allowing to wrap a lexertl based dfa into a 32 // iterator based interface. 33 // 34 // Token: the type of the tokens produced by this functor 35 // this needs to expose a constructor with the following 36 // prototype: 37 // 38 // Token(std::size_t id, std::size_t state, 39 // Iterator start, Iterator end) 40 // 41 // where 'id' is the token id, state is the lexer state, 42 // this token has been matched in, and 'first' and 'end' 43 // mark the start and the end of the token with respect 44 // to the underlying character stream. 45 // FunctorData: 46 // this is expected to encapsulate the shared part of the 47 // functor (see lex/lexer/lexertl/functor_data.hpp for an 48 // example and documentation). 49 // Iterator: the type of the underlying iterator 50 // SupportsActors: 51 // this is expected to be a mpl::bool_, if mpl::true_ the 52 // functor invokes functors which (optionally) have 53 // been attached to the token definitions. 54 // SupportState: 55 // this is expected to be a mpl::bool_, if mpl::true_ the 56 // functor supports different lexer states, 57 // otherwise no lexer state is supported. 58 // 59 /////////////////////////////////////////////////////////////////////////// 60 template <typename Token 61 , template <typename, typename, typename, typename> class FunctorData 62 , typename Iterator = typename Token::iterator_type 63 , typename SupportsActors = mpl::false_ 64 , typename SupportsState = typename Token::has_state> 65 class functor 66 { 67 public: 68 typedef typename 69 std::iterator_traits<Iterator>::value_type 70 char_type; 71 72 private: 73 // Needed by compilers not implementing the resolution to DR45. For 74 // reference, see 75 // http://www.open-std.org/JTC1/SC22/WG21/docs/cwg_defects.html#45. 76 typedef typename Token::token_value_type token_value_type; 77 friend class FunctorData<Iterator, SupportsActors, SupportsState 78 , token_value_type>; 79 80 // Helper template allowing to assign a value on exit 81 template <typename T> 82 struct assign_on_exit 83 { assign_on_exitboost::spirit::lex::lexertl::functor::assign_on_exit84 assign_on_exit(T& dst, T const& src) 85 : dst_(dst), src_(src) {} 86 ~assign_on_exitboost::spirit::lex::lexertl::functor::assign_on_exit87 ~assign_on_exit() 88 { 89 dst_ = src_; 90 } 91 92 T& dst_; 93 T const& src_; 94 95 // silence MSVC warning C4512: assignment operator could not be generated 96 BOOST_DELETED_FUNCTION(assign_on_exit& operator= (assign_on_exit const&)) 97 }; 98 99 public: functor()100 functor() {} 101 102 #if BOOST_WORKAROUND(BOOST_MSVC, <= 1310) 103 // somehow VC7.1 needs this (meaningless) assignment operator operator =(functor const & rhs)104 functor& operator=(functor const& rhs) 105 { 106 return *this; 107 } 108 #endif 109 110 /////////////////////////////////////////////////////////////////////// 111 // interface to the iterator_policies::split_functor_input policy 112 typedef Token result_type; 113 typedef functor unique; 114 typedef FunctorData<Iterator, SupportsActors, SupportsState 115 , token_value_type> shared; 116 117 BOOST_SPIRIT_EOF_PREFIX result_type const eof; 118 119 /////////////////////////////////////////////////////////////////////// 120 typedef Iterator iterator_type; 121 typedef typename shared::semantic_actions_type semantic_actions_type; 122 typedef typename shared::next_token_functor next_token_functor; 123 typedef typename shared::get_state_name_type get_state_name_type; 124 125 // this is needed to wrap the semantic actions in a proper way 126 typedef typename shared::wrap_action_type wrap_action_type; 127 128 /////////////////////////////////////////////////////////////////////// 129 template <typename MultiPass> get_next(MultiPass & mp,result_type & result)130 static result_type& get_next(MultiPass& mp, result_type& result) 131 { 132 typedef typename result_type::id_type id_type; 133 134 shared& data = mp.shared()->ftor; 135 for(;;) 136 { 137 if (data.get_first() == data.get_last()) 138 #if defined(BOOST_SPIRIT_STATIC_EOF) 139 return result = eof; 140 #else 141 return result = mp.ftor.eof; 142 #endif 143 144 data.reset_value(); 145 Iterator end = data.get_first(); 146 std::size_t unique_id = boost::lexer::npos; 147 bool prev_bol = false; 148 149 // lexer matching might change state 150 std::size_t state = data.get_state(); 151 std::size_t id = data.next(end, unique_id, prev_bol); 152 153 if (boost::lexer::npos == id) { // no match 154 #if defined(BOOST_SPIRIT_LEXERTL_DEBUG) 155 std::string next; 156 Iterator it = data.get_first(); 157 for (std::size_t i = 0; i < 10 && it != data.get_last(); ++it, ++i) 158 next += *it; 159 160 std::cerr << "Not matched, in state: " << state 161 << ", lookahead: >" << next << "<" << std::endl; 162 #endif 163 return result = result_type(0); 164 } 165 else if (0 == id) { // EOF reached 166 #if defined(BOOST_SPIRIT_STATIC_EOF) 167 return result = eof; 168 #else 169 return result = mp.ftor.eof; 170 #endif 171 } 172 173 #if defined(BOOST_SPIRIT_LEXERTL_DEBUG) 174 { 175 std::string next; 176 Iterator it = end; 177 for (std::size_t i = 0; i < 10 && it != data.get_last(); ++it, ++i) 178 next += *it; 179 180 std::cerr << "Matched: " << id << ", in state: " 181 << state << ", string: >" 182 << std::basic_string<char_type>(data.get_first(), end) << "<" 183 << ", lookahead: >" << next << "<" << std::endl; 184 if (data.get_state() != state) { 185 std::cerr << "Switched to state: " 186 << data.get_state() << std::endl; 187 } 188 } 189 #endif 190 // account for a possibly pending lex::more(), i.e. moving 191 // data.first_ back to the start of the previously matched token. 192 bool adjusted = data.adjust_start(); 193 194 // set the end of the matched input sequence in the token data 195 data.set_end(end); 196 197 // invoke attached semantic actions, if defined, might change 198 // state, id, data.first_, and/or end 199 BOOST_SCOPED_ENUM(pass_flags) pass = 200 data.invoke_actions(state, id, unique_id, end); 201 202 if (data.has_value()) { 203 // return matched token using the token value as set before 204 // using data.set_value(), advancing 'data.first_' past the 205 // matched sequence 206 assign_on_exit<Iterator> on_exit(data.get_first(), end); 207 return result = result_type(id_type(id), state, data.get_value()); 208 } 209 else if (pass_flags::pass_normal == pass) { 210 // return matched token, advancing 'data.first_' past the 211 // matched sequence 212 assign_on_exit<Iterator> on_exit(data.get_first(), end); 213 return result = result_type(id_type(id), state, data.get_first(), end); 214 } 215 else if (pass_flags::pass_fail == pass) { 216 #if defined(BOOST_SPIRIT_LEXERTL_DEBUG) 217 std::cerr << "Matching forced to fail" << std::endl; 218 #endif 219 // if the data.first_ got adjusted above, revert this adjustment 220 if (adjusted) 221 data.revert_adjust_start(); 222 223 // one of the semantic actions signaled no-match 224 data.reset_bol(prev_bol); 225 if (state != data.get_state()) 226 continue; // retry matching if state has changed 227 228 // if the state is unchanged repeating the match wouldn't 229 // move the input forward, causing an infinite loop 230 return result = result_type(0); 231 } 232 233 #if defined(BOOST_SPIRIT_LEXERTL_DEBUG) 234 std::cerr << "Token ignored, continuing matching" << std::endl; 235 #endif 236 // if this token needs to be ignored, just repeat the matching, 237 // while starting right after the current match 238 data.get_first() = end; 239 } 240 } 241 242 // set_state are propagated up to the iterator interface, allowing to 243 // manipulate the current lexer state through any of the exposed 244 // iterators. 245 template <typename MultiPass> set_state(MultiPass & mp,std::size_t state)246 static std::size_t set_state(MultiPass& mp, std::size_t state) 247 { 248 std::size_t oldstate = mp.shared()->ftor.get_state(); 249 mp.shared()->ftor.set_state(state); 250 251 #if defined(BOOST_SPIRIT_LEXERTL_DEBUG) 252 std::cerr << "Switching state from: " << oldstate 253 << " to: " << state 254 << std::endl; 255 #endif 256 return oldstate; 257 } 258 259 template <typename MultiPass> get_state(MultiPass & mp)260 static std::size_t get_state(MultiPass& mp) 261 { 262 return mp.shared()->ftor.get_state(); 263 } 264 265 template <typename MultiPass> 266 static std::size_t map_state(MultiPass const & mp,char_type const * statename)267 map_state(MultiPass const& mp, char_type const* statename) 268 { 269 return mp.shared()->ftor.get_state_id(statename); 270 } 271 272 // we don't need this, but it must be there 273 template <typename MultiPass> destroy(MultiPass const &)274 static void destroy(MultiPass const&) {} 275 }; 276 277 #if defined(BOOST_SPIRIT_STATIC_EOF) 278 /////////////////////////////////////////////////////////////////////////// 279 // eof token 280 /////////////////////////////////////////////////////////////////////////// 281 template <typename Token 282 , template <typename, typename, typename, typename> class FunctorData 283 , typename Iterator, typename SupportsActors, typename SupportsState> 284 typename functor<Token, FunctorData, Iterator, SupportsActors, SupportsState>::result_type const 285 functor<Token, FunctorData, Iterator, SupportsActors, SupportsState>::eof = 286 typename functor<Token, FunctorData, Iterator, SupportsActors 287 , SupportsState>::result_type(); 288 #endif 289 290 }}}} 291 292 #undef BOOST_SPIRIT_EOF_PREFIX 293 #undef BOOST_SPIRIT_STATIC_EOF 294 295 #endif 296