1 //  Copyright (c) 2001-2011 Hartmut Kaiser
2 //
3 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
4 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5 
6 #if !defined(BOOST_SPIRIT_LEXERTL_ITERATOR_TOKENISER_MARCH_22_2007_0859AM)
7 #define BOOST_SPIRIT_LEXERTL_ITERATOR_TOKENISER_MARCH_22_2007_0859AM
8 
9 #if defined(_MSC_VER)
10 #pragma once
11 #endif
12 
13 #include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
14 #include <boost/spirit/home/support/detail/lexer/consts.hpp>
15 #include <boost/spirit/home/support/detail/lexer/size_t.hpp>
16 #include <boost/spirit/home/support/detail/lexer/char_traits.hpp>
17 #include <iterator> // for std::iterator_traits
18 #include <vector>
19 
20 namespace boost { namespace spirit { namespace lex { namespace lexertl
21 {
22     ///////////////////////////////////////////////////////////////////////////
23     template<typename Iterator>
24     class basic_iterator_tokeniser
25     {
26     public:
27         typedef std::vector<std::size_t> size_t_vector;
28         typedef typename std::iterator_traits<Iterator>::value_type char_type;
29 
next(boost::lexer::basic_state_machine<char_type> const & state_machine_,std::size_t & dfa_state_,bool & bol_,Iterator & start_token_,Iterator const & end_,std::size_t & unique_id_)30         static std::size_t next (
31             boost::lexer::basic_state_machine<char_type> const& state_machine_
32           , std::size_t &dfa_state_, bool& bol_, Iterator &start_token_
33           , Iterator const& end_, std::size_t& unique_id_)
34         {
35             if (start_token_ == end_)
36             {
37                 unique_id_ = boost::lexer::npos;
38                 return 0;
39             }
40 
41             bool bol = bol_;
42             boost::lexer::detail::internals const& internals_ =
43                 state_machine_.data();
44 
45         again:
46             std::size_t const* lookup_ = &internals_._lookup[dfa_state_]->
47                 front ();
48             std::size_t dfa_alphabet_ = internals_._dfa_alphabet[dfa_state_];
49             std::size_t const* dfa_ = &internals_._dfa[dfa_state_]->front ();
50 
51             std::size_t const* ptr_ = dfa_ + dfa_alphabet_;
52             Iterator curr_ = start_token_;
53             bool end_state_ = *ptr_ != 0;
54             std::size_t id_ = *(ptr_ + boost::lexer::id_index);
55             std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index);
56             std::size_t end_start_state_ = dfa_state_;
57             bool end_bol_ = bol_;
58             Iterator end_token_ = start_token_;
59 
60             while (curr_ != end_)
61             {
62                 std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index];
63                 std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
64 
65                 if (BOL_state_ && bol)
66                 {
67                     ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
68                 }
69                 else if (EOL_state_ && *curr_ == '\n')
70                 {
71                     ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
72                 }
73                 else
74                 {
75                     typedef typename
76                         std::iterator_traits<Iterator>::value_type
77                     value_type;
78                     typedef typename
79                         boost::lexer::char_traits<value_type>::index_type
80                     index_type;
81 
82                     index_type index =
83                         boost::lexer::char_traits<value_type>::call(*curr_++);
84                     bol = (index == '\n') ? true : false;
85                     std::size_t const state_ = ptr_[
86                         lookup_[static_cast<std::size_t>(index)]];
87 
88                     if (state_ == 0)
89                     {
90                         break;
91                     }
92 
93                     ptr_ = &dfa_[state_ * dfa_alphabet_];
94                 }
95 
96                 if (*ptr_)
97                 {
98                     end_state_ = true;
99                     id_ = *(ptr_ + boost::lexer::id_index);
100                     uid_ = *(ptr_ + boost::lexer::unique_id_index);
101                     end_start_state_ = *(ptr_ + boost::lexer::state_index);
102                     end_bol_ = bol;
103                     end_token_ = curr_;
104                 }
105             }
106 
107             std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
108 
109             if (EOL_state_ && curr_ == end_)
110             {
111                 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
112 
113                 if (*ptr_)
114                 {
115                     end_state_ = true;
116                     id_ = *(ptr_ + boost::lexer::id_index);
117                     uid_ = *(ptr_ + boost::lexer::unique_id_index);
118                     end_start_state_ = *(ptr_ + boost::lexer::state_index);
119                     end_bol_ = bol;
120                     end_token_ = curr_;
121                 }
122             }
123 
124             if (end_state_) {
125                 // return longest match
126                 dfa_state_ = end_start_state_;
127                 start_token_ = end_token_;
128 
129                 if (id_ == 0)
130                 {
131                     bol = end_bol_;
132                     goto again;
133                 }
134                 else
135                 {
136                     bol_ = end_bol_;
137                 }
138             }
139             else {
140                 bol_ = (*start_token_ == '\n') ? true : false;
141                 id_ = boost::lexer::npos;
142                 uid_ = boost::lexer::npos;
143             }
144 
145             unique_id_ = uid_;
146             return id_;
147         }
148 
149         ///////////////////////////////////////////////////////////////////////
next(boost::lexer::basic_state_machine<char_type> const & state_machine_,bool & bol_,Iterator & start_token_,Iterator const & end_,std::size_t & unique_id_)150         static std::size_t next (
151             boost::lexer::basic_state_machine<char_type> const& state_machine_
152           , bool& bol_, Iterator &start_token_, Iterator const& end_
153           , std::size_t& unique_id_)
154         {
155             if (start_token_ == end_)
156             {
157                 unique_id_ = boost::lexer::npos;
158                 return 0;
159             }
160 
161             bool bol = bol_;
162             std::size_t const* lookup_ = &state_machine_.data()._lookup[0]->front();
163             std::size_t dfa_alphabet_ = state_machine_.data()._dfa_alphabet[0];
164             std::size_t const* dfa_ = &state_machine_.data()._dfa[0]->front ();
165             std::size_t const* ptr_ = dfa_ + dfa_alphabet_;
166 
167             Iterator curr_ = start_token_;
168             bool end_state_ = *ptr_ != 0;
169             std::size_t id_ = *(ptr_ + boost::lexer::id_index);
170             std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index);
171             bool end_bol_ = bol_;
172             Iterator end_token_ = start_token_;
173 
174             while (curr_ != end_)
175             {
176                 std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index];
177                 std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
178 
179                 if (BOL_state_ && bol)
180                 {
181                     ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
182                 }
183                 else if (EOL_state_ && *curr_ == '\n')
184                 {
185                     ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
186                 }
187                 else
188                 {
189                     typedef typename
190                         std::iterator_traits<Iterator>::value_type
191                     value_type;
192                     typedef typename
193                         boost::lexer::char_traits<value_type>::index_type
194                     index_type;
195 
196                     index_type index =
197                         boost::lexer::char_traits<value_type>::call(*curr_++);
198                     bol = (index == '\n') ? true : false;
199                     std::size_t const state_ = ptr_[
200                         lookup_[static_cast<std::size_t>(index)]];
201 
202                     if (state_ == 0)
203                     {
204                         break;
205                     }
206 
207                     ptr_ = &dfa_[state_ * dfa_alphabet_];
208                 }
209 
210                 if (*ptr_)
211                 {
212                     end_state_ = true;
213                     id_ = *(ptr_ + boost::lexer::id_index);
214                     uid_ = *(ptr_ + boost::lexer::unique_id_index);
215                     end_bol_ = bol;
216                     end_token_ = curr_;
217                 }
218             }
219 
220             std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
221 
222             if (EOL_state_ && curr_ == end_)
223             {
224                 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
225 
226                 if (*ptr_)
227                 {
228                     end_state_ = true;
229                     id_ = *(ptr_ + boost::lexer::id_index);
230                     uid_ = *(ptr_ + boost::lexer::unique_id_index);
231                     end_bol_ = bol;
232                     end_token_ = curr_;
233                 }
234             }
235 
236             if (end_state_) {
237                 // return longest match
238                 bol_ = end_bol_;
239                 start_token_ = end_token_;
240             }
241             else {
242                 bol_ = *start_token_ == '\n';
243                 id_ = boost::lexer::npos;
244                 uid_ = boost::lexer::npos;
245             }
246 
247             unique_id_ = uid_;
248             return id_;
249         }
250     };
251 
252 }}}}
253 
254 #endif
255