1 // state_machine.hpp
2 // Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_STATE_MACHINE_HPP
7 #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_STATE_MACHINE_HPP
8 
9 #include <algorithm>
10 #include "conversion/char_state_machine.hpp"
11 #include "consts.hpp"
12 #include <deque>
13 #include "internals.hpp"
14 #include <map>
15 #include "containers/ptr_vector.hpp"
16 #include "size_t.hpp"
17 #include <string>
18 
19 namespace boost
20 {
21 namespace lexer
22 {
23 template<typename CharT>
24 class basic_state_machine
25 {
26 public:
27     typedef CharT char_type;
28 
29     class iterator
30     {
31     public:
32         friend class basic_state_machine;
33 
34         struct data
35         {
36             // Current iterator info
37             std::size_t dfa;
38             std::size_t states;
39             std::size_t state;
40             std::size_t transitions;
41             std::size_t transition;
42 
43             // Current state info
44             bool end_state;
45             std::size_t id;
46             std::size_t unique_id;
47             std::size_t goto_dfa;
48             std::size_t bol_index;
49             std::size_t eol_index;
50 
51             // Current transition info
52             basic_string_token<CharT> token;
53             std::size_t goto_state;
54 
databoost::lexer::basic_state_machine::iterator::data55             data () :
56                 dfa (npos),
57                 states (0),
58                 state (npos),
59                 transitions (0),
60                 transition (npos),
61                 end_state (false),
62                 id (npos),
63                 unique_id (npos),
64                 goto_dfa (npos),
65                 bol_index (npos),
66                 eol_index (npos),
67                 goto_state (npos)
68             {
69             }
70 
operator ==boost::lexer::basic_state_machine::iterator::data71             bool operator == (const data &rhs_) const
72             {
73                 return dfa == rhs_.dfa &&
74                     states == rhs_.states &&
75                     state == rhs_.state &&
76                     transitions == rhs_.transitions &&
77                     transition == rhs_.transition &&
78                     end_state == rhs_.end_state &&
79                     id == rhs_.id &&
80                     unique_id == rhs_.unique_id &&
81                     goto_dfa == rhs_.goto_dfa &&
82                     bol_index == rhs_.bol_index &&
83                     eol_index == rhs_.eol_index &&
84                     token == rhs_.token &&
85                     goto_state == rhs_.goto_state;
86             }
87         };
88 
iterator()89         iterator () :
90             _sm (0),
91             _dfas (0),
92             _dfa (npos),
93             _states (0),
94             _state (npos),
95             _transitions (0),
96             _transition (npos)
97         {
98         }
99 
operator ==(const iterator & rhs_) const100         bool operator == (const iterator &rhs_) const
101         {
102             return _dfas == rhs_._dfas && _dfa == rhs_._dfa &&
103                 _states == rhs_._states && _state == rhs_._state &&
104                 _transitions == rhs_._transitions &&
105                 _transition == rhs_._transition;
106         }
107 
operator !=(const iterator & rhs_) const108         bool operator != (const iterator &rhs_) const
109         {
110             return !(*this == rhs_);
111         }
112 
operator *()113         data &operator * ()
114         {
115             return _data;
116         }
117 
operator ->()118         data *operator -> ()
119         {
120             return &_data;
121         }
122 
123         // Let compiler generate operator = ().
124 
125         // prefix version
operator ++()126         iterator &operator ++ ()
127         {
128             next ();
129             return *this;
130         }
131 
132         // postfix version
operator ++(int)133         iterator operator ++ (int)
134         {
135             iterator iter_ = *this;
136 
137             next ();
138             return iter_;
139         }
140 
clear()141         void clear ()
142         {
143             _dfas = _states = _transitions = 0;
144             _dfa = _state = _transition = npos;
145         }
146 
147     private:
148         basic_state_machine *_sm;
149         data _data;
150         std::size_t _dfas;
151         std::size_t _dfa;
152         std::size_t _states;
153         std::size_t _state;
154         std::size_t _transitions;
155         std::size_t _transition;
156         typename detail::basic_char_state_machine<CharT>::state::
157             size_t_string_token_map::const_iterator _token_iter;
158         typename detail::basic_char_state_machine<CharT>::state::
159             size_t_string_token_map::const_iterator _token_end;
160 
next()161         void next ()
162         {
163             bool reset_state_ = false;
164 
165             if (_transition >= _transitions)
166             {
167                 _transition = _data.transition = 0;
168                 _data.state = ++_state;
169                 reset_state_ = true;
170 
171                 if (_state >= _states)
172                 {
173                     ++_dfa;
174 
175                     if (_dfa >= _dfas)
176                     {
177                         clear ();
178                         reset_state_ = false;
179                     }
180                     else
181                     {
182                         _states = _data.states =
183                             _sm->_csm._sm_vector[_dfa].size ();
184                         _state = _data.state = 0;
185                     }
186                 }
187             }
188             else
189             {
190                 _data.transition = _transition;
191             }
192 
193             if (reset_state_)
194             {
195                 const typename detail::basic_char_state_machine<CharT>::
196                     state *ptr_ = &_sm->_csm._sm_vector[_dfa][_state];
197 
198                 _transitions = _data.transitions = ptr_->_transitions.size ();
199                 _data.end_state = ptr_->_end_state;
200                 _data.id = ptr_->_id;
201                 _data.unique_id = ptr_->_unique_id;
202                 _data.goto_dfa = ptr_->_state;
203                 _data.bol_index = ptr_->_bol_index;
204                 _data.eol_index = ptr_->_eol_index;
205                 _token_iter = ptr_->_transitions.begin ();
206                 _token_end = ptr_->_transitions.end ();
207             }
208 
209             if (_token_iter != _token_end)
210             {
211                 _data.token = _token_iter->second;
212                 _data.goto_state = _token_iter->first;
213                 ++_token_iter;
214                 ++_transition;
215             }
216             else
217             {
218                 _data.token.clear ();
219                 _data.goto_state = npos;
220             }
221         }
222     };
223 
224     friend class iterator;
225 
basic_state_machine()226     basic_state_machine ()
227     {
228     }
229 
clear()230     void clear ()
231     {
232         _internals.clear ();
233         _csm.clear ();
234     }
235 
empty() const236     bool empty () const
237     {
238         // Don't include _csm in this test, as irrelevant to state.
239         return _internals._lookup->empty () &&
240             _internals._dfa_alphabet.empty () &&
241             _internals._dfa->empty ();
242     }
243 
size() const244     std::size_t size () const
245     {
246         return _internals._dfa->size ();
247     }
248 
operator ==(const basic_state_machine & rhs_) const249     bool operator == (const basic_state_machine &rhs_) const
250     {
251         // Don't include _csm in this test, as irrelevant to state.
252         return _internals._lookup == rhs_._internals._lookup &&
253             _internals._dfa_alphabet == rhs_._internals._dfa_alphabet &&
254             _internals._dfa == rhs_._internals._dfa &&
255             _internals._seen_BOL_assertion ==
256                 rhs_._internals._seen_BOL_assertion &&
257             _internals._seen_EOL_assertion ==
258                 rhs_._internals._seen_EOL_assertion;
259     }
260 
begin() const261     iterator begin () const
262     {
263         iterator iter_;
264 
265         iter_._sm = const_cast<basic_state_machine *>(this);
266         check_for_csm ();
267 
268         if (!_csm.empty ())
269         {
270             const typename detail::basic_char_state_machine<CharT>::
271                 state_vector *ptr_ = &_csm._sm_vector.front ();
272 
273             iter_._dfas = _csm._sm_vector.size ();
274             iter_._states = iter_._data.states = ptr_->size ();
275             iter_._transitions = iter_._data.transitions =
276                 ptr_->front ()._transitions.size ();
277             iter_._dfa = iter_._data.dfa = 0;
278             iter_._state = iter_._data.state = 0;
279             iter_._transition = 0;
280             iter_._data.end_state = ptr_->front ()._end_state;
281             iter_._data.id = ptr_->front ()._id;
282             iter_._data.unique_id = ptr_->front ()._unique_id;
283             iter_._data.goto_dfa = ptr_->front ()._state;
284             iter_._data.bol_index = ptr_->front ()._bol_index;
285             iter_._data.eol_index = ptr_->front ()._eol_index;
286             iter_._token_iter = ptr_->front ()._transitions.begin ();
287             iter_._token_end = ptr_->front ()._transitions.end ();
288 
289             // Deal with case where there is only a bol or eol
290             // but no other transitions.
291             if (iter_._transitions)
292             {
293                 ++iter_;
294             }
295         }
296 
297         return iter_;
298     }
299 
end() const300     iterator end () const
301     {
302         iterator iter_;
303 
304         iter_._sm = const_cast<basic_state_machine *>(this);
305         return iter_;
306     }
307 
swap(basic_state_machine & sm_)308     void swap (basic_state_machine &sm_)
309     {
310         _internals.swap (sm_._internals);
311         _csm.swap (sm_._csm);
312     }
313 
data() const314     const detail::internals &data () const
315     {
316         return _internals;
317     }
318 
319 private:
320     detail::internals _internals;
321     mutable detail::basic_char_state_machine<CharT> _csm;
322 
check_for_csm() const323     void check_for_csm () const
324     {
325         if (_csm.empty ())
326         {
327             human_readable (_csm);
328         }
329     }
330 
human_readable(detail::basic_char_state_machine<CharT> & sm_) const331     void human_readable (detail::basic_char_state_machine<CharT> &sm_) const
332     {
333         const std::size_t max_ = sizeof (CharT) == 1 ?
334             num_chars : num_wchar_ts;
335         const std::size_t start_states_ = _internals._dfa->size ();
336 
337         sm_.clear ();
338         sm_._sm_vector.resize (start_states_);
339 
340         for (std::size_t start_state_index_ = 0;
341             start_state_index_ < start_states_; ++start_state_index_)
342         {
343             const detail::internals::size_t_vector *lu_ =
344                 _internals._lookup[start_state_index_];
345             const std::size_t alphabet_ =
346                 _internals._dfa_alphabet[start_state_index_] - dfa_offset;
347             std::vector<std::basic_string<CharT> > chars_ (alphabet_);
348             const std::size_t states_ = _internals._dfa[start_state_index_]->
349                 size () / (alphabet_ + dfa_offset);
350             const std::size_t *read_ptr_ = &_internals.
351                 _dfa[start_state_index_]->front () + alphabet_ + dfa_offset;
352 
353             sm_._sm_vector[start_state_index_].resize (states_ - 1);
354 
355             for (std::size_t alpha_index_ = 0; alpha_index_ < max_;
356                 ++alpha_index_)
357             {
358                 const std::size_t col_ = lu_->at (alpha_index_);
359 
360                 if (col_ != static_cast<std::size_t>(dead_state_index))
361                 {
362                     chars_[col_ - dfa_offset] += static_cast<CharT>
363                         (alpha_index_);
364                 }
365             }
366 
367             for (std::size_t state_index_ = 1; state_index_ < states_;
368                 ++state_index_)
369             {
370                 typename detail::basic_char_state_machine<CharT>::state
371                     *state_ = &sm_._sm_vector[start_state_index_]
372                     [state_index_ - 1];
373 
374                 state_->_end_state = *read_ptr_ != 0;
375                 state_->_id = *(read_ptr_ + id_index);
376                 state_->_unique_id = *(read_ptr_ + unique_id_index);
377                 state_->_state = *(read_ptr_ + state_index);
378                 state_->_bol_index = *(read_ptr_ + bol_index) - 1;
379                 state_->_eol_index = *(read_ptr_ + eol_index) - 1;
380                 read_ptr_ += dfa_offset;
381 
382                 for (std::size_t col_index_ = 0; col_index_ < alphabet_;
383                     ++col_index_, ++read_ptr_)
384                 {
385                     const std::size_t transition_ = *read_ptr_;
386 
387                     if (transition_ != 0)
388                     {
389                         const std::size_t i_ = transition_ - 1;
390                         typename detail::basic_char_state_machine<CharT>::
391                             state::size_t_string_token_map::iterator iter_ =
392                             state_->_transitions.find (i_);
393 
394                         if (iter_ == state_->_transitions.end ())
395                         {
396                             basic_string_token<CharT> token_
397                                 (false, chars_[col_index_]);
398                             typename detail::basic_char_state_machine<CharT>::
399                                 state::size_t_string_token_pair pair_
400                                 (i_, token_);
401 
402                             state_->_transitions.insert (pair_);
403                         }
404                         else
405                         {
406                             iter_->second._charset += chars_[col_index_];
407                         }
408                     }
409                 }
410 
411                 for (typename detail::basic_char_state_machine<CharT>::state::
412                     size_t_string_token_map::iterator iter_ =
413                     state_->_transitions.begin (),
414                     end_ = state_->_transitions.end ();
415                     iter_ != end_; ++iter_)
416                 {
417                     std::sort (iter_->second._charset.begin (),
418                         iter_->second._charset.end ());
419                     iter_->second.normalise ();
420                 }
421             }
422         }
423     }
424 };
425 
426 typedef basic_state_machine<char> state_machine;
427 typedef basic_state_machine<wchar_t> wstate_machine;
428 }
429 }
430 
431 #endif
432