1[/
2  Copyright 2006-2007 John Maddock.
3  Distributed under the Boost Software License, Version 1.0.
4  (See accompanying file LICENSE_1_0.txt or copy at
5  http://www.boost.org/LICENSE_1_0.txt).
6]
7
8[section:mfc_strings Using Boost Regex With MFC Strings]
9
10[section:mfc_intro Introduction to Boost.Regex and MFC Strings]
11
12The header `<boost/regex/mfc.hpp>` provides Boost.Regex support for MFC string
13types: note that this support requires Visual Studio .NET (Visual C++ 7) or
14later, where all of the MFC and ATL string types are based around the
15CSimpleStringT class template.
16
17In the following documentation, whenever you see
18CSimpleStringT<charT>, then you can substitute any of the following
19MFC/ATL types (all of which inherit from CSimpleStringT):
20
21   CString
22   CStringA
23   CStringW
24   CAtlString
25   CAtlStringA
26   CAtlStringW
27   CStringT<charT,traits>
28   CFixedStringT<charT,N>
29   CSimpleStringT<charT>
30
31[endsect]
32[section:mfc_regex_types Regex Types Used With MFC Strings]
33
34The following typedefs are provided for the convenience of those working with
35TCHAR's:
36
37   typedef basic_regex<TCHAR>                  tregex;
38   typedef match_results<TCHAR const*>         tmatch;
39   typedef regex_iterator<TCHAR const*>        tregex_iterator;
40   typedef regex_token_iterator<TCHAR const*>  tregex_token_iterator;
41
42If you are working with explicitly narrow or wide characters rather than
43TCHAR, then use the regular Boost.Regex types `regex` and `wregex` instead.
44
45[endsect]
46[section:mfc_regex_create Regular Expression Creation From an MFC String]
47
48The following helper function is available to assist in the creation of a
49regular expression from an MFC/ATL string type:
50
51   template <class charT>
52   basic_regex<charT>
53      make_regex(const ATL::CSimpleStringT<charT>& s,
54               ::boost::regex_constants::syntax_option_type f = boost::regex_constants::normal);
55
56[*Effects]: returns `basic_regex<charT>(s.GetString(), s.GetString() + s.GetLength(), f);`
57
58[endsect]
59[section:mfc_algo Overloaded Algorithms For MFC String Types]
60
61For each regular expression algorithm that's overloaded for a `std::basic_string`
62argument, there is also one overloaded for the MFC/ATL string types.  These
63algorithm signatures all look a lot more complex than they actually are,
64but for completeness here they are anyway:
65
66[h4 regex_match]
67
68There are two overloads: the first reports what matched in a match_results
69structure, while the second does not.
70
71All the usual caveats for [regex_match] apply: in particular, the algorithm
72will only report a successful match if all of the input text matches the
73expression.  If this isn't what you want, then use [regex_search] instead.
74
75   template <class charT, class T, class A>
76   bool regex_match(
77      const ATL::CSimpleStringT<charT>& s,
78      match_results<const charT*, A>& what,
79      const basic_regex<charT, T>& e,
80      boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
81
82[*Effects]: returns `::boost::regex_match(s.GetString(), s.GetString() + s.GetLength(), what, e, f);`
83
84[*Example:]
85
86   //
87   // Extract filename part of a path from a CString and return the result
88   // as another CString:
89   //
90   CString get_filename(const CString& path)
91   {
92      boost::tregex r(__T("(?:\\A|.*\\\\)([^\\\\]+)"));
93      boost::tmatch what;
94      if(boost::regex_match(path, what, r))
95      {
96         // extract $1 as a CString:
97         return CString(what[1].first, what.length(1));
98      }
99      else
100      {
101         throw std::runtime_error("Invalid pathname");
102      }
103   }
104
105[h4 regex_match (second overload)]
106
107   template <class charT, class T>
108   bool regex_match(
109      const ATL::CSimpleStringT<charT>& s,
110      const basic_regex<charT, T>& e,
111      boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
112
113[*Effects]: returns `::boost::regex_match(s.GetString(), s.GetString() + s.GetLength(), e, f);`
114
115[*Example:]
116
117   //
118   // Find out if *password* meets our password requirements,
119   // as defined by the regular expression *requirements*.
120   //
121   bool is_valid_password(const CString& password, const CString& requirements)
122   {
123      return boost::regex_match(password, boost::make_regex(requirements));
124   }
125
126[h4 regex_search]
127
128There are two additional overloads for [regex_search]; the first reports what
129matched, the second does not:
130
131   template <class charT, class A, class T>
132   bool regex_search(const ATL::CSimpleStringT<charT>& s,
133                     match_results<const charT*, A>& what,
134                     const basic_regex<charT, T>& e,
135                     boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
136
137[*Effects]: returns `::boost::regex_search(s.GetString(), s.GetString() + s.GetLength(), what, e, f);`
138
139[*Example]: Postcode extraction from an address string.
140
141   CString extract_postcode(const CString& address)
142   {
143      // searches through address for a UK postcode and returns the result,
144      // the expression used is by Phil A. on www.regexlib.com:
145      boost::tregex r(__T("^(([A-Z]{1,2}[0-9]{1,2})|([A-Z]{1,2}[0-9][A-Z]))\\s?([0-9][A-Z]{2})$"));
146      boost::tmatch what;
147      if(boost::regex_search(address, what, r))
148      {
149         // extract $0 as a CString:
150         return CString(what[0].first, what.length());
151      }
152      else
153      {
154         throw std::runtime_error("No postcode found");
155      }
156   }
157
158[h4 regex_search (second overload)]
159
160   template <class charT, class T>
161   inline bool regex_search(const ATL::CSimpleStringT<charT>& s,
162                  const basic_regex<charT, T>& e,
163                  boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
164
165[*Effects]: returns `::boost::regex_search(s.GetString(), s.GetString() + s.GetLength(), e, f);`
166
167[h4 regex_replace]
168
169There are two additional overloads for [regex_replace], the first sends output
170to an output iterator, while the second creates a new string:
171
172   template <class OutputIterator, class BidirectionalIterator, class traits, class
173            charT>
174   OutputIterator regex_replace(OutputIterator out,
175                              BidirectionalIterator first,
176                              BidirectionalIterator last,
177                              const basic_regex<charT, traits>& e,
178                              const ATL::CSimpleStringT<charT>& fmt,
179                              match_flag_type flags = match_default);
180
181[*Effects]: returns `::boost::regex_replace(out, first, last, e, fmt.GetString(), flags);`
182
183   template <class traits, class charT>
184   ATL::CSimpleStringT<charT> regex_replace(const ATL::CSimpleStringT<charT>& s,
185                              const basic_regex<charT, traits>& e,
186                              const ATL::CSimpleStringT<charT>& fmt,
187                              match_flag_type flags = match_default);
188
189[*Effects]: returns a new string created using [regex_replace], and the same
190memory manager as string /s/.
191
192[*Example]:
193
194   //
195   // Take a credit card number as a string of digits,
196   // and reformat it as a human readable string with "-"
197   // separating each group of four digits:
198   //
199   const boost::tregex e(__T("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z"));
200   const CString human_format = __T("$1-$2-$3-$4");
201
202   CString human_readable_card_number(const CString& s)
203   {
204      return boost::regex_replace(s, e, human_format);
205   }
206
207[endsect]
208[section:mfc_iter Iterating Over the Matches Within An MFC String]
209
210The following helper functions are provided to ease the conversion from an
211MFC/ATL string to a [regex_iterator] or [regex_token_iterator]:
212
213[h4 regex_iterator creation helper]
214
215   template <class charT>
216   regex_iterator<charT const*>
217      make_regex_iterator(
218         const ATL::CSimpleStringT<charT>& s,
219         const basic_regex<charT>& e,
220         ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
221
222[*Effects]: returns `regex_iterator(s.GetString(), s.GetString() + s.GetLength(), e, f);`
223
224[*Example]:
225
226   void enumerate_links(const CString& html)
227   {
228      // enumerate and print all the links in some HTML text,
229      // the expression used is by Andrew Lee on www.regexlib.com:
230      boost::tregex r(
231         __T("href=[\"\']((http:\\/\\/|\\.\\/|\\/)?\\w+"
232             "(\\.\\w+)*(\\/\\w+(\\.\\w+)?)*"
233             "(\\/|\\?\\w*=\\w*(&\\w*=\\w*)*)?)[\"\']"));
234      boost::tregex_iterator i(boost::make_regex_iterator(html, r)), j;
235      while(i != j)
236      {
237         std::cout << (*i)[1] << std::endl;
238         ++i;
239      }
240   }
241
242
243[h4 regex_token_iterator creation helpers]
244
245   template <class charT>
246   regex_token_iterator<charT const*>
247      make_regex_token_iterator(
248         const ATL::CSimpleStringT<charT>& s,
249         const basic_regex<charT>& e,
250         int sub = 0,
251         ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
252
253[*Effects]: returns `regex_token_iterator(s.GetString(), s.GetString() + s.GetLength(), e, sub, f);`
254
255   template <class charT>
256   regex_token_iterator<charT const*>
257      make_regex_token_iterator(
258         const ATL::CSimpleStringT<charT>& s,
259         const basic_regex<charT>& e,
260         const std::vector<int>& subs,
261         ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
262
263[*Effects]: returns `regex_token_iterator(s.GetString(), s.GetString() + s.GetLength(), e, subs, f);`
264
265   template <class charT, std::size_t N>
266   regex_token_iterator<charT const*>
267      make_regex_token_iterator(
268         const ATL::CSimpleStringT<charT>& s,
269         const basic_regex<charT>& e,
270         const int (& subs)[N],
271         ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
272
273[*Effects]: returns `regex_token_iterator(s.GetString(), s.GetString() + s.GetLength(), e, subs, f);`
274
275[*Example]:
276
277   void enumerate_links2(const CString& html)
278   {
279      // enumerate and print all the links in some HTML text,
280      // the expression used is by Andrew Lee on www.regexlib.com:
281      boost::tregex r(
282            __T("href=[\"\']((http:\\/\\/|\\.\\/|\\/)?\\w+"
283                "(\\.\\w+)*(\\/\\w+(\\.\\w+)?)*"
284                "(\\/|\\?\\w*=\\w*(&\\w*=\\w*)*)?)[\"\']"));
285      boost::tregex_token_iterator i(boost::make_regex_token_iterator(html, r, 1)), j;
286      while(i != j)
287      {
288         std::cout << *i << std::endl;
289         ++i;
290      }
291   }
292
293[endsect]
294[endsect]
295
296