1[/ 2 Copyright 2006-2007 John Maddock. 3 Distributed under the Boost Software License, Version 1.0. 4 (See accompanying file LICENSE_1_0.txt or copy at 5 http://www.boost.org/LICENSE_1_0.txt). 6] 7 8[section:mfc_strings Using Boost Regex With MFC Strings] 9 10[section:mfc_intro Introduction to Boost.Regex and MFC Strings] 11 12The header `<boost/regex/mfc.hpp>` provides Boost.Regex support for MFC string 13types: note that this support requires Visual Studio .NET (Visual C++ 7) or 14later, where all of the MFC and ATL string types are based around the 15CSimpleStringT class template. 16 17In the following documentation, whenever you see 18CSimpleStringT<charT>, then you can substitute any of the following 19MFC/ATL types (all of which inherit from CSimpleStringT): 20 21 CString 22 CStringA 23 CStringW 24 CAtlString 25 CAtlStringA 26 CAtlStringW 27 CStringT<charT,traits> 28 CFixedStringT<charT,N> 29 CSimpleStringT<charT> 30 31[endsect] 32[section:mfc_regex_types Regex Types Used With MFC Strings] 33 34The following typedefs are provided for the convenience of those working with 35TCHAR's: 36 37 typedef basic_regex<TCHAR> tregex; 38 typedef match_results<TCHAR const*> tmatch; 39 typedef regex_iterator<TCHAR const*> tregex_iterator; 40 typedef regex_token_iterator<TCHAR const*> tregex_token_iterator; 41 42If you are working with explicitly narrow or wide characters rather than 43TCHAR, then use the regular Boost.Regex types `regex` and `wregex` instead. 
44 45[endsect] 46[section:mfc_regex_create Regular Expression Creation From an MFC String] 47 48The following helper function is available to assist in the creation of a 49regular expression from an MFC/ATL string type: 50 51 template <class charT> 52 basic_regex<charT> 53 make_regex(const ATL::CSimpleStringT<charT>& s, 54 ::boost::regex_constants::syntax_option_type f = boost::regex_constants::normal); 55 56[*Effects]: returns `basic_regex<charT>(s.GetString(), s.GetString() + s.GetLength(), f);` 57 58[endsect] 59[section:mfc_algo Overloaded Algorithms For MFC String Types] 60 61For each regular expression algorithm that's overloaded for a `std::basic_string` 62argument, there is also one overloaded for the MFC/ATL string types. These 63algorithm signatures all look a lot more complex than they actually are, 64but for completeness here they are anyway: 65 66[h4 regex_match] 67 68There are two overloads: the first reports what matched in a `match_results` 69structure, the second does not. 70 71All the usual caveats for [regex_match] apply: in particular, the algorithm 72will only report a successful match if all of the input text matches the 73expression; if this isn't what you want, then use [regex_search] instead. 
74 75 template <class charT, class T, class A> 76 bool regex_match( 77 const ATL::CSimpleStringT<charT>& s, 78 match_results<const charT*, A>& what, 79 const basic_regex<charT, T>& e, 80 boost::regex_constants::match_flag_type f = boost::regex_constants::match_default); 81 82[*Effects]: returns `::boost::regex_match(s.GetString(), s.GetString() + s.GetLength(), what, e, f);` 83 84[*Example]: 85 86 // 87 // Extract filename part of a path from a CString and return the result 88 // as another CString: 89 // 90 CString get_filename(const CString& path) 91 { 92 boost::tregex r(__T("(?:\\A|.*\\\\)([^\\\\]+)")); 93 boost::tmatch what; 94 if(boost::regex_match(path, what, r)) 95 { 96 // extract $1 as a CString: 97 return CString(what[1].first, what.length(1)); 98 } 99 else 100 { 101 throw std::runtime_error("Invalid pathname"); 102 } 103 } 104 105[h4 regex_match (second overload)] 106 107 template <class charT, class T> 108 bool regex_match( 109 const ATL::CSimpleStringT<charT>& s, 110 const basic_regex<charT, T>& e, 111 boost::regex_constants::match_flag_type f = boost::regex_constants::match_default) 112 113[*Effects]: returns `::boost::regex_match(s.GetString(), s.GetString() + s.GetLength(), e, f);` 114 115[*Example]: 116 117 // 118 // Find out if *password* meets our password requirements, 119 // as defined by the regular expression *requirements*. 
120 // 121 bool is_valid_password(const CString& password, const CString& requirements) 122 { 123 return boost::regex_match(password, boost::make_regex(requirements)); 124 } 125 126[h4 regex_search] 127 128There are two additional overloads for [regex_search], the first reports what 129matched, the second does not: 130 131 template <class charT, class A, class T> 132 bool regex_search(const ATL::CSimpleStringT<charT>& s, 133 match_results<const charT*, A>& what, 134 const basic_regex<charT, T>& e, 135 boost::regex_constants::match_flag_type f = boost::regex_constants::match_default) 136 137[*Effects]: returns `::boost::regex_search(s.GetString(), s.GetString() + s.GetLength(), what, e, f);` 138 139[*Example]: Postcode extraction from an address string. 140 141 CString extract_postcode(const CString& address) 142 { 143 // searches through address for a UK postcode and returns the result, 144 // the expression used is by Phil A. on www.regxlib.com: 145 boost::tregex r(__T("^(([A-Z]{1,2}[0-9]{1,2})|([A-Z]{1,2}[0-9][A-Z]))\\s?([0-9][A-Z]{2})$")); 146 boost::tmatch what; 147 if(boost::regex_search(address, what, r)) 148 { 149 // extract $0 as a CString: 150 return CString(what[0].first, what.length()); 151 } 152 else 153 { 154 throw std::runtime_error("No postcode found"); 155 } 156 } 157 158[h4 regex_search (second overload)] 159 160 template <class charT, class T> 161 inline bool regex_search(const ATL::CSimpleStringT<charT>& s, 162 const basic_regex<charT, T>& e, 163 boost::regex_constants::match_flag_type f = boost::regex_constants::match_default) 164 165[*Effects]: returns `::boost::regex_search(s.GetString(), s.GetString() + s.GetLength(), e, f);` 166 167[h4 regex_replace] 168 169There are two additional overloads for [regex_replace], the first sends output 170to an output iterator, while the second creates a new string: 171 172 template <class OutputIterator, class BidirectionalIterator, class traits, class 173 charT> 174 OutputIterator regex_replace(OutputIterator 
out, 175 BidirectionalIterator first, 176 BidirectionalIterator last, 177 const basic_regex<charT, traits>& e, 178 const ATL::CSimpleStringT<charT>& fmt, 179 match_flag_type flags = match_default) 180 181[*Effects]: returns `::boost::regex_replace(out, first, last, e, fmt.GetString(), flags);` 182 183 template <class traits, class charT> 184 ATL::CSimpleStringT<charT> regex_replace(const ATL::CSimpleStringT<charT>& s, 185 const basic_regex<charT, traits>& e, 186 const ATL::CSimpleStringT<charT>& fmt, 187 match_flag_type flags = match_default) 188 189[*Effects]: returns a new string created using [regex_replace] and the same 190memory manager as string /s/. 191 192[*Example]: 193 194 // 195 // Take a credit card number as a string of digits, 196 // and reformat it as a human readable string with "-" 197 // separating each group of four digits: 198 // 199 const boost::tregex e(__T("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z")); 200 const CString human_format = __T("$1-$2-$3-$4"); 201 202 CString human_readable_card_number(const CString& s) 203 { 204 return boost::regex_replace(s, e, human_format); 205 } 206 207[endsect] 208[section:mfc_iter Iterating Over the Matches Within An MFC String] 209 210The following helper functions are provided to ease the conversion from an 211MFC/ATL string to a [regex_iterator] or [regex_token_iterator]: 212 213[h4 regex_iterator creation helper] 214 215 template <class charT> 216 regex_iterator<charT const*> 217 make_regex_iterator( 218 const ATL::CSimpleStringT<charT>& s, 219 const basic_regex<charT>& e, 220 ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default); 221 222[*Effects]: returns `regex_iterator(s.GetString(), s.GetString() + s.GetLength(), e, f);` 223 224[*Example]: 225 226 void enumerate_links(const CString& html) 227 { 228 // enumerate and print all the links in some HTML text, 229 // the expression used is by Andew Lee on www.regxlib.com: 230 boost::tregex r( 231 
__T("href=[\"\']((http:\\/\\/|\\.\\/|\\/)?\\w+" 232 "(\\.\\w+)*(\\/\\w+(\\.\\w+)?)*" 233 "(\\/|\\?\\w*=\\w*(&\\w*=\\w*)*)?)[\"\']")); 234 boost::tregex_iterator i(boost::make_regex_iterator(html, r)), j; 235 while(i != j) 236 { 237 std::cout << (*i)[1] << std::endl; 238 ++i; 239 } 240 } 241 242 243[h4 regex_token_iterator creation helpers] 244 245 template <class charT> 246 regex_token_iterator<charT const*> 247 make_regex_token_iterator( 248 const ATL::CSimpleStringT<charT>& s, 249 const basic_regex<charT>& e, 250 int sub = 0, 251 ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default); 252 253[*Effects]: returns `regex_token_iterator(s.GetString(), s.GetString() + s.GetLength(), e, sub, f);` 254 255 template <class charT> 256 regex_token_iterator<charT const*> 257 make_regex_token_iterator( 258 const ATL::CSimpleStringT<charT>& s, 259 const basic_regex<charT>& e, 260 const std::vector<int>& subs, 261 ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default); 262 263[*Effects]: returns `regex_token_iterator(s.GetString(), s.GetString() + s.GetLength(), e, subs, f);` 264 265 template <class charT, std::size_t N> 266 regex_token_iterator<charT const*> 267 make_regex_token_iterator( 268 const ATL::CSimpleStringT<charT>& s, 269 const basic_regex<charT>& e, 270 const int (& subs)[N], 271 ::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default); 272 273[*Effects]: returns `regex_token_iterator(s.GetString(), s.GetString() + s.GetLength(), e, subs, f);` 274 275[*Example]: 276 277 void enumerate_links2(const CString& html) 278 { 279 // enumerate and print all the links in some HTML text, 280 // the expression used is by Andew Lee on www.regxlib.com: 281 boost::tregex r( 282 __T("href=[\"\']((http:\\/\\/|\\.\\/|\\/)?\\w+" 283 "(\\.\\w+)*(\\/\\w+(\\.\\w+)?)*" 284 "(\\/|\\?\\w*=\\w*(&\\w*=\\w*)*)?)[\"\']")); 285 boost::tregex_token_iterator i(boost::make_regex_token_iterator(html, r, 1)), j; 
286 while(i != j) 287 { 288 std::cout << *i << std::endl; 289 ++i; 290 } 291 } 292 293[endsect] 294[endsect] 295 296