mirror of
https://github.com/boostorg/regex.git
synced 2025-07-05 00:26:30 +02:00
295 lines
15 KiB
HTML
295 lines
15 KiB
HTML
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||
<html>
|
||
<head>
|
||
<title>Boost.Regex: Working With MFC/ATL String Types</title>
|
||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
|
||
<body>
|
||
<P>
|
||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||
<TR>
|
||
<td vAlign="top" width="300">
|
||
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../boost.png" width="277" border="0"></A></h3>
|
||
</td>
|
||
<TD width="353">
|
||
<H1 align="center">Boost.Regex</H1>
|
||
<H2 align="center">Working With MFC/ATL String Types.</H2>
|
||
</TD>
|
||
<td width="50">
|
||
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
|
||
</td>
|
||
</TR>
|
||
</TABLE>
|
||
</P>
|
||
<HR>
|
||
<H3>Contents</H3>
|
||
<dl class="index">
|
||
<dt><A href="#intro">Introduction</A> <dt><A href="#types">Types</A> <dt><A href="#create">Regular
|
||
Expression Creation</A> <dt><A href="#algo">Overloaded Algorithms</A>
|
||
<dd>
|
||
<dl>
|
||
<dt><A href="#regex_match">regex_match</A> <dt><A href="#regex_search">regex_search</A>
|
||
<dt><A href="#regex_replace">regex_replace</A> </dt>
|
||
</dl>
|
||
<dt><A href="#iterators">Iterators</A>
|
||
<dd>
|
||
<dl>
|
||
<dt><A href="#regex_iterator">regex_iterator creation helper</A> <dt><A href="#regex_token_iterator">
|
||
regex_token_iterator creation helpers</A></dt>
|
||
</dl>
|
||
</dd>
|
||
</dl>
|
||
<H3><a name="intro"></a>Introduction</H3>
|
||
<P>The header <boost/regex/mfc.hpp> provides Boost.Regex support for MFC
|
||
string types: note that this support requires Visual Studio .NET (Visual C++ 7)
|
||
or later, where all of the MFC and ATL string types are based around
|
||
the CSimpleStringT class template. </P>
|
||
<P>In the following documentation, whenever you see CSimpleStringT<charT>,
|
||
then you can substitute any of the following MFC/ATL types (all of which
|
||
inherit from CSimpleStringT):</P>
|
||
<P>CString<BR>
|
||
CStringA<BR>
|
||
CStringW<BR>
|
||
CAtlString<BR>
|
||
CAtlStringA<BR>
|
||
CAtlStringW<BR>
|
||
CStringT<charT,traits><BR>
|
||
CFixedStringT<charT,N><BR>
|
||
CSimpleStringT<charT></P>
|
||
<H3><A name="types"></A>Types</H3>
|
||
<P>The following typedefs are provided for the convenience of those working with
|
||
TCHARs:</P>
|
||
<PRE>typedef <A href="basic_regex.html" >basic_regex</A><TCHAR> tregex;
|
||
typedef <A href="match_results.html" >match_results</A><TCHAR const*> tmatch;
|
||
typedef <A href="regex_iterator.html" >regex_iterator</A><TCHAR const*> tregex_iterator;
|
||
typedef <A href="regex_token_iterator.html" >regex_token_iterator</A><TCHAR const*> tregex_token_iterator;
|
||
</PRE>
|
||
<P>If you are working with explicitly narrow or wide characters rather than TCHAR,
|
||
then use the regular Boost.Regex types instead.</P>
|
||
<H3><A name="create"></A>Regular Expression Creation</H3>
|
||
<P>The following helper function is available to assist in the creation of a
|
||
regular expression from an MFC/ATL string type:</P>
|
||
<pre>template <class charT>
|
||
basic_regex<charT>
|
||
make_regex(const ATL::CSimpleStringT<charT>& s,
|
||
::boost::regex_constants::syntax_option_type f = boost::regex_constants::normal);</pre>
|
||
<P><STRONG>Effects</STRONG>: returns basic_regex<charT>(s.GetString(),
|
||
s.GetString() + s.GetLength(), f);</P>
|
||
<H3><A name="algo"></A>Overloaded Algorithms</H3>
|
||
<P>For each regular expression algorithm that's overloaded for a std::basic_string
|
||
argument, there is also one overloaded for the MFC/ATL string types.
|
||
These algorithm signatures all look a lot more complex than they actually
|
||
are, but for completeness here they are anyway:</P>
|
||
<H4><A name="regex_match"></A>regex_match</H4>
|
||
<P>There are two overloads, the first reports what matched in a match_results
|
||
structure, the second does not.
|
||
</P>
|
||
<P>All the usual caveats for <A href="regex_match.html">regex_match</A> apply, in
|
||
particular the algorithm will only report a successful match if <STRONG>all of the
|
||
input text matches the expression</STRONG>, if this isn't what you want then
|
||
use <A href="regex_search.html">regex_search</A> instead.</P>
|
||
<PRE>template <class charT, class T, class A>
|
||
bool regex_match(
|
||
const ATL::CSimpleStringT<charT>& s,
|
||
match_results<const charT*, A>& what,
|
||
const basic_regex<charT, T>& e,
|
||
boost::regex_constants::match_flag_type f = boost::regex_constants::match_default); </PRE>
|
||
<P>
|
||
<P><STRONG>Effects</STRONG>: returns ::boost::<A href="regex_match.html">regex_match</A>(s.GetString(),
|
||
s.GetString() + s.GetLength(), what, e, f);</P>
|
||
<p><strong>Example:</strong></p>
|
||
<pre>//
|
||
// Extract filename part of a path from a CString and return the result
|
||
// as another CString:
|
||
//
|
||
CString get_filename(const CString& path)
|
||
{
|
||
boost::tregex r(__T("(?:\\A|.*\\\\)([^\\\\]+)"));
|
||
boost::tmatch what;
|
||
if(boost::regex_match(path, what, r))
|
||
{
|
||
// extract $1 as a CString:
|
||
return CString(what[1].first, what.length(1));
|
||
}
|
||
else
|
||
{
|
||
throw std::runtime_error("Invalid pathname");
|
||
}
|
||
}
|
||
</pre>
|
||
<hr>
|
||
<PRE>template <class charT, class T>
|
||
bool regex_match(
|
||
const ATL::CSimpleStringT<charT>& s,
|
||
const basic_regex<charT, T>& e,
|
||
boost::regex_constants::match_flag_type f = boost::regex_constants::match_default)</PRE>
|
||
<P>
|
||
<P><STRONG>Effects</STRONG>: returns ::boost::<A href="regex_match.html">regex_match</A>(s.GetString(),
|
||
s.GetString() + s.GetLength(), e, f);</P>
|
||
<p><strong>Example:</strong></p>
|
||
<pre>//
|
||
// Find out if *password* meets our password requirements,
|
||
// as defined by the regular expression *requirements*.
|
||
//
|
||
bool is_valid_password(const CString& password, const CString& requirements)
|
||
{
|
||
return boost::regex_match(password, boost::make_regex(requirements));
|
||
} </pre>
|
||
<hr>
|
||
<H4><A name="regex_search"></A>regex_search</H4>
|
||
<P>There are two additional overloads for <A href="regex_search.html">regex_search</A>,
|
||
the first reports what matched, the second does not:</P>
|
||
<PRE>template <class charT, class A, class T>
|
||
bool regex_search(const ATL::CSimpleStringT<charT>& s,
|
||
match_results<const charT*, A>& what,
|
||
const basic_regex<charT, T>& e,
|
||
boost::regex_constants::match_flag_type f = boost::regex_constants::match_default)</PRE>
|
||
<P><STRONG>Effects</STRONG>: returns ::boost::<A href="regex_search.html">regex_search</A>(s.GetString(),
|
||
s.GetString() + s.GetLength(), what, e, f);</P>
|
||
<P><STRONG>Example:</STRONG> Postcode extraction from an address string.</P>
|
||
<pre>CString extract_postcode(const CString& address)
|
||
{
|
||
// searches through address for a UK postcode and returns the result,
|
||
// the expression used is by Phil A. on www.regxlib.com:
|
||
boost::tregex r(__T("^(([A-Z]{1,2}[0-9]{1,2})|([A-Z]{1,2}[0-9][A-Z]))\\s?([0-9][A-Z]{2})$"));
|
||
boost::tmatch what;
|
||
if(boost::regex_search(address, what, r))
|
||
{
|
||
// extract $0 as a CString:
|
||
return CString(what[0].first, what.length());
|
||
}
|
||
else
|
||
{
|
||
throw std::runtime_error("No postcode found");
|
||
}
|
||
} </pre>
|
||
<hr>
|
||
<pre>template <class charT, class T>
|
||
inline bool regex_search(const ATL::CSimpleStringT<charT>& s,
|
||
const basic_regex<charT, T>& e,
|
||
boost::regex_constants::match_flag_type f = boost::regex_constants::match_default)
|
||
</pre>
|
||
<P><STRONG>Effects</STRONG>: returns ::boost::<A href="regex_search.html">regex_search</A>(s.GetString(),
|
||
s.GetString() + s.GetLength(), e, f);</P>
|
||
<hr>
|
||
<H4><A name="regex_replace"></A>regex_replace</H4>
|
||
<P>There are two additional overloads for <A href="regex_replace.html">regex_replace</A>,
|
||
the first sends output to an output iterator, while the second creates a new
|
||
string:</P>
|
||
<PRE>template <class OutputIterator, class BidirectionalIterator, class traits, class
|
||
charT>
|
||
OutputIterator regex_replace(OutputIterator out,
|
||
BidirectionalIterator first,
|
||
BidirectionalIterator last,
|
||
const basic_regex<charT, traits>& e,
|
||
const ATL::CSimpleStringT<charT>& fmt,
|
||
match_flag_type flags = match_default)
|
||
</PRE>
|
||
<P><STRONG>Effects</STRONG>: returns ::boost::<A href="regex_replace.html">regex_replace</A>(out,
|
||
first, last, e, fmt.GetString(), flags);</P>
|
||
<pre>template <class traits, class charT>
|
||
ATL::CSimpleStringT<charT> regex_replace(const ATL::CSimpleStringT<charT>& s,
|
||
const basic_regex<charT, traits>& e,
|
||
const ATL::CSimpleStringT<charT>& fmt,
|
||
match_flag_type flags = match_default)</pre>
|
||
<P><STRONG>Effects</STRONG>: returns a new string created using <A href="regex_replace.html">
|
||
regex_replace</A>, and the same memory manager as string <EM>s</EM>.</P>
|
||
<P><STRONG>Example:</STRONG></P>
|
||
<PRE>//
|
||
// Take a credit card number as a string of digits,
|
||
// and reformat it as a human readable string with "-"
|
||
// separating each group of four digits:
|
||
//
|
||
const boost::tregex e(__T("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z"));
|
||
const CString human_format = __T("$1-$2-$3-$4");
|
||
|
||
CString human_readable_card_number(const CString& s)
|
||
{
|
||
return boost::regex_replace(s, e, human_format);
|
||
}
|
||
</PRE>
|
||
<H3><a name="iterators"></a>Iterators</H3>
|
||
<P>The following helper functions are provided to ease the conversion from an
|
||
MFC/ATL string to a <A href="regex_iterator.html">regex_iterator</A> or <A href="regex_token_iterator.html">
|
||
regex_token_iterator</A>:</P>
|
||
<H4><A name="regex_iterator"></A>regex_iterator creation helper</H4>
|
||
<PRE>template <class charT>
|
||
regex_iterator<charT const*>
|
||
make_regex_iterator(
|
||
const ATL::CSimpleStringT<charT>& s,
|
||
const basic_regex<charT>& e,
|
||
::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
|
||
</PRE>
|
||
<p><STRONG>Effects:</STRONG> returns <A href="regex_iterator.html">regex_iterator</A>(s.GetString(),
|
||
s.GetString() + s.GetLength(), e, f);</p>
|
||
<p><strong>Example:</strong></p>
|
||
<pre>void enumerate_links(const CString& html)
|
||
{
|
||
// enumerate and print all the <a> links in some HTML text,
|
||
// the expression used is by Andrew Lee on <a href="http://www.regxlib.com">www.regxlib.com</a>:
|
||
boost::tregex r(__T("href=[\"\']((http:\\/\\/|\\.\\/|\\/)?\\w+(\\.\\w+)*(\\/\\w+(\\.\\w+)?)*(\\/|\\?\\w*=\\w*(&\\w*=\\w*)*)?)[\"\']"));
|
||
boost::tregex_iterator i(boost::make_regex_iterator(html, r)), j;
|
||
while(i != j)
|
||
{
|
||
std::cout << (*i)[1] << std::endl;
|
||
++i;
|
||
}
|
||
}
|
||
</pre>
|
||
<hr>
|
||
<H4><A name="regex_token_iterator"></A>regex_token_iterator creation helpers</H4>
|
||
<PRE>template <class charT>
|
||
regex_token_iterator<charT const*>
|
||
make_regex_token_iterator(
|
||
const ATL::CSimpleStringT<charT>& s,
|
||
const basic_regex<charT>& e,
|
||
int sub = 0,
|
||
::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
|
||
</PRE>
|
||
<p><STRONG>Effects:</STRONG> returns <A href="regex_token_iterator.html">regex_token_iterator</A>(s.GetString(),
|
||
s.GetString() + s.GetLength(), e, sub, f);</p>
|
||
<pre>template <class charT>
|
||
regex_token_iterator<charT const*>
|
||
make_regex_token_iterator(
|
||
const ATL::CSimpleStringT<charT>& s,
|
||
const basic_regex<charT>& e,
|
||
const std::vector<int>& subs,
|
||
::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
|
||
</pre>
|
||
<p><STRONG>Effects:</STRONG> returns <A href="regex_token_iterator.html">regex_token_iterator</A>(s.GetString(),
|
||
s.GetString() + s.GetLength(), e, subs, f);</p>
|
||
<pre>template <class charT, std::size_t N>
|
||
regex_token_iterator<charT const*>
|
||
make_regex_token_iterator(
|
||
const ATL::CSimpleStringT<charT>& s,
|
||
const basic_regex<charT>& e,
|
||
const int (& subs)[N],
|
||
::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
|
||
</pre>
|
||
<p><STRONG>Effects: </STRONG>returns <A href="regex_token_iterator.html">regex_token_iterator</A>(s.GetString(),
|
||
s.GetString() + s.GetLength(), e, subs, f);</p>
|
||
<P><STRONG>Example:</STRONG></P>
|
||
<PRE>void enumerate_links2(const CString& html)
|
||
{
|
||
// enumerate and print all the <a> links in some HTML text,
|
||
// the expression used is by Andrew Lee on <a href="http://www.regxlib.com">www.regxlib.com</a>:
|
||
boost::tregex r(__T("href=[\"\']((http:\\/\\/|\\.\\/|\\/)?\\w+(\\.\\w+)*(\\/\\w+(\\.\\w+)?)*(\\/|\\?\\w*=\\w*(&\\w*=\\w*)*)?)[\"\']"));
|
||
boost::tregex_token_iterator i(boost::make_regex_token_iterator(html, r, 1)), j;
|
||
while(i != j)
|
||
{
|
||
std::cout << *i << std::endl;
|
||
++i;
|
||
}
|
||
} </PRE>
|
||
<HR>
|
||
<p>Revised
|
||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||
21 Dec 2004
|
||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||
<p><i>&copy; Copyright John Maddock 2004</i></p>
|
||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||
</body>
|
||
</html>
|