mirror of
https://github.com/boostorg/regex.git
synced 2025-07-04 16:16:32 +02:00
2562 lines
125 KiB
HTML
2562 lines
125 KiB
HTML
<html>
|
|
|
|
<head>
|
|
<meta http-equiv="Content-Type"
|
|
content="text/html; charset=iso-8859-1">
|
|
<meta name="Template"
|
|
content="C:\PROGRAM FILES\MICROSOFT OFFICE\OFFICE\html.dot">
|
|
<meta name="GENERATOR" content="Microsoft FrontPage Express 2.0">
|
|
<title>Regex++, template class and algorithm reference</title>
|
|
</head>
|
|
|
|
<body bgcolor="#FFFFFF" link="#0000FF" vlink="#800080">
|
|
|
|
<p> </p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="624">
|
|
<tr>
|
|
<td valign="top" width="50%"><h3 align="right"><img
|
|
src="../../c++boost.gif" alt="C++ Boost" width="276"
|
|
height="86"></h3>
|
|
</td>
|
|
<td valign="top" width="50%"><h2 align="center">Regex++,
|
|
Template Class and Algorithm Reference.</h2>
|
|
<p><i>(version 3.12, 18 April 2000)</i> </p>
|
|
<pre><i>Copyright (c) 1998-9
|
|
Dr John Maddock
|
|
|
|
Permission to use, copy, modify, distribute and sell this software
|
|
and its documentation for any purpose is hereby granted without fee,
|
|
provided that the above copyright notice appear in all copies and
|
|
that both that copyright notice and this permission notice appear
|
|
in supporting documentation. Dr John Maddock makes no representations
|
|
about the suitability of this software for any purpose.
|
|
It is provided "as is" without express or implied warranty.</i></pre>
|
|
</td>
|
|
</tr>
|
|
</table>
|
|
|
|
<hr>
|
|
|
|
<h3><a name="regbase"></a>class regbase</h3>
|
|
|
|
<p>#include <<a href="../../boost/regex.hpp">boost/regex.hpp</a>>
|
|
</p>
|
|
|
|
<p>Class regbase is the template argument independent base class
|
|
for reg_expression, the only public members are the <i>flag_type</i>
|
|
enumerated values that determine how regular expressions are
|
|
interpreted. </p>
|
|
|
|
<pre><b>class</b> regbase
|
|
{
|
|
<b>public</b>:
|
|
<b>enum</b> flag_type_
|
|
{
|
|
escape_in_lists = 1, <font
|
|
color="#000080">// '\\' special inside [...]
|
|
</font> char_classes = escape_in_lists << 1, <font
|
|
color="#000080"><i>// [[:CLASS:]] allowed
|
|
</i></font> intervals = char_classes << 1, <font
|
|
color="#000080"><i>// {x,y} allowed
|
|
</i></font> limited_ops = intervals << 1, <font
|
|
color="#000080"><i>// all of + ? and | are normal characters
|
|
</i></font> newline_alt = limited_ops << 1, <font
|
|
color="#000080"><i>// \n is the same as |
|
|
</i></font> bk_plus_qm = newline_alt << 1, <font
|
|
color="#000080"><i>// uses \+ and \?
|
|
</i></font> bk_braces = bk_plus_qm << 1, <font
|
|
color="#000080"><i>// uses \{ and \}
|
|
</i></font> bk_parens = bk_braces << 1, <font
|
|
color="#000080"><i>// uses \( and \)
|
|
</i></font> bk_refs = bk_parens << 1, <font
|
|
color="#000080"><i>// \d allowed
|
|
</i></font> bk_vbar = bk_refs << 1, <font
|
|
color="#000080"><i>// uses \|
|
|
</i></font> use_except = bk_vbar << 1, <font
|
|
color="#000080"><i>// exception on error
|
|
</i></font> failbit = use_except << 1, <font
|
|
color="#000080"><i>// error flag
|
|
</i></font> literal = failbit << 1, <font
|
|
color="#000080"><i>// all characters are literals
|
|
</i></font> icase = literal << 1, <font
|
|
color="#000080"><i>// characters are matched regardless of case
|
|
</i></font> nocollate = icase << 1, <font
|
|
color="#000080"><i>// don't use locale specific collation
|
|
</i></font>
|
|
basic = char_classes | intervals | limited_ops | bk_braces | bk_parens | bk_refs,
|
|
extended = char_classes | intervals | bk_refs,
|
|
normal = escape_in_lists | char_classes | intervals | bk_refs | nocollate,
|
|
emacs = bk_braces | bk_parens | bk_refs | bk_vbar,
|
|
awk = extended | escape_in_lists,
|
|
grep = basic | newline_alt,
|
|
egrep = extended | newline_alt,
|
|
sed = basic,
|
|
perl = normal
|
|
};
|
|
<b>typedef</b> <b>unsigned</b> <b>int</b> flag_type;
|
|
}; </pre>
|
|
|
|
<p> <br>
|
|
<br>
|
|
</p>
|
|
|
|
<p>The enumerated type <i>regbase::flag_type</i> determines the
|
|
syntax rules for regular expression compilation, the various
|
|
flags have the following effects: <br>
|
|
</p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::escape_in_lists</td>
|
|
<td valign="top" width="45%">Allows the use of the escape
|
|
"\" character in sets of characters, for
|
|
example [\]] represents the set of characters containing
|
|
only "]". If this flag is not set then "\"
|
|
is an ordinary character inside sets.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::char_classes</td>
|
|
<td valign="top" width="45%">When this bit is set,
|
|
character classes [:classname:] are allowed inside
|
|
character set declarations, for example "[[:word:]]"
|
|
represents the set of all characters that belong to the
|
|
character class "word".</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::intervals</td>
|
|
<td valign="top" width="45%">When this bit is set,
|
|
repetition intervals are allowed, for example "a{2,4}"
|
|
represents a repeat of between 2 and 4 letter a's.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::limited_ops</td>
|
|
<td valign="top" width="45%">When this bit is set all of
|
|
"+", "?" and "|" are
|
|
ordinary characters in all situations.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::newline_alt</td>
|
|
<td valign="top" width="45%">When this bit is set, then
|
|
the newline character "\n" has the same effect
|
|
as the alternation operator "|".</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::bk_plus_qm</td>
|
|
<td valign="top" width="45%">When this bit is set then
|
|
"\+" represents the one or more repetition
|
|
operator and "\?" represents the zero or one
|
|
repetition operator. When this bit is not set then
|
|
"+" and "?" are used instead.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::bk_braces</td>
|
|
<td valign="top" width="45%">When this bit is set then
|
|
"\{" and "\}" are used for bounded
|
|
repetitions and "{" and "}" are
|
|
normal characters. This is the opposite of default
|
|
behaviour.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::bk_parens</td>
|
|
<td valign="top" width="45%">When this bit is set then
|
|
"\(" and "\)" are used to group sub-expressions
|
|
and "(" and ")" are ordinary
|
|
characters, this is the opposite of default behaviour.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::bk_refs</td>
|
|
<td valign="top" width="45%">When this bit is set then
|
|
back references are allowed.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::bk_vbar</td>
|
|
<td valign="top" width="45%">When this bit is set then
|
|
"\|" represents the alternation operator and
|
|
"|" is an ordinary character. This is the
|
|
opposite of default behaviour.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::use_except</td>
|
|
<td valign="top" width="45%">When this bit is set then a <a
|
|
href="#bad_expression">bad_expression</a> exception will
|
|
be thrown on error. Use of this flag is deprecated
|
|
- reg_expression will always throw on error.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::failbit</td>
|
|
<td valign="top" width="45%">This bit is set on error, if
|
|
regbase::use_except is not set, then this bit should be
|
|
checked to see if a regular expression is valid before
|
|
usage.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::literal</td>
|
|
<td valign="top" width="45%">All characters in the string
|
|
are treated as literals, there are no special characters
|
|
or escape sequences.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::icase</td>
|
|
<td valign="top" width="45%">All characters in the string
|
|
are matched regardless of case.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::nocollate</td>
|
|
<td valign="top" width="45%">Locale specific collation is
|
|
disabled when dealing with ranges in character set
|
|
declarations. For example when this bit is set the
|
|
expression [a-c] would match the characters a, b and c
|
|
only regardless of locale, whereas when this is not set,
|
|
then [a-c] matches any character which collates in the
|
|
range a to c.</td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::basic</td>
|
|
<td valign="top" width="45%">Equivalent to the POSIX
|
|
basic regular expression syntax: char_classes | intervals
|
|
| limited_ops | bk_braces | bk_parens | bk_refs.</td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::extended</td>
|
|
<td valign="top" width="45%">Equivalent to the POSIX
|
|
extended regular expression syntax: char_classes |
|
|
intervals | bk_refs.</td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
<td valign="top" width="45%" height="24">regbase::normal</td>
|
|
<td valign="top" width="45%" height="24">This is the
|
|
default setting, and represents how most people expect
|
|
the library to behave. Equivalent to the POSIX extended
|
|
syntax, but with locale specific collation disabled, and
|
|
escape characters inside set declarations enabled:
|
|
regbase::escape_in_lists | regbase::char_classes |
|
|
regbase::intervals | regbase::bk_refs | regbase::nocollate.</td>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
<td valign="top" width="45%" height="24">regbase::emacs</td>
|
|
<td valign="top" width="45%" height="24">Provides
|
|
compatibility with the emacs editor, equivalent to:
|
|
bk_braces | bk_parens | bk_refs | bk_vbar.</td>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
<td valign="top" width="45%" height="24">regbase::awk </td>
|
|
<td valign="top" width="45%" height="24">Provides
|
|
compatibility with the Unix utility Awk, the same as POSIX
|
|
extended regular expressions, but allows escapes inside
|
|
bracket-expressions (character sets). Equivalent to
|
|
extended | escape_in_lists.</td>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
<td valign="top" width="45%" height="24">regbase::grep</td>
|
|
<td valign="top" width="45%" height="24">Provides
|
|
compatibility with the Unix grep utility, the same as
|
|
POSIX basic regular expressions, but with the newline
|
|
character equivalent to the alternation operator; the
|
|
same as basic | newline_alt.</td>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
<td valign="top" width="45%" height="24">regbase::egrep</td>
|
|
<td valign="top" width="45%" height="24">Provides
|
|
compatibility with the Unix egrep utility, the same as
|
|
POSIX extended regular expressions, but with the newline
|
|
character equivalent to the alternation operator; the
|
|
same as extended | newline_alt.</td>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
<td valign="top" width="45%" height="24">regbase::sed</td>
|
|
<td valign="top" width="45%" height="24">Provides
|
|
compatibility with the Unix sed utility, the same as POSIX
|
|
basic regular expressions.</td>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
<td valign="top" width="45%" height="24">regbase::perl</td>
|
|
<td valign="top" width="45%" height="24">Provides
|
|
compatibility with the perl programming language, the
|
|
same as regbase::normal.</td>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
</tr>
|
|
</table>
|
|
|
|
<hr>
|
|
|
|
<h3><a name="bad_expression"></a>Exception classes.</h3>
|
|
|
|
<p>#include <<a href="../../boost/regex/pattern_except.hpp">boost/regex/pattern_except.hpp</a>>
|
|
</p>
|
|
|
|
<p>An instance of <i>bad_expression</i> is thrown whenever a bad
|
|
regular expression is encountered. </p>
|
|
|
|
<pre><b>namespace</b> boost{
|
|
|
|
<b>class</b> bad_pattern : <b>public</b> std::runtime_error
|
|
{
|
|
<b>public</b>:
|
|
<b>explicit</b> bad_pattern(<b>const</b> std::string& s) : std::runtime_error(s){};
|
|
};
|
|
|
|
<b>class</b> bad_expression : <b>public</b> bad_pattern
|
|
{
|
|
<b>public</b>:
|
|
bad_expression(<b>const</b> std::string& s) : bad_pattern(s) {}
|
|
};
|
|
|
|
|
|
} // namespace boost</pre>
|
|
|
|
<p>Footnotes: the class <i>bad_pattern </i>forms the base class
|
|
for all pattern-matching exceptions, of which <i>bad_expression</i>
|
|
is one. The choice of <i>std::runtime_error </i>as the base class
|
|
for <i>bad_pattern</i> is moot; depending upon how the library is
|
|
used, exceptions may be either logic errors (programmer supplied
|
|
expressions) or run time errors (user supplied expressions). </p>
|
|
|
|
<hr>
|
|
|
|
<h3><a name="reg_expression"></a>Class reg_expression</h3>
|
|
|
|
<p>#include <<a href="../../boost/regex.hpp">boost/regex.hpp</a>>
|
|
</p>
|
|
|
|
<p>The template class <i>reg_expression </i>encapsulates regular
|
|
expression parsing and compilation. The class derives from class <a
|
|
href="#regbase"><i>regbase</i></a> and takes three template
|
|
parameters: </p>
|
|
|
|
<p><b><i>charT</i></b>: determines the character type, i.e.
|
|
either char or wchar_t. </p>
|
|
|
|
<p><b><i>traits</i></b>: determines the behaviour of the
|
|
character type, for example whether character matching is case
|
|
sensitive or not, and which character class names are recognized.
|
|
A default traits class is provided: <a href="#regex_char_traits">regex_traits<charT></a>.
|
|
</p>
|
|
|
|
<p><b><i>Allocator</i></b>: the allocator class used to allocate
|
|
memory by the class. </p>
|
|
|
|
<p>For ease of use there are two typedefs that define the two
|
|
standard <i>reg_expression</i> instances, unless you want to use
|
|
custom allocators, you won't need to use anything other than
|
|
these: </p>
|
|
|
|
<pre><b>namespace</b> boost{
|
|
<b>template</b> <<b>class</b> charT, <b>class</b> traits = regex_traits<charT>, <b>class</b> Allocator = std::allocator<charT> >
|
|
<b>class</b> reg_expression;
|
|
<b>typedef</b> reg_expression<<b>char</b>> regex;
|
|
<b>typedef</b> reg_expression<<b>wchar_t> </b>wregex;
|
|
}</pre>
|
|
|
|
<p>The definition of <i>reg_expression</i> follows: it is based
|
|
very closely on class basic_string, and fulfils the requirements
|
|
for a container of <i>charT</i>. </p>
|
|
|
|
<pre><b>namespace</b> boost{
|
|
<b>template</b> <<b>class</b> charT, <b>class</b> traits = regex_traits<charT>, <b>class</b> Allocator = std::allocator<charT> >
|
|
<b>class</b> reg_expression : <b>public</b> regbase
|
|
{
|
|
<b>public</b>:
|
|
<font color="#000080"><i> // typedefs: </i></font>
|
|
<b> typedef</b> charT char_type;
|
|
<b>typedef</b> traits traits_type;
|
|
<font color="#000080"> <i>// locale_type
|
|
</i> <i>// placeholder for actual locale type used by the
|
|
</i> <i>// traits class to localise *this.
|
|
</i></font> <b>typedef</b> typename traits::locale_type locale_type;
|
|
<font color="#000080"> <i>// value_type
|
|
</i></font> <b>typedef</b> charT value_type;
|
|
<font color="#000080"> <i>// reference, const_reference
|
|
</i></font> <b>typedef</b> charT& reference;
|
|
<b>typedef</b> <b>const</b> charT& const_reference;
|
|
<font color="#000080"> <i>// iterator, const_iterator
|
|
</i></font> <b>typedef</b> <b>const</b> charT* const_iterator;
|
|
<b>typedef</b> const_iterator iterator;
|
|
<font color="#000080"> <i>// difference_type
|
|
</i></font> <b>typedef</b> <b>typename</b> Allocator::difference_type difference_type;
|
|
<font color="#000080"> <i>// size_type
|
|
</i></font> <b>typedef</b> <b>typename</b> Allocator::size_type size_type;
|
|
<font color="#000080"><i>// allocator_type
|
|
</i></font> <b> typedef</b> Allocator allocator_type;
|
|
<b>typedef</b> Allocator alloc_type;
|
|
<font color="#000080"> <i>// flag_type
|
|
</i></font> <b>typedef</b> boost::int_fast32_t flag_type;
|
|
<b>public</b>:
|
|
<font color="#000080"><em>// constructors</em></font>
|
|
<strong>explicit</strong> reg_expression(<b>const</b> Allocator& a = Allocator());
|
|
<strong>explicit</strong> reg_expression(<b>const</b> charT* p, flag_type f = regbase::normal, <b>const</b> Allocator& a = Allocator());
|
|
reg_expression(<b>const</b> charT* p1, <b>const</b> charT* p2, flag_type f = regbase::normal, <b>const</b> Allocator& a = Allocator());
|
|
reg_expression(<b>const</b> charT* p, size_type len, flag_type f, <b>const</b> Allocator& a = Allocator());
|
|
reg_expression(<b>const</b> reg_expression&);
|
|
<b> template</b> <<b>class</b> ST, <b>class</b> SA>
|
|
<strong>explicit</strong> reg_expression(<b>const</b> std::basic_string<charT, ST, SA>& p, flag_type f = regbase::normal, <b>const</b> Allocator& a = Allocator());
|
|
<b> template</b> <<b>class</b> I>
|
|
reg_expression(I first, I last, flag_type f = regbase::normal, <b>const</b> Allocator& a = Allocator());
|
|
~reg_expression();
|
|
reg_expression& <b>operator</b>=(<b>const</b> reg_expression&);
|
|
reg_expression& <b>operator</b>=(<b>const</b> charT* ptr);
|
|
<b> template</b> <<b>class</b> ST, <b>class</b> SA>
|
|
reg_expression& <b>operator</b>=(<b>const</b> std::basic_string<charT, ST, SA>& p);
|
|
<font color="#000080"> <i>//
|
|
</i> <i>// assign:
|
|
</i></font> reg_expression& assign(<b>const</b> reg_expression& that);
|
|
reg_expression& assign(<b>const</b> charT* ptr, flag_type f = regbase::normal);
|
|
reg_expression& assign(<b>const</b> charT* first, <b>const</b> charT* last, flag_type f = regbase::normal);
|
|
<b> template</b> <<b>class</b> string_traits, <b>class</b> A>
|
|
reg_expression& assign(
|
|
<b>const</b> std::basic_string<charT, string_traits, A>& s,
|
|
flag_type f = regbase::normal);
|
|
<b>template</b> <<b>class</b> iterator>
|
|
reg_expression& assign(iterator first,
|
|
iterator last,
|
|
flag_type f = regbase::normal);
|
|
<font color="#000080"> <i>//
|
|
</i> <i>// allocator access:
|
|
</i></font> Allocator get_allocator()<b>const</b>;
|
|
<font color="#000080"> <i>//
|
|
</i> <i>// locale:
|
|
</i></font> locale_type imbue(<b>const</b> locale_type& l);
|
|
locale_type getloc()<b>const</b>;
|
|
<font color="#000080"><i> //
|
|
</i> <i>// flags:
|
|
</i></font> flag_type getflags()<b>const</b>;
|
|
<font color="#000080"> <i>//
|
|
</i> <i>// str:
|
|
</i></font> std::basic_string<charT> str()<b>const</b>;
|
|
<font color="#000080"> <i>//
|
|
</i> <i>// begin, end:
|
|
</i></font> const_iterator begin()<b>const</b>;
|
|
const_iterator end()<b>const</b>;
|
|
<font color="#000080"> <i>//
|
|
</i> <i>// swap:
|
|
</i></font> <b>void</b> swap(reg_expression&)<b>throw</b>();
|
|
<font color="#000080"> <i>//
|
|
</i> <i>// size:
|
|
</i></font> size_type size()<b>const</b>;
|
|
<font color="#000080"> <i>//
|
|
</i> <i>// max_size:
|
|
</i></font> size_type max_size()<b>const</b>;
|
|
<font color="#000080"> <i>//
|
|
</i> <i>// empty:
|
|
</i></font> <b>bool</b> empty()<b>const</b>;
|
|
<b>unsigned</b> mark_count()<b>const</b>;
|
|
<b>bool</b> <b>operator</b>==(<b>const</b> reg_expression&)<b>const</b>;
|
|
<b>bool</b> <b>operator</b><(<b>const</b> reg_expression&)<b>const</b>;
|
|
};
|
|
} <font color="#000080"><i>// namespace boost </i></font></pre>
|
|
|
|
<p><font >Class reg_expression has the following public
|
|
member functions: <br>
|
|
</font></p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >reg_expression(Allocator
|
|
a = Allocator());</font></td>
|
|
<td valign="top" width="45%"><font > Constructs
|
|
a default instance of reg_expression without any
|
|
expression.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >reg_expression(charT*
|
|
p, <b>unsigned</b> f = regbase::normal, Allocator a =
|
|
Allocator());</font></td>
|
|
<td valign="top" width="45%"><font > Constructs
|
|
an instance of reg_expression from the expression denoted
|
|
by the null terminated string <b>p</b>, using the flags <b>f</b>
|
|
to determine regular expression syntax. See class </font><a
|
|
href="#regbase"><font >regbase</font></a><font
|
|
> for allowable flag values.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >reg_expression(charT*
|
|
p1, charT* p2, <b>unsigned</b> f = regbase::normal,
|
|
Allocator a = Allocator());</font></td>
|
|
<td valign="top" width="45%"><font > Constructs
|
|
an instance of reg_expression from the expression denoted
|
|
by the pair of iterators <b>p1</b> and <b>p2</b>, using the
|
|
flags <b>f</b> to determine regular expression syntax.
|
|
See class </font><a href="#regbase"><font >regbase</font></a><font
|
|
> for allowable flag values.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >reg_expression(charT*
|
|
p, size_type len, <b>unsigned</b> f, Allocator a =
|
|
Allocator());</font></td>
|
|
<td valign="top" width="45%"><font > Constructs
|
|
an instance of reg_expression from the expression denoted
|
|
by the string <b>p</b> of length <b>len</b>, using the
|
|
flags <b>f</b> to determine regular expression syntax.
|
|
See class </font><a href="#regbase"><font >regbase</font></a><font
|
|
> for allowable flag values.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font ><b>template</b>
|
|
<class ST, class SA> <br>
|
|
reg_expression(<b>const</b> std::basic_string<charT,
|
|
ST, SA>& p, boost::int_fast32_t f = regbase::normal,
|
|
<b>const</b> Allocator& a = Allocator());</font></td>
|
|
<td valign="top" width="45%"><font > Constructs
|
|
an instance of reg_expression from the expression denoted
|
|
by the string <b>p</b>, using the flags <b>f</b> to
|
|
determine regular expression syntax. See class </font><a
|
|
href="#regbase"><font >regbase</font></a><font
|
|
> for allowable flag values. </font><p><font
|
|
>Note - this member may not be available
|
|
depending upon your compiler capabilities.</font></p>
|
|
</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >template
|
|
<class I> <br>
|
|
reg_expression(I first, I last, flag_type f = regbase::normal,
|
|
const Allocator& a = Allocator());</font></td>
|
|
<td valign="top" width="45%"><font > Constructs
|
|
an instance of reg_expression from the expression denoted
|
|
by the pair of iterators <b>first</b> and <b>last</b>, using the
|
|
flags <b>f</b> to determine regular expression syntax.
|
|
See class </font><a href="#regbase"><font >regbase</font></a><font
|
|
> for allowable flag values.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >reg_expression(<b>const</b>
|
|
reg_expression&);</font></td>
|
|
<td valign="top" width="45%"><font >Copy
|
|
constructor - copies an existing regular expression.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >reg_expression&
|
|
<b>operator</b>=(<b>const</b> reg_expression&);</font></td>
|
|
<td valign="top" width="45%"><font >Copies an
|
|
existing regular expression.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >reg_expression&
|
|
<b>operator</b>=(<b>const</b> charT* ptr);</font></td>
|
|
<td valign="top" width="45%"><font >Equivalent to
|
|
assign(ptr);</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >template
|
|
<class ST, class SA> </font><p><font >reg_expression&
|
|
operator=(const std::basic_string<charT, ST,
|
|
SA>& p);</font></p>
|
|
</td>
|
|
<td valign="top" width="45%"><font >Equivalent to
|
|
assign(p);</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >reg_expression&
|
|
assign(<b>const</b> reg_expression& that);</font></td>
|
|
<td valign="top" width="45%"><font >Copies the
|
|
regular expression contained by <b>that</b>, throws </font><a
|
|
href="#bad_expression"><font >bad_expression</font></a><font
|
|
> if <b>that</b> does not contain a valid
|
|
expression. Returns *this.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >reg_expression&
|
|
assign(<b>const</b> charT* p, flag_type f = regbase::normal);</font></td>
|
|
<td valign="top" width="45%"><font >Compiles a
|
|
regular expression from the expression denoted by the
|
|
null terminated string <b>p</b>, using the flags <b>f</b>
|
|
to determine regular expression syntax. See class </font><a
|
|
href="#regbase"><font >regbase</font></a><font
|
|
> for allowable flag values. Throws </font><a
|
|
href="#bad_expression"><font >bad_expression</font></a><font
|
|
> if <b>p</b> does not contain a valid expression.
|
|
Returns *this.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >reg_expression&
|
|
assign(<b>const</b> charT* first, <b>const</b> charT*
|
|
last, flag_type f = regbase::normal);</font></td>
|
|
<td valign="top" width="45%"><font >Compiles a
|
|
regular expression from the expression denoted by the
|
|
pair of iterators <b>first-last</b>, using the flags <b>f</b>
|
|
to determine regular expression syntax. See class </font><a
|
|
href="#regbase"><font >regbase</font></a><font
|
|
> for allowable flag values. Throws </font><a
|
|
href="#bad_expression"><font >bad_expression</font></a><font
|
|
> if <b>first-last</b> does not contain a valid
|
|
expression. Returns *this.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font ><b>template</b>
|
|
<<b>class</b> string_traits, <b>class</b> A> <br>
|
|
reg_expression& assign(<b>const</b> std::basic_string<charT,
|
|
string_traits, A>& s, flag_type f = regbase::normal);</font></td>
|
|
<td valign="top" width="45%"><font >Compiles a
|
|
regular expression from the expression denoted by the
|
|
string <b>s</b>, using the flags <b>f</b> to determine
|
|
regular expression syntax. See class </font><a
|
|
href="#regbase"><font >regbase</font></a><font
|
|
> for allowable flag values. Throws </font><a
|
|
href="#bad_expression"><font >bad_expression</font></a><font
|
|
> if <b>s</b> does not contain a valid expression.
|
|
Returns *this.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >template
|
|
<class iterator> <br>
|
|
reg_expression& assign(iterator first, iterator last,
|
|
flag_type f = regbase::normal);</font></td>
|
|
<td valign="top" width="45%"><font >Compiles a
|
|
regular expression from the expression denoted by the
|
|
pair of iterators <b>first-last</b>, using the flags <b>f</b>
|
|
to determine regular expression syntax. See class </font><a
|
|
href="#regbase"><font >regbase</font></a><font
|
|
> for allowable flag values. Throws </font><a
|
|
href="#bad_expression"><font >bad_expression</font></a><font
|
|
> if <b>first-last</b> does not contain a valid
|
|
expression. Returns *this.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >Allocator
|
|
get_allocator()<b>const</b>;</font></td>
|
|
<td valign="top" width="45%"><font >Returns the
|
|
allocator used by the expression.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >locale_type
|
|
imbue(<b>const</b> locale_type& l);</font></td>
|
|
<td valign="top" width="45%"><font >Imbues the
|
|
expression with the specified locale, and invalidates the
|
|
current expression.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >locale_type
|
|
getloc()<b>const</b>;</font></td>
|
|
<td valign="top" width="45%"><font >Returns the
|
|
locale used by the expression.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >flag_type
|
|
getflags()<b>const</b>;</font></td>
|
|
<td valign="top" width="45%"><font >Returns the
|
|
flags used to compile the current expression.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >std::basic_string<charT>
|
|
str()<b>const</b>;</font></td>
|
|
<td valign="top" width="45%"><font >Returns the
|
|
current expression as a string.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >const_iterator
|
|
begin()<b>const</b>;</font></td>
|
|
<td valign="top" width="45%"><font >Returns a
|
|
pointer to the first character of the current expression.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >const_iterator
|
|
end()<b>const</b>;</font></td>
|
|
<td valign="top" width="45%"><font >Returns a
|
|
pointer to the end of the current expression.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >size_type
|
|
size()<b>const</b>;</font></td>
|
|
<td valign="top" width="45%"><font >Returns the
|
|
length of the current expression.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >size_type
|
|
max_size()<b>const</b>;</font></td>
|
|
<td valign="top" width="45%"><font >Returns the
|
|
maximum length of a regular expression text.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font ><b>bool</b>
|
|
empty()<b>const</b>;</font></td>
|
|
<td valign="top" width="45%"><font >Returns true
|
|
if the object contains no valid expression.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font ><b>unsigned</b>
|
|
mark_count()<b>const</b> ;</font></td>
|
|
<td valign="top" width="45%"><font >Returns the
|
|
number of sub-expressions in the compiled regular
|
|
expression. Note that this includes the whole match (subexpression
|
|
zero), so the value returned is always >= 1.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
</table>
|
|
|
|
<hr>
|
|
|
|
<h3><a name="regex_char_traits"></a><i>Class regex_traits</i></h3>
|
|
|
|
<p><font >#include <</font><a
|
|
href="../../boost/regex/regex_traits.hpp"><font >boost/regex/regex_traits.hpp</font></a><font
|
|
>> </font></p>
|
|
|
|
<p><font ><i>This is a preliminary version of the regular
|
|
expression traits class, and is subject to change</i>. </font></p>
|
|
|
|
<p><font >The purpose of the traits class is to make it
|
|
easier to customise the behaviour of <i>reg_expression </i>and
|
|
the associated matching algorithms. Custom traits classes can
|
|
handle special character sets or define additional character
|
|
classes, for example one could define [[:kanji:]] as the set of
|
|
all (Unicode) kanji characters. This library provides three
|
|
traits classes and a wrapper class <i>regex_traits</i>, which
|
|
inherits from one of these depending upon the default
|
|
localisation model in use, class <i>c_regex_traits</i>
|
|
encapsulates the global C locale, class <i>w32_regex_traits</i>
|
|
encapsulates the global Win32 locale (only available on Win32
|
|
systems), and class <i>cpp_regex_traits</i> encapsulates the C++
|
|
locale (only provided if std::locale is supported): </font></p>
|
|
|
|
<pre>template <class charT> class c_regex_traits;
|
|
template<> class c_regex_traits<char> { /*details*/ };
|
|
template<> class c_regex_traits<wchar_t> { /*details*/ };
|
|
|
|
template <class charT> class w32_regex_traits;
|
|
template<> class w32_regex_traits<char> { /*details*/ };
|
|
template<> class w32_regex_traits<wchar_t> { /*details*/ };
|
|
|
|
template <class charT> class cpp_regex_traits;
|
|
template<> class cpp_regex_traits<char> { /*details*/ };
|
|
template<> class cpp_regex_traits<wchar_t> { /*details*/ };
|
|
|
|
template <class charT> class regex_traits : public base_type { /*details*/ };</pre>
|
|
|
|
<p><font >Where "<i>base_type</i>" defaults to <i>w32_regex_traits</i>
|
|
on Win32 systems, and <i>c_regex_traits</i> otherwise. The
|
|
default behaviour can be changed by defining one of
|
|
BOOST_RE_LOCALE_C (forces use of <i>c_regex_traits</i> by default),
|
|
or BOOST_RE_LOCALE_CPP (forces use of <i>cpp_regex_traits</i> by
|
|
default). Alternatively a specific traits class can be passed to
|
|
the <i>reg_expression</i> template. </font></p>
|
|
|
|
<p><font >The requirements for custom traits classes are </font><a
|
|
href="traits_class_ref.htm"><font >documented separately
|
|
here....</font></a><font > </font></p>
|
|
|
|
<hr>
|
|
|
|
<h3><a name="reg_match"></a><i>Class match_results</i></h3>
|
|
|
|
<p><font >#include <</font><a
|
|
href="../../boost/regex.hpp"><font >boost/regex.hpp</font></a><font
|
|
>> </font></p>
|
|
|
|
<p><font >Regular expressions are different from many
|
|
simple pattern-matching algorithms in that as well as finding an
|
|
overall match they can also produce sub-expression matches: each
|
|
sub-expression being delimited in the pattern by a pair of
|
|
parentheses (...). There has to be some method for reporting sub-expression
|
|
matches back to the user: this is achieved by defining a
|
|
class <i>match_results</i> that acts as an indexed collection of
|
|
sub-expression matches, each sub-expression match being contained
|
|
in an object of type <i>sub_match</i>. </font></p>
|
|
|
|
<pre><font color="#000080"><i>//
|
|
// class sub_match:
|
|
// denotes one sub-expression match.
|
|
//
|
|
</i></font><b>template</b> <<b>class</b> iterator>
|
|
<b>struct</b> sub_match
|
|
{
|
|
<b>typedef</b> <b>typename</b> std::iterator_traits<iterator>::value_type value_type;
|
|
<b>typedef</b> <b>typename</b> std::iterator_traits<iterator>::difference_type difference_type;
|
|
<b>typedef</b> iterator iterator_type;
|
|
|
|
iterator first;
|
|
iterator second;
|
|
<b>bool</b> matched;
|
|
|
|
<b>operator</b> std::basic_string<value_type>()<b>const</b>;
|
|
|
|
<b>bool</b> <b>operator</b>==(<b>const</b> sub_match& that)<b>const</b>;
|
|
<b>bool</b> <b>operator</b> !=(<b>const</b> sub_match& that)<b>const</b>;
|
|
difference_type length()<b>const</b>;
|
|
};
|
|
|
|
<font color="#000080">//
|
|
// class match_results:
|
|
// contains an indexed collection of matched sub-expressions.
|
|
//
|
|
</font><b>template</b> <<b>class</b> iterator, <b>class</b> Allocator = std::allocator<<strong>typename</strong> std::iterator_traits<iterator>::value_type > >
|
|
<b>class</b> match_results
|
|
{
|
|
<b>public</b>:
|
|
<b>typedef</b> Allocator alloc_type;
|
|
<b>typedef</b> <b>typename</b> Allocator::<b>template</b> Rebind<iterator>::size_type size_type;
|
|
<b>typedef</b> <b>typename</b> std::iterator_traits<iterator>::value_type char_type;
|
|
<b>typedef</b> sub_match<iterator> value_type;
|
|
<b>typedef</b> <b>typename</b> std::iterator_traits<iterator>::difference_type difference_type;
|
|
<b>typedef</b> iterator iterator_type;
|
|
<strong>explicit</strong> match_results(<b>const</b> Allocator& a = Allocator());
|
|
match_results(<b>const</b> match_results& m);
|
|
match_results& <b>operator</b>=(<b>const</b> match_results& m);
|
|
~match_results();
|
|
size_type size()<b>const</b>;
|
|
<b>const</b> sub_match<iterator>& <b>operator</b>[](<b>int</b> n) <b>const</b>;
|
|
Allocator allocator()<b>const</b>;
|
|
difference_type length(<b>int</b> sub = 0)<b>const</b>;
|
|
difference_type position(<b>unsigned</b> <b>int</b> sub = 0)<b>const</b>;
|
|
<b>unsigned</b> <b>int</b> line()<b>const</b>;
|
|
iterator line_start()<b>const</b>;
|
|
std::basic_string<char_type> str(<b>int</b> sub = 0)<b>const</b>;
|
|
<b>void</b> swap(match_results& that);
|
|
<b>bool</b> <b>operator</b>==(<b>const</b> match_results& that)<b>const</b>;
|
|
<b>bool</b> <b>operator</b><(<b>const</b> match_results& that)<b>const</b>;
|
|
};
|
|
<strong>typedef</strong> match_results<<strong>const</strong> <strong>char</strong>*> cmatch;
|
|
<strong>typedef</strong> match_results<<strong>const</strong> <strong>wchar_t</strong>*> wcmatch; </pre>
|
|
|
|
<p><font >Class match_results is used for reporting what
|
|
matched a regular expression, it is passed to the matching
|
|
algorithms </font><a href="#query_match"><font >regex_match</font></a><font
|
|
> and </font><a href="#reg_search"><font >regex_search</font></a><font
|
|
>, and is used by </font><a href="#reg_grep"><font
|
|
>regex_grep</font></a><font > to notify the
|
|
callback function (or function object) what matched. Note that
|
|
the default allocator parameter has been chosen to match the
|
|
default allocator parameter to reg_expression. match_results has
|
|
the following public member functions: <br>
|
|
</font></p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >match_results(Allocator
|
|
a = Allocator());</font></td>
|
|
<td valign="top" width="45%"><font >Constructs an
|
|
instance of match_results, using allocator instance a.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >match_results(const
|
|
match_results& m);</font></td>
|
|
<td valign="top" width="45%"><font >Copy
|
|
constructor.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >match_results&
|
|
operator=(const match_results& m);</font></td>
|
|
<td valign="top" width="45%"><font >Assignment
|
|
operator.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font ><b>const</b>
|
|
sub_match<iterator>& <b>operator</b>[](<b>int</b>
|
|
n) const;</font></td>
|
|
<td valign="top" width="45%"><font >Returns what
|
|
matched, item 0 represents the whole string, item 1 the
|
|
first sub-expression and so on.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >Allocator
|
|
allocator()const;</font></td>
|
|
<td valign="top" width="45%"><font >Returns the
|
|
allocator used by the class.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >difference_type
|
|
length(<b>int</b> sub = 0)<b>const</b>;</font></td>
|
|
<td valign="top" width="45%"><font >Returns the
|
|
length of the matched subexpression, defaults to the
|
|
length of the whole match, in effect this is equivalent
|
|
to operator[](sub).second - operator[](sub).first.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >difference_type
|
|
position(<b>unsigned int</b> sub = 0)<b>const</b>;</font></td>
|
|
<td valign="top" width="45%"><font >Returns the
|
|
position of the matched sub-expression, defaults to the
|
|
position of the whole match. The returned value is the
|
|
position of the match relative to the start of the string.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font ><b>unsigned</b>
|
|
<b>int</b> line()<b>const</b>;</font></td>
|
|
<td valign="top" width="45%"><font >Returns the
|
|
index of the line on which the match occurred, indices
|
|
start with 1, not zero. Equivalent to the number of
|
|
newline characters prior to operator[](0).first plus one.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >iterator
|
|
line_start()<b>const;</b></font></td>
|
|
<td valign="top" width="45%"><font >Returns an
|
|
iterator denoting the start of the line on which the
|
|
match occurred.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >size_type
|
|
size()<b>const;</b></font></td>
|
|
<td valign="top" width="45%"><font >Returns how
|
|
many sub-expressions are present in the match, including
|
|
sub-expression zero (the whole match). This is the case
|
|
even if no matches were found in the search operation -
|
|
you must use the returned value from </font><a
|
|
href="#reg_search"><font >regex_search</font></a><font
|
|
> / </font><a href="#query_match"><font >regex_match</font></a><font
|
|
> to determine whether any match occurred.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
</table>
|
|
|
|
<p><font ><br>
|
|
</font></p>
|
|
|
|
<p><font >The operator[] member function needs further
|
|
explanation: it returns a const reference to a structure of type
|
|
sub_match<iterator>, which has the following public members:
|
|
<br>
|
|
</font></p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%"><font ><b>typedef</b>
|
|
<b>typename</b> std::iterator_traits<iterator>::value_type
|
|
value_type;</font></td>
|
|
<td valign="top" width="44%"><font >The type
|
|
pointed to by the iterators.</font></td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%"><font ><b>typedef</b>
|
|
<b>typename</b> std::iterator_traits<iterator>::difference_type
|
|
difference_type;</font></td>
|
|
<td valign="top" width="44%"><font >A type that
|
|
represents the difference between two iterators.</font></td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%"><font ><b>typedef</b>
|
|
iterator iterator_type;</font></td>
|
|
<td valign="top" width="44%"><font >The iterator
|
|
type.</font></td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%"><font >iterator
|
|
first</font></td>
|
|
<td valign="top" width="44%"><font >An iterator
|
|
denoting the position of the start of the match.</font></td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%"><font >iterator
|
|
second</font></td>
|
|
<td valign="top" width="44%"><font >An iterator
|
|
denoting the position of the end of the match.</font></td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%"><font ><b>bool</b>
|
|
matched</font></td>
|
|
<td valign="top" width="44%"><font >A Boolean
|
|
value denoting whether this sub-expression participated
|
|
in the match.</font></td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%"><font >difference_type
|
|
length()<b>const;</b></font></td>
|
|
<td valign="top" width="44%"><font >Returns the
|
|
length of the sub-expression match.</font></td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%"><font ><b>operator</b>
|
|
std::basic_string<value_type> ()<b>const</b>;</font></td>
|
|
<td valign="top" width="44%"><font >Converts the
|
|
sub-expression match into an instance of std::basic_string<>.
|
|
Note that this member may be either absent, or present to
|
|
a more limited degree depending upon your compiler
|
|
capabilities.</font></td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
</table>
|
|
|
|
<p><font >Operator[] takes an integer as an argument that
|
|
denotes the sub-expression for which to return information, the
|
|
argument can take the following special values: <br>
|
|
</font></p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%"><font >-2</font></td>
|
|
<td valign="top" width="44%"><font >Returns
|
|
everything from the end of the match, to the end of the
|
|
input string, equivalent to $' in perl. If this is a null
|
|
string, then: </font><p><font >first == second </font></p>
|
|
<p><font >And </font></p>
|
|
<p><font >matched == false.</font></p>
|
|
</td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%"><font >-1</font></td>
|
|
<td valign="top" width="44%"><font >Returns
|
|
everything from the start of the input string (or the end
|
|
of the last match if this is a grep operation), to the
|
|
start of this match. Equivalent to $` in perl. If this is
|
|
a null string, then: </font><p><font >first ==
|
|
second </font></p>
|
|
<p><font >And </font></p>
|
|
<p><font >matched == false.</font></p>
|
|
</td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%"><font >0</font></td>
|
|
<td valign="top" width="44%"><font >Returns the
|
|
whole of what matched, equivalent to $& in perl. The
|
|
matched parameter is always true.</font></td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%"><font >0 < N <
|
|
size()</font></td>
|
|
<td valign="top" width="44%"><font >Returns what
|
|
matched sub-expression N, if this sub-expression did not
|
|
participate in the match then </font><p><font
|
|
>matched == false </font></p>
|
|
<p><font >otherwise: </font></p>
|
|
<p><font >matched == true.</font></p>
|
|
</td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%"><font >N < -2 or
|
|
N >= size()</font></td>
|
|
<td valign="top" width="44%"><font >Represents an
|
|
out-of-range non-existent sub-expression. Returns a
|
|
"null" match in which </font><p><font >first
|
|
== second </font></p>
|
|
<p><font >And </font></p>
|
|
<p><font >matched == false.</font></p>
|
|
</td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
</table>
|
|
|
|
<p><font >Note that as well as being parameterised for an
|
|
allocator, match_results<> also takes an iterator type,
|
|
this allows any pair of iterators to be searched for a given
|
|
regular expression, provided the iterators have at least bi-directional
|
|
properties. </font></p>
|
|
|
|
<hr>
|
|
|
|
<h3><a name="query_match"></a>Algorithm regex_match</h3>
|
|
|
|
<p><font >#include <</font><a
|
|
href="../../boost/regex.hpp"><font >boost/regex.hpp</font></a><font
|
|
>> </font></p>
|
|
|
|
<p><font >The algorithm regex_match determines whether a
|
|
given regular expression matches a given sequence denoted by a
|
|
pair of iterators, the algorithm is defined as follows, note that
|
|
the result is true only if the expression matches the whole of
|
|
the input sequence, the main use of this function is data input
|
|
validation: </font></p>
|
|
|
|
<pre><b>template</b> <<b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits, <b>class</b> Allocator2>
|
|
<b>bool</b> regex_match(iterator first,
|
|
iterator last,
|
|
match_results<iterator, Allocator>& m,
|
|
<b>const</b> reg_expression<charT, traits, Allocator2>& e,
|
|
<b>unsigned</b> flags = match_default);</pre>
|
|
|
|
<p><font >The library also defines the following
|
|
convenience versions, which take either a const charT*, or a
|
|
const std::basic_string<>& in place of a pair of
|
|
iterators [note - these versions may not be available, or may be
|
|
available in a more limited form, depending upon your compiler's
|
|
capabilities]: </font></p>
|
|
|
|
<pre><b>template</b> <<b>class</b> charT, <b>class</b> Allocator, <b>class</b> traits, <b>class</b> Allocator2>
|
|
<b>bool</b> regex_match(<b>const</b> charT* str,
|
|
match_results<<b>const</b> charT*, Allocator>& m,
|
|
<b>const</b> reg_expression<charT, traits, Allocator2>& e,
|
|
<b>unsigned</b> flags = match_default);
|
|
|
|
<b>template</b> <<b>class</b> ST, <b>class</b> SA, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits, <b>class</b> Allocator2>
|
|
<b>bool</b> regex_match(<b>const</b> std::basic_string<charT, ST, SA>& s,
|
|
match_results<<b>typename</b> std::basic_string<charT, ST, SA>::const_iterator, Allocator>& m,
|
|
<b>const</b> reg_expression<charT, traits, Allocator2>& e,
|
|
<b>unsigned</b> flags = match_default);</pre>
|
|
|
|
<p><font >Finally there is a set of convenience versions
|
|
that simply return true or false and do not indicate what matched:
|
|
</font></p>
|
|
|
|
<pre><b>template</b> <<b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits, <b>class</b> Allocator2>
|
|
<b>bool</b> regex_match(iterator first,
|
|
iterator last,
|
|
<b> const</b> reg_expression<charT, traits, Allocator2>& e,
|
|
<b>unsigned</b> flags = match_default);
|
|
|
|
<b>template</b> <<b>class</b> charT, <b>class</b> Allocator, <b>class</b> traits, <b>class</b> Allocator2>
|
|
<b>bool</b> regex_match(<b>const</b> charT* str,
|
|
<b> const</b> reg_expression<charT, traits, Allocator2>& e,
|
|
<b>unsigned</b> flags = match_default);
|
|
|
|
<b>template</b> <<b>class</b> ST, <b>class</b> SA, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits, <b>class</b> Allocator2>
|
|
<b>bool</b> regex_match(<b>const</b> std::basic_string<charT, ST, SA>& s,
|
|
<b> const</b> reg_expression<charT, traits, Allocator2>& e,
|
|
<b>unsigned</b> flags = match_default);</pre>
|
|
|
|
<p><font >The parameters for the main function version
|
|
are as follows: <br>
|
|
</font></p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td width="51%"><font >iterator first</font></td>
|
|
<td><font >Denotes the start of the range to be
|
|
matched.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="51%"><font >iterator last</font></td>
|
|
<td valign="top" width="51%"><font >Denotes the
|
|
end of the range to be matched.</font></td>
|
|
<td> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="51%"><font >match_results<iterator,
|
|
Allocator>& m</font></td>
|
|
<td valign="top" width="51%"><font >An instance
|
|
of match_results in which what matched will be reported.
|
|
On exit if a match occurred then m[0] denotes the whole
|
|
of the string that matched, m[0].first must be equal to
|
|
first, m[0].second will be less than or equal to last. m[1]
|
|
denotes the first subexpression m[2] the second
|
|
subexpression and so on. If no match occurred then m[0].first
|
|
= m[0].second = last.</font></td>
|
|
<td> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="51%"><font >const
|
|
reg_expression<charT, traits, Allocator2>& e</font></td>
|
|
<td valign="top" width="51%"><font >Contains the
|
|
regular expression to be matched.</font></td>
|
|
<td> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="51%"><font >unsigned
|
|
flags = match_default</font></td>
|
|
<td valign="top" width="51%"><font >Determines
|
|
the semantics used for matching, a combination of one or
|
|
more </font><a href="#match_type"><font >match_flags</font></a><font
|
|
> enumerators.</font></td>
|
|
<td> </td>
|
|
</tr>
|
|
</table>
|
|
|
|
<p><font >regex_match returns false if no match occurs or
|
|
true if it does. A match only occurs if it starts at <b>first</b>
|
|
and finishes at <b>last</b>. Example: the following </font><a
|
|
href="example/snippets/regex_match_example.cpp"><font >example</font></a><font
|
|
> processes an ftp response: </font></p>
|
|
|
|
<pre><font color="#008000">#include <stdlib.h>
|
|
#include <boost/regex.hpp>
|
|
#include <string>
|
|
#include <iostream>
|
|
|
|
</font><b>using namespace</b> boost;
|
|
|
|
regex expression(<font color="#000080">"([0-9]+)(\\-| |$)(.*)"</font>);
|
|
|
|
<font color="#000080"><i>// process_ftp:
|
|
// on success returns the ftp response code, and fills
|
|
// msg with the ftp response message.
|
|
</i></font><b>int</b> process_ftp(<b>const</b> <b>char</b>* response, std::string* msg)
|
|
{
|
|
cmatch what;
|
|
<b>if</b>(regex_match(response, what, expression))
|
|
{
|
|
<font color="#000080"> <i>// what[0] contains the whole string
|
|
</i> <i>// what[1] contains the response code
|
|
</i> <i>// what[2] contains the separator character
|
|
</i> <i>// what[3] contains the text message.
|
|
</i></font> <b>if</b>(msg)
|
|
msg->assign(what[3].first, what[3].second);
|
|
<b>return</b> std::atoi(what[1].first);
|
|
}
|
|
<font color="#000080"> <i>// failure did not match
|
|
</i></font> <b>if</b>(msg)
|
|
msg->erase();
|
|
<b>return</b> -1;
|
|
}</pre>
|
|
|
|
<p><a name="match_type"></a><font >The value of the flags
|
|
parameter passed to the algorithm must be a combination of one or
|
|
more of the following values: <br>
|
|
</font></p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >match_default</font></td>
|
|
<td valign="top" width="45%"><font >The default
|
|
value, indicates that <b>first</b> represents the start
|
|
of a line, the start of a buffer, and (possibly) the
|
|
start of a word. Also implies that <b>last</b> represents
|
|
the end of a line, the end of the buffer and (possibly)
|
|
the end of a word. Implies that a dot sub-expression
|
|
"." will match both the newline character and a
|
|
null.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >match_not_bol</font></td>
|
|
<td valign="top" width="45%"><font >When this
|
|
flag is set then <b>first</b> does not represent the
|
|
start of a new line.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >match_not_eol</font></td>
|
|
<td valign="top" width="45%"><font >When this
|
|
flag is set then <b>last</b> does not represent the end
|
|
of a line.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >match_not_bob</font></td>
|
|
<td valign="top" width="45%"><font >When this
|
|
flag is set then <b>first</b> is not the beginning of a
|
|
buffer.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >match_not_eob</font></td>
|
|
<td valign="top" width="45%"><font >When this
|
|
flag is set then <b>last</b> does not represent the end
|
|
of a buffer.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >match_not_bow</font></td>
|
|
<td valign="top" width="45%"><font >When this
|
|
flag is set then <b>first</b> can never match the start
|
|
of a word.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >match_not_eow</font></td>
|
|
<td valign="top" width="45%"><font >When this
|
|
flag is set then <b>last</b> can never match the end of a
|
|
word.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >match_not_dot_newline</font></td>
|
|
<td valign="top" width="45%"><font >When this
|
|
flag is set then a dot expression "." can not
|
|
match the newline character.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><font >match_not_dot_null</font></td>
|
|
<td valign="top" width="45%"><font >When this
|
|
flag is set then a dot expression "." can not
|
|
match a null character.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%" height="75"> </td>
|
|
<td valign="top" width="45%" height="75"><font >match_prev_avail</font></td>
|
|
<td valign="top" width="45%" height="75"><font >When
|
|
this flag is set, then *--<b>first</b> is a valid
|
|
expression and the flags match_not_bol and match_not_bow
|
|
have no effect, since the value of the previous character
|
|
can be used to check these.</font></td>
|
|
<td width="5%" height="75"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%" height="15"> </td>
|
|
<td valign="top" width="45%" height="15"><font >match_any</font></td>
|
|
<td valign="top" width="45%" height="15"><font >When
|
|
this flag is set, then the first string matched is
|
|
returned, rather than the longest possible match. This
|
|
flag can significantly reduce the time taken to find a
|
|
match, but what matches is undefined.</font></td>
|
|
<td width="5%" height="15"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%" height="15"> </td>
|
|
<td valign="top" width="45%" height="15"><font >match_not_null</font></td>
|
|
<td valign="top" width="45%" height="15"><font >When
|
|
this flag is set, then the expression will never match a
|
|
null string.</font></td>
|
|
<td width="5%" height="15"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%" height="15"> </td>
|
|
<td valign="top" width="45%" height="15"><font >match_continuous</font></td>
|
|
<td valign="top" width="45%" height="15"><font >When
|
|
this flag is set, then during a grep operation, each
|
|
successive match must start from where the previous match
|
|
finished.</font></td>
|
|
<td width="5%" height="15"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%" height="15"> </td>
|
|
<td valign="top" width="45%" height="15"><font >match_partial</font></td>
|
|
<td valign="top" width="45%" height="15"><font >When
|
|
this flag is set, the regex algorithms will report </font><a
|
|
href="#partial_matches">partial matches</a><font >
|
|
- that is where one or more characters at the end of the
|
|
text input matched some prefix of the regular expression.</font></td>
|
|
<td width="5%" height="15"> </td>
|
|
</tr>
|
|
</table>
|
|
|
|
<p> </p>
|
|
|
|
<hr align="right">
|
|
|
|
<h3><a name="reg_search"></a>Algorithm regex_search</h3>
|
|
|
|
<p><font > #include <</font><a
|
|
href="../../boost/regex.hpp"><font >boost/regex.hpp</font></a><font
|
|
>> </font></p>
|
|
|
|
<p><font >The algorithm regex_search will search a range
|
|
denoted by a pair of iterators for a given regular expression.
|
|
The algorithm uses various heuristics to reduce the search time
|
|
by only checking for a match if a match could conceivably start
|
|
at that position. The algorithm is defined as follows: </font></p>
|
|
|
|
<pre><b>template</b> <<b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits, <b>class</b> Allocator2>
|
|
<b>bool</b> regex_search(iterator first,
|
|
iterator last,
|
|
match_results<iterator, Allocator>& m,
|
|
<b>const</b> reg_expression<charT, traits, Allocator2>& e,
|
|
<b>unsigned</b> flags = match_default);</pre>
|
|
|
|
<p><font >The library also defines the following
|
|
convenience versions, which take either a const charT*, or a
|
|
const std::basic_string<>& in place of a pair of
|
|
iterators [note - these versions may not be available, or may be
|
|
available in a more limited form, depending upon your compiler's
|
|
capabilities]: </font></p>
|
|
|
|
<pre><b>template</b> <<b>class</b> charT, <b>class</b> Allocator, <b>class</b> traits, <b>class</b> Allocator2>
|
|
<b>bool</b> regex_search(<b>const</b> charT* str,
|
|
match_results<<b>const</b> charT*, Allocator>& m,
|
|
<b>const</b> reg_expression<charT, traits, Allocator2>& e,
|
|
<b>unsigned</b> flags = match_default);
|
|
|
|
<b>template</b> <<b>class</b> ST, <b>class</b> SA, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits, <b>class</b> Allocator2>
|
|
<b>bool</b> regex_search(<b>const</b> std::basic_string<charT, ST, SA>& s,
|
|
match_results<<b>typename</b> std::basic_string<charT, ST, SA>::const_iterator, Allocator>& m,
|
|
<b>const</b> reg_expression<charT, traits, Allocator2>& e,
|
|
<b>unsigned</b> flags = match_default);</pre>
|
|
|
|
<p><font >The parameters for the main function version
|
|
are as follows: <br>
|
|
</font></p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="50%"><font >iterator
|
|
first</font></td>
|
|
<td valign="top" width="50%"><font >The starting
|
|
position of the range to search.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="50%"><font >iterator last</font></td>
|
|
<td valign="top" width="50%"><font >The ending
|
|
position of the range to search.</font></td>
|
|
<td> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="50%"><font >match_results<iterator,
|
|
Allocator>& m</font></td>
|
|
<td valign="top" width="50%"><font >An instance
|
|
of match_results in which what matched will be reported.
|
|
On exit if a match occurred then m[0] denotes the whole
|
|
of the string that matched, m[0].first and m[0].second
|
|
will be less than or equal to last. m[1] denotes the
|
|
first sub-expression m[2] the second sub-expression and
|
|
so on. If no match occurred then m[0].first = m[0].second
|
|
= last.</font></td>
|
|
<td> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="50%"><font >const
|
|
reg_expression<charT, traits, Allocator2>& e</font></td>
|
|
<td valign="top" width="50%"><font >The regular
|
|
expression to search for.</font></td>
|
|
<td> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="50%"><font >unsigned
|
|
flags = match_default</font></td>
|
|
<td valign="top" width="50%"><font >The flags
|
|
that determine what gets matched, a combination of one or
|
|
more </font><a href="#match_type"><font >match_flags</font></a><font
|
|
> enumerators.</font></td>
|
|
<td> </td>
|
|
</tr>
|
|
</table>
|
|
|
|
<p><font ><br>
|
|
</font></p>
|
|
|
|
<p><font >Example: the following </font><a
|
|
href="example/snippets/regex_search_example.cpp"><font >example</font></a><font
|
|
>, takes the contents of a file in the form of a string,
|
|
and searches for all the C++ class declarations in the file. The
|
|
code will work regardless of the way that std::string is
|
|
implemented, for example it could easily be modified to work with
|
|
the SGI rope class, which uses a non-contiguous storage strategy.
|
|
</font></p>
|
|
|
|
<pre><font color="#008000">#include <string>
|
|
#include <map>
|
|
#include <boost/regex.hpp>
|
|
</font><font color="#000080"><i>
|
|
// purpose:
|
|
// takes the contents of a file in the form of a string
|
|
// and searches for all the C++ class definitions, storing
|
|
// their locations in a map of strings/int's
|
|
</i></font><b>typedef</b> std::map<std::string, <b>int</b>, std::less<std::string> > map_type;
|
|
|
|
boost::regex expression("^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\\{|:[^;\\{()]*\\{)");
|
|
<b>
|
|
void</b> IndexClasses(map_type& m, <b>const</b> std::string& file)
|
|
{
|
|
std::string::const_iterator start, end;
|
|
start = file.begin();
|
|
end = file.end();
|
|
boost::match_results<std::string::const_iterator> what;
|
|
<b>unsigned</b> <b>int</b> flags = boost::match_default;
|
|
<b>while</b>(regex_search(start, end, what, expression, flags))
|
|
{
|
|
<font color="#000080"> <i>// what[0] contains the whole string
|
|
</i> <i>// what[5] contains the class name.
|
|
</i> <i>// what[6] contains the template specialisation if any.
|
|
</i> <i>// add class name and position to map:
|
|
</i></font> m[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] =
|
|
what[5].first - file.begin();
|
|
<font color="#000080"><i>// update search position:
|
|
</i></font> start = what[0].second;
|
|
<font color="#000080"><i>// update flags:
|
|
</i></font> flags |= boost::match_prev_avail;
|
|
flags |= boost::match_not_bob;
|
|
}
|
|
}
|
|
</pre>
|
|
|
|
<hr>
|
|
|
|
<h3><a name="reg_grep"></a>Algorithm regex_grep</h3>
|
|
|
|
<p><font >#include <</font><a
|
|
href="../../boost/regex.hpp"><font >boost/regex.hpp</font></a><font
|
|
>> </font></p>
|
|
|
|
<p><font > Regex_grep allows you to search through
|
|
an iterator range and locate all the (non-overlapping) matches
|
|
with a given regular expression. The function is declared as: </font></p>
|
|
|
|
<pre><b>template</b> <<b>class</b> Predicate, <b>class</b> iterator, <b>class</b> charT, <b>class</b> traits, <b>class</b> Allocator>
|
|
<b>unsigned</b> <b>int</b> regex_grep(Predicate foo,
|
|
iterator first,
|
|
iterator last,
|
|
<b> const</b> reg_expression<charT, traits, Allocator>& e,
|
|
<b> unsigned</b> flags = match_default)</pre>
|
|
|
|
<p><font >The library also defines the following
|
|
convenience versions, which take either a const charT*, or a
|
|
const std::basic_string<>& in place of a pair of
|
|
iterators [note - these versions may not be available, or may be
|
|
available in a more limited form, depending upon your compiler's
|
|
capabilities]: </font></p>
|
|
|
|
<pre><b>template</b> <<b>class</b> Predicate, <b>class</b> charT, <b>class</b> Allocator, <b>class</b> traits>
|
|
<b>unsigned</b> <b>int</b> regex_grep(Predicate foo,
|
|
<b>const</b> charT* str,
|
|
<b>const</b> reg_expression<charT, traits, Allocator>& e,
|
|
<b>unsigned</b> flags = match_default);
|
|
|
|
<b>template</b> <<b>class</b> Predicate, <b>class</b> ST, <b>class</b> SA, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits>
|
|
<b>unsigned</b> <b>int</b> regex_grep(Predicate foo,
|
|
<b>const</b> std::basic_string<charT, ST, SA>& s,
|
|
<b>const</b> reg_expression<charT, traits, Allocator>& e,
|
|
<b>unsigned</b> flags = match_default);</pre>
|
|
|
|
<p><font >The parameters for the primary version of
|
|
regex_grep have the following meanings: <br>
|
|
</font></p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="624">
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="50%"><font >foo</font></td>
|
|
<td valign="top" width="50%"><font >A predicate
|
|
function object or function pointer, see below for more
|
|
information.</font></td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="50%"><font >first</font></td>
|
|
<td valign="top" width="50%"><font >The start of
|
|
the range to search.</font></td>
|
|
<td> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="50%"><font >last</font></td>
|
|
<td valign="top" width="50%"><font >The end of
|
|
the range to search.</font></td>
|
|
<td> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="50%"><font >e</font></td>
|
|
<td valign="top" width="50%"><font >The regular
|
|
expression to search for.</font></td>
|
|
<td> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="50%"><font >flags</font></td>
|
|
<td valign="top" width="50%"><font >The flags
|
|
that determine how matching is carried out, a combination of one or more </font><a
|
|
href="#match_type"><font >match_flags</font></a><font
|
|
> enumerators.</font></td>
|
|
<td> </td>
|
|
</tr>
|
|
</table>
|
|
|
|
<p><font > The algorithm finds all of the non-overlapping
|
|
matches of the expression e, for each match it fills a </font><a
|
|
href="#reg_match"><font >match_results</font></a><font
|
|
><iterator, Allocator> structure, which contains
|
|
information on what matched, and calls the predicate foo, passing
|
|
the match_results<iterator, Allocator> as a single argument.
|
|
If the predicate returns true, then the grep operation continues,
|
|
otherwise it terminates without searching for further matches.
|
|
The function returns the number of matches found.</font></p>
|
|
|
|
<p><font >The general form of the predicate is: </font></p>
|
|
|
|
<pre><b>struct</b> grep_predicate
|
|
{
|
|
<b> bool</b> <b>operator</b>()(<b>const</b> match_results<iterator_type, expression_type::alloc_type>& m);
|
|
};</pre>
|
|
|
|
<p><font >For example the regular expression "a*b"
|
|
would find one match in the string "aaaaab" and two in
|
|
the string "aaabb". </font></p>
|
|
|
|
<p><font >Remember this algorithm can be used for a lot
|
|
more than implementing a version of grep, the predicate can be
|
|
and do anything that you want, grep utilities would output the
|
|
results to the screen, another program could index a file based
|
|
on a regular expression and store a set of bookmarks in a list,
|
|
or a text file conversion utility would output to file. The
|
|
results of one regex_grep can even be chained into another
|
|
regex_grep to create recursive parsers. </font></p>
|
|
|
|
<p><a href="example/snippets/regex_grep_example_1.cpp"><font
|
|
>Example</font></a><font >: convert the example
|
|
from <i>regex_search</i> to use <i>regex_grep</i> instead: </font></p>
|
|
|
|
<pre><font color="#008000">#include <string>
|
|
#include <map>
|
|
#include <boost/regex.hpp>
|
|
|
|
</font><font color="#000080"><i>// IndexClasses:
|
|
// takes the contents of a file in the form of a string
|
|
// and searches for all the C++ class definitions, storing
|
|
// their locations in a map of strings/int's
|
|
</i></font><b>
|
|
typedef</b> std::map<std::string, <b>int</b>, std::less<std::string> > map_type;
|
|
|
|
boost::regex expression(<font color="#000080">"^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
|
|
"(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?[[:space:]]*)*(\\<\\w*\\>)"
|
|
"[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\\{|:[^;\\{()]*\\{)"</font>);
|
|
<b>
|
|
class</b> IndexClassesPred
|
|
{
|
|
map_type& m;
|
|
std::string::const_iterator base;
|
|
<b>public</b>:
|
|
IndexClassesPred(map_type& a, std::string::const_iterator b) : m(a), base(b) {}
|
|
<b>bool</b> <b>operator</b>()(<b>const</b> boost::match_results<std::string::const_iterator, boost::regex::alloc_type>& what)
|
|
{
|
|
<font color="#000080"> <i>// what[0] contains the whole string
|
|
</i> <i>// what[5] contains the class name.
|
|
</i> <i>// what[6] contains the template specialisation if any.
|
|
</i> <i>// add class name and position to map:
|
|
</i></font> m[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] =
|
|
what[5].first - base;
|
|
<b>return</b> <b>true</b>;
|
|
}
|
|
};
|
|
<b>
|
|
void</b> IndexClasses(map_type& m, <b>const</b> std::string& file)
|
|
{
|
|
std::string::const_iterator start, end;
|
|
start = file.begin();
|
|
end = file.end();
|
|
regex_grep(IndexClassesPred(m, start), start, end, expression);
|
|
} </pre>
|
|
|
|
<p><a href="example/snippets/regex_grep_example_2.cpp"><font
|
|
>Example</font></a><font >: Use regex_grep to
|
|
call a global callback function: </font></p>
|
|
|
|
<pre><font color="#008000">#include <string>
|
|
#include <map>
|
|
#include <boost/regex.hpp>
|
|
|
|
</font><font color="#000080"><i>// purpose:
|
|
// takes the contents of a file in the form of a string
|
|
// and searches for all the C++ class definitions, storing
|
|
// their locations in a map of strings/int's
|
|
</i></font><b>
|
|
typedef</b> std::map<std::string, <b>int</b>, std::less<std::string> > map_type;
|
|
|
|
boost::regex expression(<font color="#000080">"^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\\{|:[^;\\{()]*\\{)"</font>);
|
|
|
|
map_type class_index;
|
|
std::string::const_iterator base;
|
|
|
|
<b>bool</b> grep_callback(<b>const</b> boost::match_results<std::string::const_iterator, boost::regex::alloc_type>& what)
|
|
{
|
|
<font color="#000080"> <i>// what[0] contains the whole string
|
|
</i> <i>// what[5] contains the class name.
|
|
</i> <i>// what[6] contains the template specialisation if any.
|
|
</i> <i>// add class name and position to map:
|
|
</i></font> class_index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] =
|
|
what[5].first - base;
|
|
<b>return</b> <b>true</b>;
|
|
}
|
|
<b>
|
|
void</b> IndexClasses(<b>const</b> std::string& file)
|
|
{
|
|
std::string::const_iterator start, end;
|
|
start = file.begin();
|
|
end = file.end();
|
|
base = start;
|
|
regex_grep(grep_callback, start, end, expression, boost::match_default);
|
|
}
|
|
</pre>
|
|
|
|
<p><a href="example/snippets/regex_grep_example_3.cpp"><font
|
|
>Example</font></a><font >: use regex_grep to
|
|
call a class member function, use the standard library adapters <i>std::mem_fun</i>
|
|
and <i>std::bind1st</i> to convert the member function into a
|
|
predicate: </font></p>
|
|
|
|
<pre><font color="#008000">#include <string>
|
|
#include <map>
|
|
#include <boost/regex.hpp>
|
|
#include <functional>
|
|
</font><font color="#000080"><i>
|
|
// purpose:
|
|
// takes the contents of a file in the form of a string
|
|
// and searches for all the C++ class definitions, storing
|
|
// their locations in a map of strings/int's
|
|
|
|
</i></font><b>typedef</b> std::map<std::string, <b>int</b>, std::less<std::string> > map_type;
|
|
<b>
|
|
class</b> class_index
|
|
{
|
|
boost::regex expression;
|
|
map_type index;
|
|
std::string::const_iterator base;
|
|
<b>bool</b> grep_callback(boost::match_results<std::string::const_iterator, boost::regex::alloc_type> what);
|
|
<b>public</b>:
|
|
<b> void</b> IndexClasses(<b>const</b> std::string& file);
|
|
class_index()
|
|
: index(),
|
|
expression(<font
|
|
color="#000080">"^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
|
|
"(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?"
|
|
"[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?"
|
|
"(\\{|:[^;\\{()]*\\{)"
|
|
</font> ){}
|
|
};
|
|
<b>
|
|
bool</b> class_index::grep_callback(boost::match_results<std::string::const_iterator, boost::regex::alloc_type> what)
|
|
{
|
|
<font color="#000080"> <i>// what[0] contains the whole string
|
|
</i> <i>// what[5] contains the class name.
|
|
</i> <i>// what[6] contains the template specialisation if any.
|
|
</i> <i>// add class name and position to map:
|
|
</i></font> index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] =
|
|
what[5].first - base;
|
|
<b>return</b> <b>true</b>;
|
|
}
|
|
|
|
<b>void</b> class_index::IndexClasses(<b>const</b> std::string& file)
|
|
{
|
|
std::string::const_iterator start, end;
|
|
start = file.begin();
|
|
end = file.end();
|
|
base = start;
|
|
regex_grep(std::bind1st(std::mem_fun(&class_index::grep_callback), <b>this</b>),
|
|
start,
|
|
end,
|
|
expression);
|
|
}
|
|
</pre>
|
|
|
|
<p><a href="example/snippets/regex_grep_example_4.cpp"><font
|
|
>Finally</font></a><font >, C++ Builder users can
|
|
use C++ Builder's closure type as a callback argument: </font></p>
|
|
|
|
<pre><font color="#008000">#include <string>
|
|
#include <map>
|
|
#include <boost/regex.hpp>
|
|
#include <functional>
|
|
</font><font color="#000080"><i>
|
|
// purpose:
|
|
// takes the contents of a file in the form of a string
|
|
// and searches for all the C++ class definitions, storing
|
|
// their locations in a map of strings/int's
|
|
|
|
</i></font><b>typedef</b> std::map<std::string, <b>int</b>, std::less<std::string> > map_type;
|
|
<b>class</b> class_index
|
|
{
|
|
boost::regex expression;
|
|
map_type index;
|
|
std::string::const_iterator base;
|
|
<b>typedef</b> boost::match_results<std::string::const_iterator, boost::regex::alloc_type> arg_type;
|
|
<b>bool</b> grep_callback(<b>const</b> arg_type& what);
|
|
<b>public</b>:
|
|
<b>typedef</b> <b>bool</b> (<b>__closure</b>* grep_callback_type)(<b>const</b> arg_type&);
|
|
<b>void</b> IndexClasses(<b>const</b> std::string& file);
|
|
class_index()
|
|
: index(),
|
|
expression(<font
|
|
color="#000080">"^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
|
|
"(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?"
|
|
"[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?"
|
|
"(\\{|:[^;\\{()]*\\{)"
|
|
</font> ){}
|
|
};
|
|
|
|
<b>bool</b> class_index::grep_callback(<b>const</b> arg_type& what)
|
|
{
|
|
<font color="#000080"> <i>// what[0] contains the whole string </i>
|
|
<i>// what[5] contains the class name. </i>
|
|
<i>// what[6] contains the template specialisation if any. </i>
|
|
<i>// add class name and position to map: </i></font>
|
|
index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] =
|
|
what[5].first - base;
|
|
<b>return</b> <b>true</b>;
|
|
}
|
|
|
|
<b>void</b> class_index::IndexClasses(<b>const</b> std::string& file)
|
|
{
|
|
std::string::const_iterator start, end;
|
|
start = file.begin();
|
|
end = file.end();
|
|
base = start;
|
|
class_index::grep_callback_type cl = &(<b>this</b>->grep_callback);
|
|
regex_grep(cl,
|
|
start,
|
|
end,
|
|
expression);
|
|
} </pre>
|
|
|
|
<hr>
|
|
|
|
<h3> <a name="reg_format"></a>Algorithm regex_format</h3>
|
|
|
|
<p><font >#include <</font><a
|
|
href="../../boost/regex.hpp"><font >boost/regex.hpp</font></a><font
|
|
>> </font></p>
|
|
|
|
<p><font >The algorithm regex_format takes the results of
|
|
a match and creates a new string based upon a </font><a
|
|
href="format_string.htm#format_string"><font >format
|
|
string</font></a><font >, regex_format can be used for
|
|
search and replace operations: </font></p>
|
|
|
|
<pre><b>template</b> <<b>class</b> OutputIterator, <b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT>
|
|
OutputIterator regex_format(OutputIterator out,
|
|
<b>const</b> match_results<iterator, Allocator>& m,
|
|
<b>const</b> charT* fmt,
|
|
<b>unsigned</b> flags = 0);
|
|
<b>
|
|
template</b> <<b>class</b> OutputIterator, <b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT>
|
|
OutputIterator regex_format(OutputIterator out,
|
|
<b>const</b> match_results<iterator, Allocator>& m,
|
|
<b>const</b> std::basic_string<charT>& fmt,
|
|
<b>unsigned</b> flags = 0);</pre>
|
|
|
|
<p><font >The library also defines the following
|
|
convenience variation of regex_format, which returns the result
|
|
directly as a string, rather than outputting to an iterator [note
|
|
- this version may not be available, or may be available in a
|
|
more limited form, depending upon your compiler's capabilities]: </font></p>
|
|
|
|
<pre><b>template</b> <<b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT>
|
|
std::basic_string<charT> regex_format
|
|
(<b>const</b> match_results<iterator, Allocator>& m,
|
|
<b>const</b> charT* fmt,
|
|
<b>unsigned</b> flags = 0);
|
|
|
|
<b>template</b> <<b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT>
|
|
std::basic_string<charT> regex_format
|
|
(<b>const</b> match_results<iterator, Allocator>& m,
|
|
<b>const</b> std::basic_string<charT>& fmt,
|
|
<b>unsigned</b> flags = 0);</pre>
|
|
|
|
<p><font >Parameters to the main version of the function
|
|
are passed as follows: <br>
|
|
</font></p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td valign="top" width="9%"> </td>
|
|
<td valign="top" width="39%"><font >OutputIterator
|
|
out</font></td>
|
|
<td valign="top" width="44%"><font >An output
|
|
iterator type, the output string is sent to this iterator.
|
|
Typically this would be a std::ostream_iterator.</font></td>
|
|
<td valign="top" width="9%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="9%"> </td>
|
|
<td valign="top" width="39%"><font ><b>const</b>
|
|
match_results<iterator, Allocator>& m</font></td>
|
|
<td valign="top" width="44%"><font >An instance
|
|
of match_results<> obtained from one of the
|
|
matching algorithms above, and denoting what matched.</font></td>
|
|
<td valign="top" width="9%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="9%"> </td>
|
|
<td valign="top" width="39%"><font ><b>const</b>
|
|
charT* fmt</font></td>
|
|
<td valign="top" width="44%"><font >A format
|
|
string that determines how the match is transformed into
|
|
the new string.</font></td>
|
|
<td valign="top" width="9%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="9%"> </td>
|
|
<td valign="top" width="39%"><font ><b>unsigned</b>
|
|
flags</font></td>
|
|
<td valign="top" width="44%"><font >Optional
|
|
flags which describe how the format string is to be
|
|
interpreted.</font></td>
|
|
<td valign="top" width="9%"> </td>
|
|
</tr>
|
|
</table>
|
|
|
|
<p><a name="format_flags"></a><font >Format flags are
|
|
defined as follows: <br>
|
|
</font></p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td valign="top" width="9%"> </td>
|
|
<td valign="top" width="39%"><font >format_all</font></td>
|
|
<td valign="top" width="43%"><font >Enables all
|
|
syntax options (perl-like plus extensions).</font></td>
|
|
<td valign="top" width="9%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="9%"> </td>
|
|
<td valign="top" width="39%"><font >format_sed</font></td>
|
|
<td valign="top" width="43%"><font >Allows only a
|
|
sed-like syntax.</font></td>
|
|
<td valign="top" width="9%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="9%"> </td>
|
|
<td valign="top" width="39%"><font >format_perl</font></td>
|
|
<td valign="top" width="43%"><font >Allows only a
|
|
perl-like syntax.</font></td>
|
|
<td valign="top" width="9%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="9%"> </td>
|
|
<td valign="top" width="39%"><font >format_no_copy</font></td>
|
|
<td valign="top" width="43%"><font >Disables
|
|
copying of unmatched sections to the output string during
|
|
</font><a href="#reg_merge"><font >regex_merge</font></a><font
|
|
> operations.</font></td>
|
|
<td valign="top" width="9%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td>format_first_only</td>
|
|
<td>When this flag is set only the first occurrence will
|
|
be replaced (applies to regex_merge only).</td>
|
|
<td> </td>
|
|
</tr>
|
|
</table>
|
|
|
|
<p><font ><br>
|
|
</font></p>
|
|
|
|
<p><font >The format string syntax (and available options)
|
|
is described more fully under </font><a
|
|
href="format_string.htm#format_string"><font >format
|
|
strings</font></a><font >. </font></p>
|
|
|
|
<hr>
|
|
|
|
<h3><a name="reg_merge"></a>Algorithm regex_merge</h3>
|
|
|
|
<p><font >#include <</font><a
|
|
href="../../boost/regex.hpp"><font >boost/regex.hpp</font></a><font
|
|
>> </font></p>
|
|
|
|
<p><font >The algorithm regex_merge is a combination of </font><a
|
|
href="#reg_grep"><font >regex_grep</font></a><font
|
|
> and </font><a href="#reg_format"><font >regex_format</font></a><font
|
|
>. That is, it greps through the string finding all the
|
|
matches to the regular expression, for each match it then calls </font><a
|
|
href="#reg_format"><font >regex_format</font></a><font
|
|
> to format the string and sends the result to the output
|
|
iterator. Sections of text that do not match are copied to the
|
|
output unchanged only if the flags parameter does not have the
|
|
flag </font><a href="#format_flags"><font >format_no_copy</font></a><font
|
|
> set. If the flag </font><a href="#format_flags"><font
|
|
>format_first_only</font></a><font > is set then
|
|
only the first occurrence is replaced rather than all occurrences.</font></p>
|
|
|
|
<pre><b>template</b> <<b>class</b> OutputIterator, <b>class</b> iterator, <b>class</b> traits, <b>class</b> Allocator, <b>class</b> charT>
|
|
OutputIterator regex_merge(OutputIterator out,
|
|
iterator first,
|
|
iterator last,
|
|
<b>const</b> reg_expression<charT, traits, Allocator>& e,
|
|
<b>const</b> charT* fmt,
|
|
<b> unsigned</b> <b>int</b> flags = match_default);
|
|
|
|
<b>template</b> <<b>class</b> OutputIterator, <b>class</b> iterator, <b>class</b> traits, <b>class</b> Allocator, <b>class</b> charT>
|
|
OutputIterator regex_merge(OutputIterator out,
|
|
iterator first,
|
|
iterator last,
|
|
<b>const</b> reg_expression<charT, traits, Allocator>& e,
|
|
<b>const</b> std::basic_string<charT>& fmt,
|
|
<b> unsigned</b> <b>int</b> flags = match_default);</pre>
|
|
|
|
<p><font >The library also defines the following
|
|
convenience variation of regex_merge, which returns the result
|
|
directly as a string, rather than outputting to an iterator [note
|
|
- this version may not be available, or may be available in a
|
|
more limited form, depending upon your compiler's capabilities]: </font></p>
|
|
|
|
<pre><b>template</b> <<b>class</b> traits, <b>class</b> Allocator, <b>class</b> charT>
|
|
std::basic_string<charT> regex_merge(<b>const</b> std::basic_string<charT>& text,
|
|
<b>const</b> reg_expression<charT, traits, Allocator>& e,
|
|
<b>const</b> charT* fmt,
|
|
<b> unsigned</b> <b>int</b> flags = match_default);
|
|
|
|
<b>template</b> <<b>class</b> traits, <b>class</b> Allocator, <b>class</b> charT>
|
|
std::basic_string<charT> regex_merge(<b>const</b> std::basic_string<charT>& text,
|
|
<b>const</b> reg_expression<charT, traits, Allocator>& e,
|
|
<b>const</b> std::basic_string<charT>& fmt,
|
|
<b> unsigned</b> <b>int</b> flags = match_default);</pre>
|
|
|
|
<p><font >Parameters to the main version of the function
|
|
are passed as follows: <br>
|
|
</font></p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td valign="top" width="7%"> </td>
|
|
<td valign="top" width="40%"><font >OutputIterator
|
|
out</font></td>
|
|
<td valign="top" width="45%"><font >An output
|
|
iterator type, the output string is sent to this iterator.
|
|
Typically this would be a std::ostream_iterator.</font></td>
|
|
<td valign="top" width="8%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="7%"> </td>
|
|
<td valign="top" width="40%"><font >iterator
|
|
first</font></td>
|
|
<td valign="top" width="45%"><font >The start of
|
|
the range of text to grep.</font></td>
|
|
<td valign="top" width="8%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="7%"> </td>
|
|
<td valign="top" width="40%"><font >iterator last</font></td>
|
|
<td valign="top" width="45%"><font >The end of
|
|
the range of text to grep.</font></td>
|
|
<td valign="top" width="8%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="7%"> </td>
|
|
<td valign="top" width="40%"><font ><b>const</b>
|
|
reg_expression<charT, traits, Allocator>& e</font></td>
|
|
<td valign="top" width="45%"><font >The
|
|
expression to search for.</font></td>
|
|
<td valign="top" width="8%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="7%"> </td>
|
|
<td valign="top" width="40%"><font ><b>const</b>
|
|
charT* fmt</font></td>
|
|
<td valign="top" width="45%"><font >The format
|
|
string to be applied to sections of text that match.</font></td>
|
|
<td valign="top" width="8%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="7%"> </td>
|
|
<td valign="top" width="40%"><font ><b>unsigned</b>
|
|
<b>int</b> flags = match_default</font></td>
|
|
<td valign="top" width="45%"><font >Flags which
|
|
determine how the expression is matched - see </font><a
|
|
href="#match_type"><font >match_flags</font></a><font
|
|
>, and how the format string is interpreted - see
|
|
</font><a href="#format_flags"><font >format_flags</font></a><font
|
|
>.</font></td>
|
|
<td valign="top" width="8%"> </td>
|
|
</tr>
|
|
</table>
|
|
|
|
<p><font >Example: the following </font><a
|
|
href="example/snippets/regex_merge_example.cpp"><font >example</font></a><font
|
|
> takes C/C++ source code as input, and outputs syntax
|
|
highlighted HTML code. </font></p>
|
|
|
|
<pre>
|
|
<font color="#008080">#include <fstream>
|
|
#include <sstream>
|
|
#include <string>
|
|
#include <iterator>
|
|
#include <boost/regex.hpp>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
</font>
|
|
<font color="#000080"><i>// purpose:
|
|
// takes the contents of a file and transforms it to
|
|
// syntax highlighted code in html format
|
|
</i></font>
|
|
boost::regex e1, e2;
|
|
<b>extern</b> <b>const</b> <b>char</b>* expression_text;
|
|
<b>extern</b> <b>const</b> <b>char</b>* format_string;
|
|
<b>extern</b> <b>const</b> <b>char</b>* pre_expression;
|
|
<b>extern</b> <b>const</b> <b>char</b>* pre_format;
|
|
<b>extern</b> <b>const</b> <b>char</b>* header_text;
|
|
<b>extern</b> <b>const</b> <b>char</b>* footer_text;
|
|
|
|
<b>void</b> load_file(std::string& s, std::istream& is)
|
|
{
|
|
s.erase();
|
|
s.reserve(is.rdbuf()->in_avail());
|
|
<b>char</b> c;
|
|
<b>while</b>(is.get(c))
|
|
{
|
|
<b>if</b>(s.capacity() == s.size())
|
|
s.reserve(s.capacity() * <font color="#000080">3</font>);
|
|
s.append(<font color="#000080">1</font>, c);
|
|
}
|
|
}
|
|
|
|
<b>int</b> main(<b>int</b> argc, <b>const</b> <b>char</b>** argv)
|
|
{
|
|
try{
|
|
e1.assign(expression_text);
|
|
e2.assign(pre_expression);
|
|
<b>for</b>(<b>int</b> i = <font color="#000080">1</font>; i < argc; ++i)
|
|
{
|
|
std::cout << <font color="#0000FF">"Processing file "</font> << argv[i] << std::endl;
|
|
std::ifstream fs(argv[i]);
|
|
std::string in;
|
|
load_file(in, fs);
|
|
std::string out_name(std::string(argv[i]) + std::string(<font
|
|
color="#0000FF">".htm"</font>));
|
|
std::ofstream os(out_name.c_str());
|
|
os << header_text;
|
|
<font color="#000080"><i>// strip '<' and '>' first by outputting to a
|
|
</i></font> <font color="#000080"><i>// temporary string stream
|
|
</i></font> std::ostringstream t(std::ios::out | std::ios::binary);
|
|
std::ostream_iterator<<b>char</b>, <b>char</b>> oi(t);
|
|
boost::regex_merge(oi, in.begin(), in.end(), e2, pre_format);
|
|
<font color="#000080"><i>// then output to final output stream
|
|
</i></font> <font color="#000080"><i>// adding syntax highlighting:
|
|
</i></font> std::string s(t.str());
|
|
std::ostream_iterator<<b>char</b>, <b>char</b>> out(os);
|
|
boost::regex_merge(out, s.begin(), s.end(), e1, format_string);
|
|
os << footer_text;
|
|
}
|
|
}
|
|
<strong>catch</strong>(...)
|
|
{ <strong>return</strong> -1; }
|
|
<b>return</b> <font color="#000080">0</font>;
|
|
}
|
|
|
|
<b>extern</b> <b>const</b> <b>char</b>* pre_expression = <font
|
|
color="#0000FF">"(<)|(>)|\\r"</font>;
|
|
<b>extern</b> <b>const</b> <b>char</b>* pre_format = <font
|
|
color="#0000FF">"(?1&lt;)(?2&gt;)"</font>;
|
|
|
|
|
|
<b>const</b> <b>char</b>* expression_text = <font color="#000080"><i>// preprocessor directives: index 1
|
|
</i></font> <font color="#0000FF">"(^[[:blank:]]*#(?:[^\\\\\\n]|\\\\[^\\n[:punct:][:word:]]*[\\n[:punct:][:word:]])*)|"
|
|
</font> <font color="#000080"><i>// comment: index 2
|
|
</i></font> <font color="#0000FF">"(//[^\\n]*|/\\*.*?\\*/)|"
|
|
</font> <font color="#000080"><i>// literals: index 3
|
|
</i></font> <font color="#0000FF">"\\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\\>|"
|
|
</font> <font color="#000080"><i>// string literals: index 4
|
|
</i></font> <font color="#0000FF">"('(?:[^\\\\']|\\\\.)*'|\"(?:[^\\\\\"]|\\\\.)*\")|"
|
|
</font> <font color="#000080"><i>// keywords: index 5
|
|
</i></font> <font color="#0000FF">"\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import"
|
|
</font> <font color="#0000FF">"|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall"
|
|
</font> <font color="#0000FF">"|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool"
|
|
</font> <font color="#0000FF">"|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete"
|
|
</font> <font color="#0000FF">"|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto"
|
|
</font> <font color="#0000FF">"|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected"
|
|
</font> <font color="#0000FF">"|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast"
|
|
</font> <font color="#0000FF">"|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned"
|
|
</font> <font color="#0000FF">"|using|virtual|void|volatile|wchar_t|while)\\>"
|
|
</font> ;
|
|
|
|
<b>const</b> <b>char</b>* format_string = <font color="#0000FF">"(?1<font color=\"#008040\">$&</font>)"
|
|
</font> <font color="#0000FF">"(?2<I><font color=\"#000080\">$&</font></I>)"
|
|
</font> <font color="#0000FF">"(?3<font color=\"#0000A0\">$&</font>)"
|
|
</font> <font color="#0000FF">"(?4<font color=\"#0000FF\">$&</font>)"
|
|
</font> <font color="#0000FF">"(?5<B>$&</B>)"</font>;
|
|
|
|
<b>const</b> <b>char</b>* header_text = <font color="#0000FF">"<HTML>\n<HEAD>\n"
|
|
</font> <font color="#0000FF">"<TITLE>Auto-generated html formated source</TITLE>\n"
|
|
</font> <font color="#0000FF">"<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=windows-1252\">\n"
|
|
</font> <font color="#0000FF">"</HEAD>\n"
|
|
</font> <font color="#0000FF">"<BODY LINK=\"#0000ff\" VLINK=\"#800080\" BGCOLOR=\"#ffffff\">\n"
|
|
</font> <font color="#0000FF">"<P> </P>\n<PRE>"</font>;
|
|
|
|
<b>const</b> <b>char</b>* footer_text = <font color="#0000FF">"</PRE>\n</BODY>\n\n"</font>;</pre>
|
|
|
|
<hr>
|
|
|
|
<h3><a name="regex_split"></a>Algorithm regex_split</h3>
|
|
|
|
<p><font >#include <</font><a
|
|
href="../../boost/regex.hpp"><font >boost/regex.hpp</font></a><font
|
|
>> </font></p>
|
|
|
|
<p><font >Algorithm regex_split performs a similar
|
|
operation to the perl split operation, and comes in three
|
|
overloaded forms: </font></p>
|
|
|
|
<pre><b>template</b> <<b>class</b> OutputIterator, <b>class</b> charT, <b>class</b> Traits1, <b>class</b> Alloc1, <b>class</b> Traits2, <b>class</b> Alloc2>
|
|
std::size_t regex_split(OutputIterator out,
|
|
std::basic_string<charT, Traits1, Alloc1>& s,
|
|
<b> const</b> reg_expression<charT, Traits2, Alloc2>& e,
|
|
<b> unsigned</b> flags,
|
|
std::size_t max_split);
|
|
|
|
<b>template</b> <<b>class</b> OutputIterator, <b>class</b> charT, <b>class</b> Traits1, <b>class</b> Alloc1, <b>class</b> Traits2, <b>class</b> Alloc2>
|
|
std::size_t regex_split(OutputIterator out,
|
|
std::basic_string<charT, Traits1, Alloc1>& s,
|
|
<b> const</b> reg_expression<charT, Traits2, Alloc2>& e,
|
|
<b>unsigned</b> flags = match_default);
|
|
|
|
<b>template</b> <<b>class</b> OutputIterator, <b>class</b> charT, <b>class</b> Traits1, <b>class</b> Alloc1>
|
|
std::size_t regex_split(OutputIterator out,
|
|
std::basic_string<charT, Traits1, Alloc1>& s);</pre>
|
|
|
|
<p><font >Each version takes an output-iterator for
|
|
output, and a string for input. If the expression contains no
|
|
marked sub-expressions, then the algorithm writes one string onto
|
|
the output-iterator for each section of input that does not match
|
|
the expression. If the expression does contain marked sub-expressions,
|
|
then each time a match is found, one string for each marked sub-expression
|
|
will be written to the output-iterator. No more than <i>max_split
|
|
</i>strings will be written to the output-iterator. Before
|
|
returning, all the input processed will be deleted from the
|
|
string <i>s</i> (if <i>max_split </i>is not reached then all of <i>s</i>
|
|
will be deleted). Returns the number of strings written to the
|
|
output-iterator. If the parameter <i>max_split</i> is not
|
|
specified then it defaults to UINT_MAX. If no expression is
|
|
specified, then it defaults to "\s+", and splitting
|
|
occurs on whitespace. </font></p>
|
|
|
|
<p><a href="example/snippets/regex_split_example_1.cpp"><font
|
|
>Example</font></a><font >: the following
|
|
function will split the input string into a series of tokens, and
|
|
remove each token from the string <i>s</i>: </font></p>
|
|
|
|
<pre><b>unsigned</b> tokenise(std::list<std::string>& l, std::string& s)
|
|
{
|
|
<b> return</b> boost::regex_split(std::back_inserter(l), s);
|
|
}</pre>
|
|
|
|
<p><a href="example/snippets/regex_split_example_2.cpp"><font
|
|
>Example</font></a><font >: the following short
|
|
program will extract all of the URLs from an HTML file, and print
|
|
them out to <i>cout</i>: </font></p>
|
|
|
|
<pre><font color="#008000">#include <list>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <boost/regex.hpp>
|
|
</font>
|
|
boost::regex e(<font color="#000080">"<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\""</font>,
|
|
boost::regbase::normal | boost::regbase::icase);
|
|
|
|
<b>void</b> load_file(std::string& s, std::istream& is)
|
|
{
|
|
s.erase();
|
|
<font color="#000080">//
|
|
// attempt to grow string buffer to match file size,
|
|
// this doesn't always work...
|
|
</font>   s.reserve(is.rdbuf()-&gt;in_avail());
|
|
<b>char</b> c;
|
|
<b>while</b>(is.get(c))
|
|
{
|
|
      <font color="#000080">// use logarithmic growth strategy, in case
|
|
// in_avail (above) returned zero:
|
|
</font> <b>if</b>(s.capacity() == s.size())
|
|
s.reserve(s.capacity() * 3);
|
|
s.append(1, c);
|
|
}
|
|
}
|
|
|
|
|
|
<b>int</b> main(<b>int</b> argc, <b>char</b>** argv)
|
|
{
|
|
std::string s;
|
|
std::list<std::string> l;
|
|
|
|
<b>for</b>(<b>int</b> i = 1; i < argc; ++i)
|
|
{
|
|
std::cout << <font color="#000080">"Findings URL's in "</font> << argv[i] << <font
|
|
color="#000080">":"</font> << std::endl;
|
|
s.erase();
|
|
std::ifstream is(argv[i]);
|
|
load_file(s, is);
|
|
boost::regex_split(std::back_inserter(l), s, e);
|
|
<b>while</b>(l.size())
|
|
{
|
|
s = *(l.begin());
|
|
l.pop_front();
|
|
std::cout << s << std::endl;
|
|
}
|
|
}
|
|
<b>return</b> 0;
|
|
}</pre>
|
|
|
|
<hr>
|
|
|
|
<h3><a name="partial_matches"></a>Partial Matches</h3>
|
|
|
|
<p>The match-flag <code>match_partial</code> can be passed to the
|
|
following algorithms: <a href="#reg_match">regex_match</a>, <a
|
|
href="#reg_search">regex_search</a>, and <a href="#reg_grep">regex_grep</a>.
|
|
When used it indicates that partial as well as full matches
|
|
should be found. A partial match is one that matched one or more
|
|
characters at the end of the text input, but did not match all of
|
|
the regular expression (although it may have done so had more
|
|
input been available). Partial matches are typically used when
|
|
either validating data input (checking each character as it is
|
|
entered on the keyboard), or when searching texts that are either
|
|
too long to load into memory (or even into a memory mapped file),
|
|
or are of indeterminate length (for example the source may be a
|
|
socket or similar). Partial and full matches can be
|
|
differentiated as shown in the following table (the variable M
|
|
represents an instance of match_results&lt;&gt; as filled in by
|
|
regex_match, regex_search or regex_grep):<br>
|
|
</p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="638">
|
|
<tr>
|
|
<td valign="top" width="20%"> </td>
|
|
<td valign="top" width="20%">Result</td>
|
|
<td valign="top" width="20%">M[0].matched</td>
|
|
<td valign="top" width="20%">M[0].first</td>
|
|
<td valign="top" width="20%">M[0].second</td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="20%">No match</td>
|
|
<td valign="top" width="20%">False</td>
|
|
<td valign="top" width="20%">Undefined</td>
|
|
<td valign="top" width="20%">Undefined</td>
|
|
<td valign="top" width="20%">Undefined</td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="20%">Partial match</td>
|
|
<td valign="top" width="20%">True</td>
|
|
<td valign="top" width="20%">False</td>
|
|
<td valign="top" width="20%">Start of partial match.</td>
|
|
<td valign="top" width="20%">End of partial match (end of
|
|
text).</td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="20%">Full match</td>
|
|
<td valign="top" width="20%">True</td>
|
|
<td valign="top" width="20%">True</td>
|
|
<td valign="top" width="20%">Start of full match.</td>
|
|
<td valign="top" width="20%">End of full match.</td>
|
|
</tr>
|
|
</table>
|
|
|
|
<p>The following <a
|
|
href="example/snippets/partial_regex_match.cpp">example</a> tests
|
|
to see whether the text could be a valid credit card number. As
|
|
the user presses a key, the character entered would be added to
|
|
the string being built up, and passed to <code>is_possible_card_number</code>.
|
|
If this returns true then the text could be a valid card number,
|
|
so the user interface's OK button would be enabled. If it returns
|
|
false, then this is not yet a valid card number, but could be
|
|
with more input, so the user interface would disable the OK
|
|
button. Finally, if the procedure throws an exception the input
|
|
could never become a valid number, and the inputted character
|
|
must be discarded, and a suitable error indication displayed to
|
|
the user.</p>
|
|
|
|
<pre>#include &lt;string&gt;
|
|
#include &lt;iostream&gt;
|
|
#include &lt;boost/regex.hpp&gt;
|
|
|
|
boost::regex e("(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})");
|
|
|
|
bool is_possible_card_number(const std::string&amp; input)
|
|
{
|
|
//
|
|
// return false for partial match, true for full match, or throw for
|
|
// impossible match based on what we have so far...
|
|
   boost::match_results&lt;std::string::const_iterator&gt; what;
|
|
if(0 == boost::regex_match(input, what, e, boost::match_default | boost::match_partial))
|
|
{
|
|
// the input so far could not possibly be valid so reject it:
|
|
throw std::runtime_error("Invalid data entered - this could not possibly be a valid card number");
|
|
}
|
|
// OK so far so good, but have we finished?
|
|
if(what[0].matched)
|
|
{
|
|
// excellent, we have a result:
|
|
return true;
|
|
}
|
|
// what we have so far is only a partial match...
|
|
return false;
|
|
}</pre>
|
|
|
|
<p>In the following <a
|
|
href="example/snippets/partial_regex_grep.cpp">example</a>, text
|
|
input is taken from a stream containing an unknown amount of
|
|
text; this example simply counts the number of html tags
|
|
encountered in the stream. The text is loaded into a buffer and
|
|
searched a part at a time; if a partial match was encountered,
|
|
then the partial match gets searched a second time as the start
|
|
of the next batch of text:</p>
|
|
|
|
<pre>#include &lt;iostream&gt;
|
|
#include &lt;fstream&gt;
|
|
#include &lt;sstream&gt;
|
|
#include &lt;string&gt;
|
|
#include &lt;boost/regex.hpp&gt;
|
|
|
|
// match some kind of html tag:
|
|
boost::regex e("&lt;[^&gt;]*&gt;");
|
|
// count how many:
|
|
unsigned int tags = 0;
|
|
// saved position of partial match:
|
|
char* next_pos = 0;
|
|
|
|
bool grep_callback(const boost::match_results&lt;char*&gt;&amp; m)
|
|
{
|
|
if(m[0].matched == false)
|
|
{
|
|
// save position and return:
|
|
next_pos = m[0].first;
|
|
}
|
|
else
|
|
++tags;
|
|
return true;
|
|
}
|
|
|
|
void search(std::istream&amp; is)
|
|
{
|
|
char buf[4096];
|
|
next_pos = buf + sizeof(buf);
|
|
bool have_more = true;
|
|
while(have_more)
|
|
{
|
|
// how much do we copy forward from last try:
|
|
unsigned leftover = (buf + sizeof(buf)) - next_pos;
|
|
// and how much is left to fill:
|
|
unsigned size = next_pos - buf;
|
|
// copy forward whatever we have left:
|
|
memcpy(buf, next_pos, leftover);
|
|
// fill the rest from the stream:
|
|
unsigned read = is.readsome(buf + leftover, size);
|
|
// check to see if we've run out of text:
|
|
have_more = read == size;
|
|
// reset next_pos:
|
|
next_pos = buf + sizeof(buf);
|
|
// and then grep:
|
|
boost::regex_grep(grep_callback,
|
|
buf,
|
|
buf + read + leftover,
|
|
e,
|
|
boost::match_default | boost::match_partial);
|
|
}
|
|
}</pre>
|
|
|
|
<hr align="left">
|
|
|
|
<p><font ><i>Copyright </i></font><a
|
|
href="mailto:John_Maddock@compuserve.com"><font ><i>Dr
|
|
John Maddock</i></font></a><font ><i> 1998-2001 all
|
|
rights reserved.</i> </font></p>
|
|
</body>
|
|
</html>
|
|
|