mirror of
https://github.com/boostorg/regex.git
synced 2025-07-13 20:36:39 +02:00
2478 lines
121 KiB
HTML
2478 lines
121 KiB
HTML
<html>
|
||
|
||
<head>
|
||
<meta http-equiv="Content-Type"
|
||
content="text/html; charset=iso-8859-1">
|
||
<meta name="Template"
|
||
content="C:\PROGRAM FILES\MICROSOFT OFFICE\OFFICE\html.dot">
|
||
<meta name="GENERATOR" content="Microsoft FrontPage Express 2.0">
|
||
<title>Regex++, template class and algorithm reference</title>
|
||
</head>
|
||
|
||
<body bgcolor="#FFFFFF" link="#0000FF" vlink="#800080">
|
||
|
||
<p> </p>
|
||
|
||
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
||
<tr>
|
||
<td valign="top"><h3 align="right"><img
|
||
src="../../c++boost.gif" alt="C++ Boost" width="276"
|
||
height="86"></h3>
|
||
</td>
|
||
<td valign="top"><h3 align="center"><i>Regex++ template
|
||
class reference.</i></h3>
|
||
<p align="left"><i>Copyright (c) 1998-2001 </i></p>
|
||
<p align="left"><i>Dr John Maddock</i></p>
|
||
<p align="left"><i>Permission to use, copy, modify,
|
||
distribute and sell this software and its documentation
|
||
for any purpose is hereby granted without fee, provided
|
||
that the above copyright notice appear in all copies and
|
||
that both that copyright notice and this permission
|
||
notice appear in supporting documentation. Dr John
|
||
Maddock makes no representations about the suitability of
|
||
this software for any purpose. It is provided "as is"
|
||
without express or implied warranty.</i></p>
|
||
</td>
|
||
</tr>
|
||
</table>
|
||
|
||
<hr>
|
||
|
||
<h3><a name="regbase"></a>class regbase</h3>
|
||
|
||
<p>#include <<a href="../../boost/regex.hpp">boost/regex.hpp</a>>
|
||
</p>
|
||
|
||
<p>Class regbase is the template argument independent base class
|
||
for reg_expression, the only public members are the <i>flag_type</i>
|
||
enumerated values that determine how regular expressions are
|
||
interpreted. </p>
|
||
|
||
<pre><b>class</b> regbase
|
||
{
|
||
<b>public</b>:
|
||
<b>enum</b> flag_type_
|
||
{
|
||
escape_in_lists = 1, <font
|
||
color="#000080">// '\\' special inside [...]
|
||
</font> char_classes = escape_in_lists << 1, <font
|
||
color="#000080"><i>// [[:CLASS:]] allowed
|
||
</i></font> intervals = char_classes << 1, <font
|
||
color="#000080"><i>// {x,y} allowed
|
||
</i></font> limited_ops = intervals << 1, <font
|
||
color="#000080"><i>// all of + ? and | are normal characters
|
||
</i></font> newline_alt = limited_ops << 1, <font
|
||
color="#000080"><i>// \n is the same as |
|
||
</i></font> bk_plus_qm = newline_alt << 1, <font
|
||
color="#000080"><i>// uses \+ and \?
|
||
</i></font> bk_braces = bk_plus_qm << 1, <font
|
||
color="#000080"><i>// uses \{ and \}
|
||
</i></font> bk_parens = bk_braces << 1, <font
|
||
color="#000080"><i>// uses \( and \)
|
||
</i></font> bk_refs = bk_parens << 1, <font
|
||
color="#000080"><i>// \d allowed
|
||
</i></font> bk_vbar = bk_refs << 1, <font
|
||
color="#000080"><i>// uses \|
|
||
</i></font> use_except = bk_vbar << 1, <font
|
||
color="#000080"><i>// exception on error
|
||
</i></font> failbit = use_except << 1, <font
|
||
color="#000080"><i>// error flag
|
||
</i></font> literal = failbit << 1, <font
|
||
color="#000080"><i>// all characters are literals
|
||
</i></font> icase = literal << 1, <font
|
||
color="#000080"><i>// characters are matched regardless of case
|
||
</i></font> nocollate = icase << 1, <font
|
||
color="#000080"><i>// don't use locale specific collation
|
||
</i></font>
|
||
basic = char_classes | intervals | limited_ops | bk_braces | bk_parens | bk_refs,
|
||
extended = char_classes | intervals | bk_refs,
|
||
normal = escape_in_lists | char_classes | intervals | bk_refs | nocollate,
|
||
emacs = bk_braces | bk_parens | bk_refs | bk_vbar,
|
||
awk = extended | escape_in_lists,
|
||
grep = basic | newline_alt,
|
||
egrep = extended | newline_alt,
|
||
sed = basic,
|
||
perl = normal
|
||
};
|
||
<b>typedef</b> <b>unsigned</b> <b>int</b> flag_type;
|
||
}; </pre>
|
||
|
||
<p> <br>
|
||
<br>
|
||
</p>
|
||
|
||
<p>The enumerated type <i>regbase::flag_type</i> determines the
|
||
syntax rules for regular expression compilation, the various
|
||
flags have the following effects: <br>
|
||
</p>
|
||
|
||
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">regbase::escape_in_lists</td>
|
||
<td valign="top" width="45%">Allows the use of the escape
|
||
"\" character in sets of characters, for
|
||
example [\]] represents the set of characters containing
|
||
only "]". If this flag is not set then "\"
|
||
is an ordinary character inside sets.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">regbase::char_classes</td>
|
||
<td valign="top" width="45%">When this bit is set,
|
||
character classes [:classname:] are allowed inside
|
||
character set declarations, for example "[[:word:]]"
|
||
represents the set of all characters that belong to the
|
||
character class "word".</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">regbase:: intervals</td>
|
||
<td valign="top" width="45%">When this bit is set,
|
||
repetition intervals are allowed, for example "a{2,4}"
|
||
represents a repeat of between 2 and 4 letter a's.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">regbase:: limited_ops</td>
|
||
<td valign="top" width="45%">When this bit is set all of
|
||
"+", "?" and "|" are
|
||
ordinary characters in all situations.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">regbase:: newline_alt</td>
|
||
<td valign="top" width="45%">When this bit is set, then
|
||
the newline character "\n" has the same effect
|
||
as the alternation operator "|".</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">regbase:: bk_plus_qm</td>
|
||
<td valign="top" width="45%">When this bit is set then
|
||
"\+" represents the one or more repetition
|
||
operator and "\?" represents the zero or one
|
||
repetition operator. When this bit is not set then
|
||
"+" and "?" are used instead.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">regbase:: bk_braces</td>
|
||
<td valign="top" width="45%">When this bit is set then
|
||
"\{" and "\}" are used for bounded
|
||
repetitions and "{" and "}" are
|
||
normal characters. This is the opposite of default
|
||
behavior.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">regbase:: bk_parens</td>
|
||
<td valign="top" width="45%">When this bit is set then
|
||
"\(" and "\)" are used to group sub-expressions
|
||
and "(" and ")" are ordinary
|
||
characters, this is the opposite of default behaviour.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">regbase:: bk_refs</td>
|
||
<td valign="top" width="45%">When this bit is set then
|
||
back references are allowed.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">regbase:: bk_vbar</td>
|
||
<td valign="top" width="45%">When this bit is set then
|
||
"\|" represents the alternation operator and
|
||
"|" is an ordinary character. This is the
|
||
opposite of default behaviour.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">regbase:: use_except</td>
|
||
<td valign="top" width="45%">When this bit is set then a <a
|
||
href="#bad_expression">bad_expression</a> exception will
|
||
be thrown on error. Use of this flag is deprecated
|
||
- reg_expression will always throw on error.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">regbase:: failbit</td>
|
||
<td valign="top" width="45%">This bit is set on error, if
|
||
regbase::use_except is not set, then this bit should be
|
||
checked to see if a regular expression is valid before
|
||
usage.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">regbase::literal</td>
|
||
<td valign="top" width="45%">All characters in the string
|
||
are treated as literals, there are no special characters
|
||
or escape sequences.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">regbase::icase</td>
|
||
<td valign="top" width="45%">All characters in the string
|
||
are matched regardless of case.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="5%"> </td>
|
||
<td valign="top" width="45%">regbase::nocollate</td>
|
||
<td valign="top" width="45%">Locale specific collation is
|
||
disabled when dealing with ranges in character set
|
||
declarations. For example when this bit is set the
|
||
expression [a-c] would match the characters a, b and c
|
||
only regardless of locale, whereas when this is not set,
|
||
then [a-c] matches any character which collates in the
|
||
range a to c.</td>
|
||
<td valign="top" width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="5%"> </td>
|
||
<td valign="top" width="45%">regbase::basic</td>
|
||
<td valign="top" width="45%">Equivalent to the POSIX
|
||
basic regular expression syntax: char_classes | intervals
|
||
| limited_ops | bk_braces | bk_parens | bk_refs.</td>
|
||
<td valign="top" width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="5%"> </td>
|
||
<td valign="top" width="45%">regbase::extended</td>
|
||
<td valign="top" width="45%">Equivalent to the POSIX
|
||
extended regular expression syntax: char_classes |
|
||
intervals | bk_refs.</td>
|
||
<td valign="top" width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="5%" height="24"> </td>
|
||
<td valign="top" width="45%" height="24">regbase::normal</td>
|
||
<td valign="top" width="45%" height="24">This is the
|
||
default setting, and represents how most people expect
|
||
the library to behave. Equivalent to the POSIX extended
|
||
syntax, but with locale specific collation disabled, and
|
||
escape characters inside set declarations enabled:
|
||
regbase::escape_in_lists | regbase::char_classes |
|
||
regbase::intervals | regbase::bk_refs | regbase::nocollate.</td>
|
||
<td valign="top" width="5%" height="24"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="5%" height="24"> </td>
|
||
<td valign="top" width="45%" height="24">regbase::emacs</td>
|
||
<td valign="top" width="45%" height="24">Provides
|
||
compatibility with the emacs editor, equivalent to:
|
||
bk_braces | bk_parens | bk_refs | bk_vbar.</td>
|
||
<td valign="top" width="5%" height="24"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="5%" height="24"> </td>
|
||
<td valign="top" width="45%" height="24">regbase::awk </td>
|
||
<td valign="top" width="45%" height="24">Provides
|
||
compatibility with the Unix utility Awk, the same as POSIX
|
||
extended regular expressions, but allows escapes inside
|
||
bracket-expressions (character sets). Equivalent to
|
||
extended | escape_in_lists.</td>
|
||
<td valign="top" width="5%" height="24"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="5%" height="24"> </td>
|
||
<td valign="top" width="45%" height="24">regbase::grep</td>
|
||
<td valign="top" width="45%" height="24">Provides
|
||
compatibility with the Unix grep utility, the same as
|
||
POSIX basic regular expressions, but with the newline
|
||
character equivalent to the alternation operator. The
|
||
same as basic | newline_alt.</td>
|
||
<td valign="top" width="5%" height="24"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="5%" height="24"> </td>
|
||
<td valign="top" width="45%" height="24">regbase::egrep</td>
|
||
<td valign="top" width="45%" height="24">Provides
|
||
compatibility with the Unix egrep utility, the same as
|
||
POSIX extended regular expressions, but with the newline
|
||
character equivalent to the alternation operator. The
|
||
same as extended | newline_alt.</td>
|
||
<td valign="top" width="5%" height="24"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="5%" height="24"> </td>
|
||
<td valign="top" width="45%" height="24">regbase::sed</td>
|
||
<td valign="top" width="45%" height="24">Provides
|
||
compatibility with the Unix sed utility, the same as POSIX
|
||
basic regular expressions.</td>
|
||
<td valign="top" width="5%" height="24"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="5%" height="24"> </td>
|
||
<td valign="top" width="45%" height="24">regbase::perl</td>
|
||
<td valign="top" width="45%" height="24">Provides
|
||
compatibility with the perl programming language, the
|
||
same as regbase::normal.</td>
|
||
<td valign="top" width="5%" height="24"> </td>
|
||
</tr>
|
||
</table>
|
||
|
||
<hr>
|
||
|
||
<h3><a name="bad_expression"></a>Exception classes.</h3>
|
||
|
||
<p>#include <<a href="../../boost/regex/pattern_except.hpp">boost/regex/pattern_except.hpp</a>>
|
||
</p>
|
||
|
||
<p>An instance of <i>bad_expression</i> is thrown whenever a bad
|
||
regular expression is encountered. </p>
|
||
|
||
<pre><b>namespace</b> boost{
|
||
|
||
<b>class</b> bad_pattern : <b>public</b> std::runtime_error
|
||
{
|
||
<b>public</b>:
|
||
<b>explicit</b> bad_pattern(<b>const</b> std::string& s) : std::runtime_error(s){};
|
||
};
|
||
|
||
<b>class</b> bad_expression : <b>public</b> bad_pattern
|
||
{
|
||
<b>public</b>:
|
||
bad_expression(<b>const</b> std::string& s) : bad_pattern(s) {}
|
||
};
|
||
|
||
|
||
} // namespace boost</pre>
|
||
|
||
<p>Footnotes: the class <i>bad_pattern </i>forms the base class
|
||
for all pattern-matching exceptions, of which <i>bad_expression</i>
|
||
is one. The choice of <i>std::runtime_error </i>as the base class
|
||
for <i>bad_pattern</i> is moot; depending upon how the library is
|
||
used, exceptions may be either logic errors (programmer supplied
|
||
expressions) or run time errors (user supplied expressions). </p>
|
||
|
||
<hr>
|
||
|
||
<h3><a name="reg_expression"></a>Class reg_expression</h3>
|
||
|
||
<p>#include <<a href="../../boost/regex.hpp">boost/regex.hpp</a>>
|
||
</p>
|
||
|
||
<p>The template class <i>reg_expression </i>encapsulates regular
|
||
expression parsing and compilation. The class derives from class <a
|
||
href="#regbase"><i>regbase</i></a> and takes three template
|
||
parameters: </p>
|
||
|
||
<p><b><i>charT</i></b>: determines the character type, i.e.
|
||
either char or wchar_t. </p>
|
||
|
||
<p><b><i>traits</i></b>: determines the behaviour of the
|
||
character type, for example whether character matching is case
|
||
sensitive or not, and which character class names are recognized.
|
||
A default traits class is provided: <a href="#regex_char_traits">regex_traits<charT></a>.
|
||
</p>
|
||
|
||
<p><b><i>Allocator</i></b>: the allocator class used to allocate
|
||
memory by the class. </p>
|
||
|
||
<p>For ease of use there are two typedefs that define the two
|
||
standard <i>reg_expression</i> instances, unless you want to use
|
||
custom allocators, you won't need to use anything other than
|
||
these: </p>
|
||
|
||
<pre><b>namespace</b> boost{
|
||
<b>template</b> <<b>class</b> charT, <b>class</b> traits = regex_traits<charT>, <b>class</b> Allocator = std::allocator<charT> >
|
||
<b>class</b> reg_expression;
|
||
<b>typedef</b> reg_expression<<b>char</b>> regex;
|
||
<b>typedef</b> reg_expression<<b>wchar_t> </b>wregex;
|
||
}</pre>
|
||
|
||
<p>The definition of <i>reg_expression</i> follows: it is based
|
||
very closely on class basic_string, and fulfils the requirements
|
||
for a container of <i>charT</i>. </p>
|
||
|
||
<pre><b>namespace</b> boost{
|
||
<b>template</b> <<b>class</b> charT, <b>class</b> traits = regex_traits<charT>, <b>class</b> Allocator = std::allocator<charT> >
|
||
<b>class</b> reg_expression : <b>public</b> regbase
|
||
{
|
||
<b>public</b>:
|
||
<font color="#000080"><i> // typedefs: </i></font>
|
||
<b> typedef</b> charT char_type;
|
||
<b>typedef</b> traits traits_type;
|
||
<font color="#000080"> <i>// locale_type
|
||
</i> <i>// placeholder for actual locale type used by the
|
||
</i> <i>// traits class to localise *this.
|
||
</i></font> <b>typedef</b> typename traits::locale_type locale_type;
|
||
<font color="#000080"> <i>// value_type
|
||
</i></font> <b>typedef</b> charT value_type;
|
||
<font color="#000080"> <i>// reference, const_reference
|
||
</i></font> <b>typedef</b> charT& reference;
|
||
<b>typedef</b> <b>const</b> charT& const_reference;
|
||
<font color="#000080"> <i>// iterator, const_iterator
|
||
</i></font> <b>typedef</b> <b>const</b> charT* const_iterator;
|
||
<b>typedef</b> const_iterator iterator;
|
||
<font color="#000080"> <i>// difference_type
|
||
</i></font> <b>typedef</b> <b>typename</b> Allocator::difference_type difference_type;
|
||
<font color="#000080"> <i>// size_type
|
||
</i></font> <b>typedef</b> <b>typename</b> Allocator::size_type size_type;
|
||
<font color="#000080"><i>// allocator_type
|
||
</i></font> <b> typedef</b> Allocator allocator_type;
|
||
<b>typedef</b> Allocator alloc_type;
|
||
<font color="#000080"> <i>// flag_type
|
||
</i></font> <b>typedef</b> boost::int_fast32_t flag_type;
|
||
<b>public</b>:
|
||
<font color="#000080"><em>// constructors</em></font>
|
||
<strong>explicit</strong> reg_expression(<b>const</b> Allocator& a = Allocator());
|
||
<strong>explicit</strong> reg_expression(<b>const</b> charT* p, flag_type f = regbase::normal, <b>const</b> Allocator& a = Allocator());
|
||
reg_expression(<b>const</b> charT* p1, <b>const</b> charT* p2, flag_type f = regbase::normal, <b>const</b> Allocator& a = Allocator());
|
||
reg_expression(<b>const</b> charT* p, size_type len, flag_type f, <b>const</b> Allocator& a = Allocator());
|
||
reg_expression(<b>const</b> reg_expression&);
|
||
<b> template</b> <<b>class</b> ST, <b>class</b> SA>
|
||
<strong>explicit</strong> reg_expression(<b>const</b> std::basic_string<charT, ST, SA>& p, flag_type f = regbase::normal, <b>const</b> Allocator& a = Allocator());
|
||
<b> template</b> <<b>class</b> I>
|
||
reg_expression(I first, I last, flag_type f = regbase::normal, <b>const</b> Allocator& a = Allocator());
|
||
~reg_expression();
|
||
reg_expression& <b>operator</b>=(<b>const</b> reg_expression&);
|
||
reg_expression& <b>operator</b>=(<b>const</b> charT* ptr);
|
||
<b> template</b> <<b>class</b> ST, <b>class</b> SA>
|
||
reg_expression& <b>operator</b>=(<b>const</b> std::basic_string<charT, ST, SA>& p);
|
||
<font color="#000080"> <i>//
|
||
</i> <i>// assign:
|
||
</i></font> reg_expression& assign(<b>const</b> reg_expression& that);
|
||
reg_expression& assign(<b>const</b> charT* ptr, flag_type f = regbase::normal);
|
||
reg_expression& assign(<b>const</b> charT* first, <b>const</b> charT* last, flag_type f = regbase::normal);
|
||
<b> template</b> <<b>class</b> string_traits, <b>class</b> A>
|
||
reg_expression& assign(
|
||
<b>const</b> std::basic_string<charT, string_traits, A>& s,
|
||
flag_type f = regbase::normal);
|
||
<b>template</b> <<b>class</b> iterator>
|
||
reg_expression& assign(iterator first,
|
||
iterator last,
|
||
flag_type f = regbase::normal);
|
||
<font color="#000080"> <i>//
|
||
</i> <i>// allocator access:
|
||
</i></font> Allocator get_allocator()<b>const</b>;
|
||
<font color="#000080"> <i>//
|
||
</i> <i>// locale:
|
||
</i></font> locale_type imbue(<b>const</b> locale_type& l);
|
||
locale_type getloc()<b>const</b>;
|
||
<font color="#000080"><i> //
|
||
</i> <i>// flags:
|
||
</i></font> flag_type getflags()<b>const</b>;
|
||
<font color="#000080"> <i>//
|
||
</i> <i>// str:
|
||
</i></font> std::basic_string<charT> str()<b>const</b>;
|
||
<font color="#000080"> <i>//
|
||
</i> <i>// begin, end:
|
||
</i></font> const_iterator begin()<b>const</b>;
|
||
const_iterator end()<b>const</b>;
|
||
<font color="#000080"> <i>//
|
||
</i> <i>// swap:
|
||
</i></font> <b>void</b> swap(reg_expression&)<b>throw</b>();
|
||
<font color="#000080"> <i>//
|
||
</i> <i>// size:
|
||
</i></font> size_type size()<b>const</b>;
|
||
<font color="#000080"> <i>//
|
||
</i> <i>// max_size:
|
||
</i></font> size_type max_size()<b>const</b>;
|
||
<font color="#000080"> <i>//
|
||
</i> <i>// empty:
|
||
</i></font> <b>bool</b> empty()<b>const</b>;
|
||
<b>unsigned</b> mark_count()<b>const</b>;
|
||
<b>bool</b> <b>operator</b>==(<b>const</b> reg_expression&)<b>const</b>;
|
||
<b>bool</b> <b>operator</b><(<b>const</b> reg_expression&)<b>const</b>;
|
||
};
|
||
} <font color="#000080"><i>// namespace boost </i></font></pre>
|
||
|
||
<p>Class reg_expression has the following public member functions:
|
||
<br>
|
||
</p>
|
||
|
||
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">reg_expression(Allocator a =
|
||
Allocator());</td>
|
||
<td valign="top" width="45%"> Constructs a default
|
||
instance of reg_expression without any expression.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">reg_expression(charT* p, <b>unsigned</b>
|
||
f = regbase::normal, Allocator a = Allocator());</td>
|
||
<td valign="top" width="45%"> Constructs an instance
|
||
of reg_expression from the expression denoted by the null
|
||
terminated string <b>p</b>, using the flags <b>f</b> to
|
||
determine regular expression syntax. See class <a
|
||
href="#regbase">regbase</a> for allowable flag values.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">reg_expression(charT* p1,
|
||
charT* p2, <b>unsigned</b> f = regbase::normal, Allocator
|
||
a = Allocator());</td>
|
||
<td valign="top" width="45%"> Constructs an instance
|
||
of reg_expression from the expression denoted by the pair of
|
||
input-iterators <b>p1</b> and <b>p2</b>, using the flags <b>f</b>
|
||
to determine regular expression syntax. See class <a
|
||
href="#regbase">regbase</a> for allowable flag values.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">reg_expression(charT* p,
|
||
size_type len, <b>unsigned</b> f, Allocator a = Allocator());</td>
|
||
<td valign="top" width="45%"> Constructs an instance
|
||
of reg_expression from the expression denoted by the
|
||
string <b>p</b> of length <b>len</b>, using the flags <b>f</b>
|
||
to determine regular expression syntax. See class <a
|
||
href="#regbase">regbase</a> for allowable flag values.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%"><b>template</b> <class
|
||
ST, class SA> <br>
|
||
reg_expression(<b>const</b> std::basic_string<charT,
|
||
ST, SA>& p, boost::int_fast32_t f = regbase::normal,
|
||
<b>const</b> Allocator& a = Allocator());</td>
|
||
<td valign="top" width="45%"> Constructs an instance
|
||
of reg_expression from the expression denoted by the
|
||
string <b>p</b>, using the flags <b>f</b> to determine
|
||
regular expression syntax. See class <a href="#regbase">regbase</a>
|
||
for allowable flag values. <p>Note - this member may not
|
||
be available depending upon your compiler capabilities.</p>
|
||
</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">template <class I> <br>
|
||
reg_expression(I first, I last, flag_type f = regbase::normal,
|
||
const Allocator& a = Allocator());</td>
|
||
<td valign="top" width="45%"> Constructs an instance
|
||
of reg_expression from the expression denoted by the pair of
|
||
input-iterators <b>first</b> and <b>last</b>, using the flags <b>f</b>
|
||
to determine regular expression syntax. See class <a
|
||
href="#regbase">regbase</a> for allowable flag values.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">reg_expression(<b>const</b>
|
||
reg_expression&);</td>
|
||
<td valign="top" width="45%">Copy constructor - copies an
|
||
existing regular expression.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">reg_expression& <b>operator</b>=(<b>const</b>
|
||
reg_expression&);</td>
|
||
<td valign="top" width="45%">Copies an existing regular
|
||
expression.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">reg_expression& <b>operator</b>=(<b>const</b>
|
||
charT* ptr);</td>
|
||
<td valign="top" width="45%">Equivalent to assign(ptr);</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">template <class ST, class
|
||
SA> <p>reg_expression& operator=(const std::basic_string<charT,
|
||
ST, SA>& p);</p>
|
||
</td>
|
||
<td valign="top" width="45%">Equivalent to assign(p);</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">reg_expression& assign(<b>const</b>
|
||
reg_expression& that);</td>
|
||
<td valign="top" width="45%">Copies the regular
|
||
expression contained by <b>that</b>, throws <a
|
||
href="#bad_expression">bad_expression</a> if <b>that</b>
|
||
does not contain a valid expression. Returns *this.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">reg_expression& assign(<b>const</b>
|
||
charT* p, flag_type f = regbase::normal);</td>
|
||
<td valign="top" width="45%">Compiles a regular
|
||
expression from the expression denoted by the null
|
||
terminated string <b>p</b>, using the flags <b>f</b> to
|
||
determine regular expression syntax. See class <a
|
||
href="#regbase">regbase</a> for allowable flag values.
|
||
Throws <a href="#bad_expression">bad_expression</a> if <b>p</b>
|
||
does not contain a valid expression. Returns *this.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">reg_expression& assign(<b>const</b>
|
||
charT* first, <b>const</b> charT* last, flag_type f =
|
||
regbase::normal);</td>
|
||
<td valign="top" width="45%">Compiles a regular
|
||
expression from the expression denoted by the pair of
|
||
input-iterators <b>first-last</b>, using the flags <b>f</b>
|
||
to determine regular expression syntax. See class <a
|
||
href="#regbase">regbase</a> for allowable flag values.
|
||
Throws <a href="#bad_expression">bad_expression</a> if <b>first-last</b>
|
||
does not contain a valid expression. Returns *this.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%"><b>template</b> <<b>class</b>
|
||
string_traits, <b>class</b> A> <br>
|
||
reg_expression& assign(<b>const</b> std::basic_string<charT,
|
||
string_traits, A>& s, flag_type f = regbase::normal);</td>
|
||
<td valign="top" width="45%">Compiles a regular
|
||
expression from the expression denoted by the string <b>s</b>,
|
||
using the flags <b>f</b> to determine regular expression
|
||
syntax. See class <a href="#regbase">regbase</a> for
|
||
allowable flag values. Throws <a href="#bad_expression">bad_expression</a>
|
||
if <b>s</b> does not contain a valid expression. Returns
|
||
*this.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">template <class
|
||
iterator> <br>
|
||
reg_expression& assign(iterator first, iterator last,
|
||
flag_type f = regbase::normal);</td>
|
||
<td valign="top" width="45%">Compiles a regular
|
||
expression from the expression denoted by the pair of
|
||
input-iterators <b>first-last</b>, using the flags <b>f</b>
|
||
to determine regular expression syntax. See class <a
|
||
href="#regbase">regbase</a> for allowable flag values.
|
||
Throws <a href="#bad_expression">bad_expression</a> if <b>first-last</b>
|
||
does not contain a valid expression. Returns *this.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">Allocator get_allocator()<b>const</b>;</td>
|
||
<td valign="top" width="45%">Returns the allocator used
|
||
by the expression.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">locale_type imbue(<b>const</b>
|
||
locale_type& l);</td>
|
||
<td valign="top" width="45%">Imbues the expression with
|
||
the specified locale, and invalidates the current
|
||
expression.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">locale_type getloc()<b>const</b>;</td>
|
||
<td valign="top" width="45%">Returns the locale used by
|
||
the expression.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">flag_type getflags()<b>const</b>;</td>
|
||
<td valign="top" width="45%">Returns the flags used to
|
||
compile the current expression.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">std::basic_string<charT>
|
||
str()<b>const</b>;</td>
|
||
<td valign="top" width="45%">Returns the current
|
||
expression as a string.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">const_iterator begin()<b>const</b>;</td>
|
||
<td valign="top" width="45%">Returns a pointer to the
|
||
first character of the current expression.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">const_iterator end()<b>const</b>;</td>
|
||
<td valign="top" width="45%">Returns a pointer to the end
|
||
of the current expression.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">size_type size()<b>const</b>;</td>
|
||
<td valign="top" width="45%">Returns the length of the
|
||
current expression.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">size_type max_size()<b>const</b>;</td>
|
||
<td valign="top" width="45%">Returns the maximum length
|
||
of a regular expression text.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%"><b>bool</b> empty()<b>const</b>;</td>
|
||
<td valign="top" width="45%">Returns true if the object
|
||
contains no valid expression.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%"><b>unsigned</b> mark_count()<b>const</b>
|
||
;</td>
|
||
<td valign="top" width="45%">Returns the number of sub-expressions
|
||
in the compiled regular expression. Note that this
|
||
includes the whole match (subexpression zero), so the
|
||
value returned is always >= 1.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
</table>
|
||
|
||
<hr>
|
||
|
||
<h3><a name="regex_char_traits"></a><i>Class regex_traits</i></h3>
|
||
|
||
<p>#include <<a href="../../boost/regex/regex_traits.hpp">boost/regex/regex_traits.hpp</a>>
|
||
</p>
|
||
|
||
<p><i>This is a preliminary version of the regular expression
|
||
traits class, and is subject to change</i>. </p>
|
||
|
||
<p>The purpose of the traits class is to make it easier to
|
||
customise the behaviour of <i>reg_expression </i>and the
|
||
associated matching algorithms. Custom traits classes can handle
|
||
special character sets or define additional character classes,
|
||
for example one could define [[:kanji:]] as the set of all (Unicode)
|
||
kanji characters. This library provides three traits classes and
|
||
a wrapper class <i>regex_traits</i>, which inherits from one of
|
||
these depending upon the default localisation model in use, class
|
||
<i>c_regex_traits</i> encapsulates the global C locale, class <i>w32_regex_traits</i>
|
||
encapsulates the global Win32 locale (only available on Win32
|
||
systems), and class <i>cpp_regex_traits</i> encapsulates the C++
|
||
locale (only provided if std::locale is supported): </p>
|
||
|
||
<pre>template <class charT> class c_regex_traits;
|
||
template<> class c_regex_traits<char> { /*details*/ };
|
||
template<> class c_regex_traits<wchar_t> { /*details*/ };
|
||
|
||
template <class charT> class w32_regex_traits;
|
||
template<> class w32_regex_traits<char> { /*details*/ };
|
||
template<> class w32_regex_traits<wchar_t> { /*details*/ };
|
||
|
||
template <class charT> class cpp_regex_traits;
|
||
template<> class cpp_regex_traits<char> { /*details*/ };
|
||
template<> class cpp_regex_traits<wchar_t> { /*details*/ };
|
||
|
||
template <class charT> class regex_traits : public base_type { /*details*/ };</pre>
|
||
|
||
<p>Where "<i>base_type</i>" defaults to <i>w32_regex_traits</i>
|
||
on Win32 systems, and <i>c_regex_traits</i> otherwise. The
|
||
default behaviour can be changed by defining one of
|
||
BOOST_REGEX_USE_C_LOCALE (forces use of <i>c_regex_traits</i> by
|
||
default), or BOOST_REGEX_USE_CPP_LOCALE (forces use of <i>cpp_regex_traits</i>
|
||
by default). Alternatively a specific traits class can be passed
|
||
to the <i>reg_expression</i> template. </p>
|
||
|
||
<p>The requirements for custom traits classes are <a
|
||
href="traits_class_ref.htm">documented separately here....</a> </p>
|
||
|
||
<p>There is also an example of a custom traits class supplied by <a
|
||
href="mailto:christian.engstrom@glindra.org">Christian Engström</a>,
|
||
see <a
|
||
href="example/iso8859_1_regex_traits/iso8859_1_regex_traits.cpp">iso8859_1_regex_traits.cpp</a>
|
||
and <a
|
||
href="example/iso8859_1_regex_traits/iso8859_1_regex_traits.hpp">iso8859_1_regex_traits.hpp</a>,
|
||
see <a href="example/iso8859_1_regex_traits/readme.txt">the
|
||
readme file</a> for more details.</p>
|
||
|
||
<hr>
|
||
|
||
<h3><a name="reg_match"></a><i>Class match_results</i></h3>
|
||
|
||
<p>#include <<a href="../../boost/regex.hpp">boost/regex.hpp</a>>
|
||
</p>
|
||
|
||
<p>Regular expressions are different from many simple pattern-matching
|
||
algorithms in that as well as finding an overall match they can
|
||
also produce sub-expression matches: each sub-expression being
|
||
delimited in the pattern by a pair of parentheses (...). There
|
||
has to be some method for reporting sub-expression matches back
|
||
to the user: this is achieved by defining a class <i>match_results</i>
|
||
that acts as an indexed collection of sub-expression matches,
|
||
each sub-expression match being contained in an object of type <i>sub_match</i>.
|
||
</p>
|
||
|
||
<pre><font color="#000080"><i>//
|
||
// class sub_match:
|
||
// denotes one sub-expression match.
|
||
//
|
||
</i></font><b>template</b> <<b>class</b> iterator>
|
||
<b>struct</b> sub_match
|
||
{
|
||
<b>typedef</b> <b>typename</b> std::iterator_traits<iterator>::value_type value_type;
|
||
<b>typedef</b> <b>typename</b> std::iterator_traits<iterator>::difference_type difference_type;
|
||
<b>typedef</b> iterator iterator_type;
|
||
|
||
iterator first;
|
||
iterator second;
|
||
<b>bool</b> matched;
|
||
|
||
<b>operator</b> std::basic_string<value_type>()<b>const</b>;
|
||
|
||
<b>bool</b> <b>operator</b>==(<b>const</b> sub_match& that)<b>const</b>;
|
||
<b>bool</b> <b>operator</b> !=(<b>const</b> sub_match& that)<b>const</b>;
|
||
difference_type length()<b>const</b>;
|
||
};
|
||
|
||
<font color="#000080">//
|
||
// class match_results:
|
||
// contains an indexed collection of matched sub-expressions.
|
||
//
|
||
</font><b>template</b> <<b>class</b> iterator, <b>class</b> Allocator = std::allocator<<strong>typename</strong> std::iterator_traits<iterator>::value_type > >
|
||
<b>class</b> match_results
|
||
{
|
||
<b>public</b>:
|
||
<b>typedef</b> Allocator alloc_type;
|
||
<b>typedef</b> <b>typename</b> Allocator::<b>template</b> Rebind<iterator>::size_type size_type;
|
||
<b>typedef</b> <b>typename</b> std::iterator_traits<iterator>::value_type char_type;
|
||
<b>typedef</b> sub_match<iterator> value_type;
|
||
<b>typedef</b> <b>typename</b> std::iterator_traits<iterator>::difference_type difference_type;
|
||
<b>typedef</b> iterator iterator_type;
|
||
<strong>explicit</strong> match_results(<b>const</b> Allocator& a = Allocator());
|
||
match_results(<b>const</b> match_results& m);
|
||
match_results& <b>operator</b>=(<b>const</b> match_results& m);
|
||
~match_results();
|
||
size_type size()<b>const</b>;
|
||
<b>const</b> sub_match<iterator>& <b>operator</b>[](<b>int</b> n) <b>const</b>;
|
||
Allocator allocator()<b>const</b>;
|
||
difference_type length(<b>int</b> sub = 0)<b>const</b>;
|
||
difference_type position(<b>unsigned</b> <b>int</b> sub = 0)<b>const</b>;
|
||
<b>unsigned</b> <b>int</b> line()<b>const</b>;
|
||
iterator line_start()<b>const</b>;
|
||
std::basic_string<char_type> str(<b>int</b> sub = 0)<b>const</b>;
|
||
<b>void</b> swap(match_results& that);
|
||
<b>bool</b> <b>operator</b>==(<b>const</b> match_results& that)<b>const</b>;
|
||
<b>bool</b> <b>operator</b><(<b>const</b> match_results& that)<b>const</b>;
|
||
};
|
||
<strong>typedef</strong> match_results<<strong>const</strong> <strong>char</strong>*> cmatch;
|
||
<strong>typedef</strong> match_results<<strong>const</strong> <strong>wchar_t</strong>*> wcmatch;
|
||
<strong>typedef</strong> match_results<std::string::const_iterator> smatch;
|
||
<strong>typedef</strong> match_results<std::wstring::const_iterator> wsmatch; </pre>
|
||
|
||
<p>Class match_results is used for reporting what matched a
|
||
regular expression, it is passed to the matching algorithms <a
|
||
href="#query_match">regex_match</a> and <a href="#reg_search">regex_search</a>,
|
||
and is used by <a href="#reg_grep">regex_grep</a> to notify the
|
||
callback function (or function object) what matched. Note that
|
||
the default allocator parameter has been chosen to match the
|
||
default allocator parameter to reg_expression. match_results has
|
||
the following public member functions: <br>
|
||
</p>
|
||
|
||
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">match_results(Allocator a =
|
||
Allocator());</td>
|
||
<td valign="top" width="45%">Constructs an instance of
|
||
match_results, using allocator instance a.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">match_results(const
|
||
match_results& m);</td>
|
||
<td valign="top" width="45%">Copy constructor.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">match_results& operator=(const
|
||
match_results& m);</td>
|
||
<td valign="top" width="45%">Assignment operator.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%"><b>const</b>
|
||
        sub_match<iterator>& <b>operator</b>[](<b>int</b>
|
||
n) const;</td>
|
||
<td valign="top" width="45%">Returns what matched, item 0
|
||
represents the whole string, item 1 the first sub-expression
|
||
and so on.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
        <td valign="top" width="45%">Allocator allocator()const;</td>
|
||
<td valign="top" width="45%">Returns the allocator used
|
||
by the class.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">difference_type length(<b>unsigned
|
||
        int</b> sub = 0)<b>const</b>;</td>
|
||
<td valign="top" width="45%">Returns the length of the
|
||
matched subexpression, defaults to the length of the
|
||
whole match, in effect this is equivalent to operator[](sub).second
|
||
- operator[](sub).first.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">difference_type position(<b>unsigned
|
||
        int</b> sub = 0)<b>const</b>;</td>
|
||
<td valign="top" width="45%">Returns the position of the
|
||
matched sub-expression, defaults to the position of the
|
||
whole match. The returned value is the position of the
|
||
match relative to the start of the string.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%"><b>unsigned</b> <b>int</b>
|
||
line()<b>const</b>;</td>
|
||
<td valign="top" width="45%">Returns the index of the
|
||
line on which the match occurred, indices start with 1,
|
||
not zero. Equivalent to the number of newline characters
|
||
prior to operator[](0).first plus one.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">iterator line_start()<b>const;</b></td>
|
||
<td valign="top" width="45%">Returns an iterator denoting
|
||
the start of the line on which the match occurred.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">size_type size()<b>const;</b></td>
|
||
<td valign="top" width="45%">Returns how many sub-expressions
|
||
are present in the match, including sub-expression zero (the
|
||
whole match). This is the case even if no matches were
|
||
found in the search operation - you must use the returned
|
||
value from <a href="#reg_search">regex_search</a> / <a
|
||
href="#query_match">regex_match</a> to determine whether
|
||
        any match occurred.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
</table>
|
||
|
||
<p><br>
|
||
</p>
|
||
|
||
<p>The operator[] member function needs further explanation: it
|
||
returns a const reference to a structure of type
|
||
sub_match<iterator>, which has the following public members:
|
||
<br>
|
||
</p>
|
||
|
||
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
||
<tr>
|
||
<td valign="top" width="6%"> </td>
|
||
<td valign="top" width="44%"><b>typedef</b> <b>typename</b>
|
||
std::iterator_traits<iterator>::value_type
|
||
value_type;</td>
|
||
<td valign="top" width="44%">The type pointed to by the
|
||
iterators.</td>
|
||
<td valign="top" width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="6%"> </td>
|
||
<td valign="top" width="44%"><b>typedef</b> <b>typename</b>
|
||
std::iterator_traits<iterator>::difference_type
|
||
difference_type;</td>
|
||
<td valign="top" width="44%">A type that represents the
|
||
difference between two iterators.</td>
|
||
<td valign="top" width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="6%"> </td>
|
||
<td valign="top" width="44%"><b>typedef</b> iterator
|
||
iterator_type;</td>
|
||
<td valign="top" width="44%">The iterator type.</td>
|
||
<td valign="top" width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="6%"> </td>
|
||
<td valign="top" width="44%">iterator first</td>
|
||
<td valign="top" width="44%">An iterator denoting the
|
||
position of the start of the match.</td>
|
||
<td valign="top" width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="6%"> </td>
|
||
<td valign="top" width="44%">iterator second</td>
|
||
<td valign="top" width="44%">An iterator denoting the
|
||
position of the end of the match.</td>
|
||
<td valign="top" width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="6%"> </td>
|
||
<td valign="top" width="44%"><b>bool</b> matched</td>
|
||
<td valign="top" width="44%">A Boolean value denoting
|
||
whether this sub-expression participated in the match.</td>
|
||
<td valign="top" width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="6%"> </td>
|
||
<td valign="top" width="44%">difference_type length()<b>const;</b></td>
|
||
<td valign="top" width="44%">Returns the length of the
|
||
sub-expression match.</td>
|
||
<td valign="top" width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="6%"> </td>
|
||
<td valign="top" width="44%"><b>operator</b> std::basic_string<value_type>
|
||
()<b>const</b>;</td>
|
||
<td valign="top" width="44%">Converts the sub-expression
|
||
match into an instance of std::basic_string<>. Note
|
||
that this member may be either absent, or present to a
|
||
more limited degree depending upon your compiler
|
||
capabilities.</td>
|
||
<td valign="top" width="5%"> </td>
|
||
</tr>
|
||
</table>
|
||
|
||
<p>Operator[] takes an integer as an argument that denotes the
|
||
sub-expression for which to return information, the argument can
|
||
take the following special values: <br>
|
||
</p>
|
||
|
||
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
||
<tr>
|
||
<td valign="top" width="6%"> </td>
|
||
<td valign="top" width="44%">-2</td>
|
||
<td valign="top" width="44%">Returns everything from the
|
||
end of the match, to the end of the input string,
|
||
equivalent to $' in perl. If this is a null string, then:
|
||
<p>first == second </p>
|
||
<p>And </p>
|
||
<p>matched == false.</p>
|
||
</td>
|
||
<td valign="top" width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="6%"> </td>
|
||
<td valign="top" width="44%">-1</td>
|
||
<td valign="top" width="44%">Returns everything from the
|
||
start of the input string (or the end of the last match
|
||
if this is a grep operation), to the start of this match.
|
||
Equivalent to $` in perl. If this is a null string, then:
|
||
<p>first == second </p>
|
||
<p>And </p>
|
||
<p>matched == false.</p>
|
||
</td>
|
||
<td valign="top" width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="6%"> </td>
|
||
<td valign="top" width="44%">0</td>
|
||
<td valign="top" width="44%">Returns the whole of what
|
||
matched, equivalent to $& in perl. The matched
|
||
parameter is always true.</td>
|
||
<td valign="top" width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="6%"> </td>
|
||
<td valign="top" width="44%">0 < N < size()</td>
|
||
<td valign="top" width="44%">Returns what matched sub-expression
|
||
N, if this sub-expression did not participate in the
|
||
match then <p>matched == false </p>
|
||
<p>otherwise: </p>
|
||
<p>matched == true.</p>
|
||
</td>
|
||
<td valign="top" width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="6%"> </td>
|
||
<td valign="top" width="44%">N < -2 or N >= size()</td>
|
||
        <td valign="top" width="44%">Represents an out-of-range
|
||
non-existent sub-expression. Returns a "null"
|
||
        match in which <p>first == second </p>
|
||
<p>And </p>
|
||
<p>matched == false.</p>
|
||
</td>
|
||
<td valign="top" width="5%"> </td>
|
||
</tr>
|
||
</table>
|
||
|
||
<p>Note that as well as being parameterised for an allocator,
|
||
match_results<> also takes an iterator type, this allows
|
||
any pair of iterators to be searched for a given regular
|
||
expression, provided the iterators have at least bi-directional
|
||
properties. </p>
|
||
|
||
<hr>
|
||
|
||
<h3><a name="query_match"></a>Algorithm regex_match</h3>
|
||
|
||
<p>#include <<a href="../../boost/regex.hpp">boost/regex.hpp</a>>
|
||
</p>
|
||
|
||
<p>The algorithm regex_match determines whether a given regular
|
||
expression matches a given sequence denoted by a pair of
|
||
bidirectional-iterators, the algorithm is defined as follows, <em>note
|
||
that the result is true only if the expression matches the whole
|
||
of the input sequence</em>, the main use of this function is data
|
||
input validation: </p>
|
||
|
||
<pre><b>template</b> <<b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits, <b>class</b> Allocator2>
|
||
<b>bool</b> regex_match(iterator first,
|
||
iterator last,
|
||
match_results<iterator, Allocator>& m,
|
||
<b>const</b> reg_expression<charT, traits, Allocator2>& e,
|
||
<b>unsigned</b> flags = match_default);</pre>
|
||
|
||
<p>The library also defines the following convenience versions,
|
||
which take either a const charT*, or a const std::basic_string<>&
|
||
in place of a pair of iterators [note - these versions may not be
|
||
available, or may be available in a more limited form, depending
|
||
upon your compiler's capabilities]: </p>
|
||
|
||
<pre><b>template</b> <<b>class</b> charT, <b>class</b> Allocator, <b>class</b> traits, <b>class</b> Allocator2>
|
||
<b>bool</b> regex_match(<b>const</b> charT* str,
|
||
match_results<<b>const</b> charT*, Allocator>& m,
|
||
<b>const</b> reg_expression<charT, traits, Allocator2>& e,
|
||
                 <b>unsigned</b> flags = match_default);
|
||
|
||
<b>template</b> <<b>class</b> ST, <b>class</b> SA, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits, <b>class</b> Allocator2>
|
||
<b>bool</b> regex_match(<b>const</b> std::basic_string<charT, ST, SA>& s,
|
||
match_results<<b>typename</b> std::basic_string<charT, ST, SA>::const_iterator, Allocator>& m,
|
||
<b>const</b> reg_expression<charT, traits, Allocator2>& e,
|
||
<b>unsigned</b> flags = match_default);</pre>
|
||
|
||
<p>Finally there is a set of convenience versions that simply
|
||
return true or false and do not indicate what matched: </p>
|
||
|
||
<pre><b>template</b> <<b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits, <b>class</b> Allocator2>
|
||
<b>bool</b> regex_match(iterator first,
|
||
iterator last,
|
||
<b> const</b> reg_expression<charT, traits, Allocator2>& e,
|
||
<b>unsigned</b> flags = match_default);
|
||
|
||
<b>template</b> <<b>class</b> charT, <b>class</b> Allocator, <b>class</b> traits, <b>class</b> Allocator2>
|
||
<b>bool</b> regex_match(<b>const</b> charT* str,
|
||
<b> const</b> reg_expression<charT, traits, Allocator2>& e,
|
||
                 <b>unsigned</b> flags = match_default);
|
||
|
||
<b>template</b> <<b>class</b> ST, <b>class</b> SA, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits, <b>class</b> Allocator2>
|
||
<b>bool</b> regex_match(<b>const</b> std::basic_string<charT, ST, SA>& s,
|
||
<b> const</b> reg_expression<charT, traits, Allocator2>& e,
|
||
<b>unsigned</b> flags = match_default);</pre>
|
||
|
||
<p>The parameters for the main function version are as follows: <br>
|
||
</p>
|
||
|
||
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td width="51%">iterator first</td>
|
||
<td>Denotes the start of the range to be matched.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td> </td>
|
||
<td valign="top" width="51%">iterator last</td>
|
||
<td valign="top" width="51%">Denotes the end of the range
|
||
to be matched.</td>
|
||
<td> </td>
|
||
</tr>
|
||
<tr>
|
||
<td> </td>
|
||
<td valign="top" width="51%">match_results<iterator,
|
||
Allocator>& m</td>
|
||
<td valign="top" width="51%">An instance of match_results
|
||
in which what matched will be reported. On exit if a
|
||
match occurred then m[0] denotes the whole of the string
|
||
that matched, m[0].first must be equal to first, m[0].second
|
||
will be less than or equal to last. m[1] denotes the
|
||
first subexpression m[2] the second subexpression and so
|
||
on. If no match occurred then m[0].first = m[0].second =
|
||
last.<p>Note that since the match_results structure
|
||
stores only iterators, and not strings, the iterators/strings
|
||
passed to regex_match must be valid for as long as the
|
||
result is to be used. For that reason never pass
|
||
temporary string objects to regex_match.</p>
|
||
</td>
|
||
<td> </td>
|
||
</tr>
|
||
<tr>
|
||
<td> </td>
|
||
<td valign="top" width="51%">const
|
||
reg_expression<charT, traits, Allocator2>& e</td>
|
||
<td valign="top" width="51%">Contains the regular
|
||
expression to be matched.</td>
|
||
<td> </td>
|
||
</tr>
|
||
<tr>
|
||
<td> </td>
|
||
<td valign="top" width="51%">unsigned flags =
|
||
match_default</td>
|
||
<td valign="top" width="51%">Determines the semantics
|
||
used for matching, a combination of one or more <a
|
||
href="#match_type">match_flags</a> enumerators.</td>
|
||
<td> </td>
|
||
</tr>
|
||
</table>
|
||
|
||
<p>regex_match returns false if no match occurs or true if it
|
||
does. A match only occurs if it starts at <b>first</b> and
|
||
finishes at <b>last</b>. Example: the following <a
|
||
href="example/snippets/regex_match_example.cpp">example</a>
|
||
processes an ftp response: </p>
|
||
|
||
<pre><font color="#008000">#include <cstdlib>
|
||
#include <boost/regex.hpp>
|
||
#include <string>
|
||
#include <iostream>
|
||
|
||
</font><b>using namespace</b> boost;
|
||
|
||
regex expression(<font color="#000080">"([0-9]+)(\\-| |$)(.*)"</font>);
|
||
|
||
<font color="#000080"><i>// process_ftp:
|
||
// on success returns the ftp response code, and fills
|
||
// msg with the ftp response message.
|
||
</i></font><b>int</b> process_ftp(<b>const</b> <b>char</b>* response, std::string* msg)
|
||
{
|
||
cmatch what;
|
||
<b>if</b>(regex_match(response, what, expression))
|
||
{
|
||
<font color="#000080"> <i>// what[0] contains the whole string
|
||
</i> <i>// what[1] contains the response code
|
||
</i> <i>// what[2] contains the separator character
|
||
</i> <i>// what[3] contains the text message.
|
||
</i></font> <b>if</b>(msg)
|
||
msg->assign(what[3].first, what[3].second);
|
||
<b>return</b> std::atoi(what[1].first);
|
||
}
|
||
<font color="#000080"> <i>// failure did not match
|
||
</i></font> <b>if</b>(msg)
|
||
msg->erase();
|
||
<b>return</b> -1;
|
||
}</pre>
|
||
|
||
<p><a name="match_type"></a>The value of the flags parameter
|
||
passed to the algorithm must be a combination of one or more of
|
||
the following values: <br>
|
||
</p>
|
||
|
||
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">match_default</td>
|
||
<td valign="top" width="45%">The default value, indicates
|
||
that <b>first</b> represents the start of a line, the
|
||
start of a buffer, and (possibly) the start of a word.
|
||
Also implies that <b>last</b> represents the end of a
|
||
line, the end of the buffer and (possibly) the end of a
|
||
word. Implies that a dot sub-expression "."
|
||
will match both the newline character and a null.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">match_not_bol</td>
|
||
<td valign="top" width="45%">When this flag is set then <b>first</b>
|
||
does not represent the start of a new line.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">match_not_eol</td>
|
||
<td valign="top" width="45%">When this flag is set then <b>last</b>
|
||
does not represent the end of a line.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">match_not_bob</td>
|
||
<td valign="top" width="45%">When this flag is set then <b>first</b>
|
||
is not the beginning of a buffer.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">match_not_eob</td>
|
||
<td valign="top" width="45%">When this flag is set then <b>last</b>
|
||
does not represent the end of a buffer.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">match_not_bow</td>
|
||
<td valign="top" width="45%">When this flag is set then <b>first</b>
|
||
can never match the start of a word.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">match_not_eow</td>
|
||
<td valign="top" width="45%">When this flag is set then <b>last</b>
|
||
can never match the end of a word.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">match_not_dot_newline</td>
|
||
<td valign="top" width="45%">When this flag is set then a
|
||
dot expression "." can not match the newline
|
||
character.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="45%">match_not_dot_null</td>
|
||
<td valign="top" width="45%">When this flag is set then a
|
||
dot expression "." can not match a null
|
||
character.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%" height="75"> </td>
|
||
<td valign="top" width="45%" height="75">match_prev_avail</td>
|
||
<td valign="top" width="45%" height="75">When this flag
|
||
is set, then *--<b>first</b> is a valid expression and
|
||
the flags match_not_bol and match_not_bow have no effect,
|
||
since the value of the previous character can be used to
|
||
check these.</td>
|
||
<td width="5%" height="75"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%" height="15"> </td>
|
||
<td valign="top" width="45%" height="15">match_any</td>
|
||
<td valign="top" width="45%" height="15">When this flag
|
||
is set, then the first string matched is returned, rather
|
||
than the longest possible match. This flag can
|
||
significantly reduce the time taken to find a match, but
|
||
what matches is undefined.</td>
|
||
<td width="5%" height="15"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%" height="15"> </td>
|
||
<td valign="top" width="45%" height="15">match_not_null</td>
|
||
<td valign="top" width="45%" height="15">When this flag
|
||
is set, then the expression will never match a null
|
||
string.</td>
|
||
<td width="5%" height="15"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%" height="15"> </td>
|
||
<td valign="top" width="45%" height="15">match_continuous</td>
|
||
        <td valign="top" width="45%" height="15">When this flag
|
||
is set, then during a grep operation, each successive
|
||
match must start from where the previous match finished.</td>
|
||
<td width="5%" height="15"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td width="5%" height="15"> </td>
|
||
<td valign="top" width="45%" height="15">match_partial</td>
|
||
<td valign="top" width="45%" height="15">When this flag
|
||
is set, the regex algorithms will report <a
|
||
href="#partial_matches">partial matches</a> - that is
|
||
where one or more characters at the end of the text input
|
||
matched some prefix of the regular expression.</td>
|
||
<td width="5%" height="15"> </td>
|
||
</tr>
|
||
</table>
|
||
|
||
<p> </p>
|
||
|
||
<hr align="right">
|
||
|
||
<h3><a name="reg_search"></a>Algorithm regex_search</h3>
|
||
|
||
<p> #include <<a href="../../boost/regex.hpp">boost/regex.hpp</a>>
|
||
</p>
|
||
|
||
<p>The algorithm regex_search will search a range denoted by a
|
||
pair of bidirectional-iterators for a given regular expression.
|
||
The algorithm uses various heuristics to reduce the search time
|
||
by only checking for a match if a match could conceivably start
|
||
at that position. The algorithm is defined as follows: </p>
|
||
|
||
<pre><b>template</b> <<b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits, <b>class</b> Allocator2>
|
||
<b>bool</b> regex_search(iterator first,
|
||
iterator last,
|
||
match_results<iterator, Allocator>& m,
|
||
<b>const</b> reg_expression<charT, traits, Allocator2>& e,
|
||
<b>unsigned</b> flags = match_default);</pre>
|
||
|
||
<p>The library also defines the following convenience versions,
|
||
which take either a const charT*, or a const std::basic_string<>&
|
||
in place of a pair of iterators [note - these versions may not be
|
||
available, or may be available in a more limited form, depending
|
||
upon your compiler's capabilities]: </p>
|
||
|
||
<pre><b>template</b> <<b>class</b> charT, <b>class</b> Allocator, <b>class</b> traits, <b>class</b> Allocator2>
|
||
<b>bool</b> regex_search(<b>const</b> charT* str,
|
||
match_results<<b>const</b> charT*, Allocator>& m,
|
||
<b>const</b> reg_expression<charT, traits, Allocator2>& e,
|
||
<b>unsigned</b> flags = match_default);
|
||
|
||
<b>template</b> <<b>class</b> ST, <b>class</b> SA, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits, <b>class</b> Allocator2>
|
||
<b>bool</b> regex_search(<b>const</b> std::basic_string<charT, ST, SA>& s,
|
||
match_results<<b>typename</b> std::basic_string<charT, ST, SA>::const_iterator, Allocator>& m,
|
||
<b>const</b> reg_expression<charT, traits, Allocator2>& e,
|
||
<b>unsigned</b> flags = match_default);</pre>
|
||
|
||
<p>The parameters for the main function version are as follows: <br>
|
||
</p>
|
||
|
||
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="50%">iterator first</td>
|
||
<td valign="top" width="50%">The starting position of the
|
||
range to search.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td> </td>
|
||
<td valign="top" width="50%">iterator last</td>
|
||
<td valign="top" width="50%">The ending position of the
|
||
range to search.</td>
|
||
<td> </td>
|
||
</tr>
|
||
<tr>
|
||
<td> </td>
|
||
<td valign="top" width="50%">match_results<iterator,
|
||
Allocator>& m</td>
|
||
<td valign="top" width="50%">An instance of match_results
|
||
in which what matched will be reported. On exit if a
|
||
match occurred then m[0] denotes the whole of the string
|
||
that matched, m[0].first and m[0].second will be less
|
||
than or equal to last. m[1] denotes the first sub-expression
|
||
m[2] the second sub-expression and so on. If no match
|
||
occurred then m[0].first = m[0].second = last.<p>Note
|
||
that since the match_results structure stores only
|
||
iterators, and not strings, the iterators/strings passed
|
||
to regex_search must be valid for as long as the result
|
||
is to be used. For that reason never pass temporary
|
||
string objects to regex_search.</p>
|
||
</td>
|
||
<td> </td>
|
||
</tr>
|
||
<tr>
|
||
<td> </td>
|
||
<td valign="top" width="50%">const
|
||
reg_expression<charT, traits, Allocator2>& e</td>
|
||
<td valign="top" width="50%">The regular expression to
|
||
search for.</td>
|
||
<td> </td>
|
||
</tr>
|
||
<tr>
|
||
<td> </td>
|
||
<td valign="top" width="50%">unsigned flags =
|
||
match_default</td>
|
||
<td valign="top" width="50%">The flags that determine
|
||
what gets matched, a combination of one or more <a
|
||
href="#match_type">match_flags</a> enumerators.</td>
|
||
<td> </td>
|
||
</tr>
|
||
</table>
|
||
|
||
<p><br>
|
||
</p>
|
||
|
||
<p>Example: the following <a
|
||
href="example/snippets/regex_search_example.cpp">example</a>,
|
||
takes the contents of a file in the form of a string, and
|
||
searches for all the C++ class declarations in the file. The code
|
||
will work regardless of the way that std::string is implemented,
|
||
for example it could easily be modified to work with the SGI rope
|
||
class, which uses a non-contiguous storage strategy. </p>
|
||
|
||
<pre><font color="#008000">#include <string>
|
||
#include <map>
|
||
#include <boost/regex.hpp>
|
||
</font><font color="#000080"><i>
|
||
// purpose:
|
||
// takes the contents of a file in the form of a string
|
||
// and searches for all the C++ class definitions, storing
|
||
// their locations in a map of strings/int's
|
||
</i></font><b>typedef</b> std::map<std::string, <b>int</b>, std::less<std::string> > map_type;
|
||
|
||
boost::regex expression("^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\\{|:[^;\\{()]*\\{)");
|
||
<b>
|
||
void</b> IndexClasses(map_type& m, <b>const</b> std::string& file)
|
||
{
|
||
std::string::const_iterator start, end;
|
||
start = file.begin();
|
||
end = file.end();
|
||
boost::match_results<std::string::const_iterator> what;
|
||
<b>unsigned</b> <b>int</b> flags = boost::match_default;
|
||
<b>while</b>(regex_search(start, end, what, expression, flags))
|
||
{
|
||
<font color="#000080"> <i>// what[0] contains the whole string
|
||
</i> <i>// what[5] contains the class name.
|
||
</i> <i>// what[6] contains the template specialisation if any.
|
||
</i> <i>// add class name and position to map:
|
||
</i></font> m[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] =
|
||
what[5].first - file.begin();
|
||
<font color="#000080"><i>// update search position:
|
||
</i></font> start = what[0].second;
|
||
<font color="#000080"><i>// update flags:
|
||
</i></font> flags |= boost::match_prev_avail;
|
||
flags |= boost::match_not_bob;
|
||
}
|
||
}
|
||
</pre>
|
||
|
||
<hr>
|
||
|
||
<h3><a name="reg_grep"></a>Algorithm regex_grep</h3>
|
||
|
||
<p>#include <<a href="../../boost/regex.hpp">boost/regex.hpp</a>>
|
||
</p>
|
||
|
||
<p> Regex_grep allows you to search through a bidirectional-iterator
|
||
range and locate all the (non-overlapping) matches with a given
|
||
regular expression. The function is declared as: </p>
|
||
|
||
<pre><b>template</b> <<b>class</b> Predicate, <b>class</b> iterator, <b>class</b> charT, <b>class</b> traits, <b>class</b> Allocator>
|
||
<b>unsigned</b> <b>int</b> regex_grep(Predicate foo,
|
||
iterator first,
|
||
iterator last,
|
||
<b> const</b> reg_expression<charT, traits, Allocator>& e,
|
||
<b> unsigned</b> flags = match_default)</pre>
|
||
|
||
<p>The library also defines the following convenience versions,
|
||
which take either a const charT*, or a const std::basic_string<>&
|
||
in place of a pair of iterators [note - these versions may not be
|
||
available, or may be available in a more limited form, depending
|
||
upon your compiler's capabilities]: </p>
|
||
|
||
<pre><b>template</b> <<b>class</b> Predicate, <b>class</b> charT, <b>class</b> Allocator, <b>class</b> traits>
|
||
<b>unsigned</b> <b>int</b> regex_grep(Predicate foo,
|
||
<b>const</b> charT* str,
|
||
<b>const</b> reg_expression<charT, traits, Allocator>& e,
|
||
<b>unsigned</b> flags = match_default);
|
||
|
||
<b>template</b> <<b>class</b> Predicate, <b>class</b> ST, <b>class</b> SA, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits>
|
||
<b>unsigned</b> <b>int</b> regex_grep(Predicate foo,
|
||
<b>const</b> std::basic_string<charT, ST, SA>& s,
|
||
<b>const</b> reg_expression<charT, traits, Allocator>& e,
|
||
<b>unsigned</b> flags = match_default);</pre>
|
||
|
||
<p>The parameters for the primary version of regex_grep have the
|
||
following meanings: <br>
|
||
</p>
|
||
|
||
<table border="0" cellpadding="7" cellspacing="0" width="624">
|
||
<tr>
|
||
<td width="5%"> </td>
|
||
<td valign="top" width="50%">foo</td>
|
||
<td valign="top" width="50%">A predicate function object
|
||
or function pointer, see below for more information.</td>
|
||
<td width="5%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td> </td>
|
||
<td valign="top" width="50%">first</td>
|
||
<td valign="top" width="50%">The start of the range to
|
||
search.</td>
|
||
<td> </td>
|
||
</tr>
|
||
<tr>
|
||
<td> </td>
|
||
<td valign="top" width="50%">last</td>
|
||
<td valign="top" width="50%">The end of the range to
|
||
search.</td>
|
||
<td> </td>
|
||
</tr>
|
||
<tr>
|
||
<td> </td>
|
||
<td valign="top" width="50%">e</td>
|
||
<td valign="top" width="50%">The regular expression to
|
||
search for.</td>
|
||
<td> </td>
|
||
</tr>
|
||
<tr>
|
||
<td> </td>
|
||
<td valign="top" width="50%">flags</td>
|
||
<td valign="top" width="50%">The flags that determine how
|
||
        matching is carried out, a combination of one or more <a href="#match_type">match_flags</a>
|
||
enumerators.</td>
|
||
<td> </td>
|
||
</tr>
|
||
</table>
|
||
|
||
<p> The algorithm finds all of the non-overlapping matches
|
||
of the expression e, for each match it fills a <a
|
||
href="#reg_match">match_results</a><iterator, Allocator>
|
||
structure, which contains information on what matched, and calls
|
||
the predicate foo, passing the match_results<iterator,
|
||
Allocator> as a single argument. If the predicate returns
|
||
true, then the grep operation continues, otherwise it terminates
|
||
without searching for further matches. The function returns the
|
||
number of matches found.</p>
|
||
|
||
<p>The general form of the predicate is: </p>
|
||
|
||
<pre><b>struct</b> grep_predicate
|
||
{
|
||
<b> bool</b> <b>operator</b>()(<b>const</b> match_results<iterator_type, expression_type::alloc_type>& m);
|
||
};</pre>
|
||
|
||
<p>For example the regular expression "a*b" would find
|
||
one match in the string "aaaaab" and two in the string
|
||
"aaabb". </p>
|
||
|
||
<p>Remember this algorithm can be used for a lot more than
|
||
implementing a version of grep, the predicate can be and do
|
||
anything that you want, grep utilities would output the results
|
||
to the screen, another program could index a file based on a
|
||
regular expression and store a set of bookmarks in a list, or a
|
||
text file conversion utility would output to file. The results of
|
||
one regex_grep can even be chained into another regex_grep to
|
||
create recursive parsers. </p>
|
||
|
||
<p><a href="example/snippets/regex_grep_example_1.cpp">Example</a>:
|
||
convert the example from <i>regex_search</i> to use <i>regex_grep</i>
|
||
instead: </p>
|
||
|
||
<pre><font color="#008000">#include <string>
|
||
#include <map>
|
||
#include <boost/regex.hpp>
|
||
|
||
</font><font color="#000080"><i>// IndexClasses:
|
||
// takes the contents of a file in the form of a string
|
||
// and searches for all the C++ class definitions, storing
|
||
// their locations in a map of strings/int's
|
||
</i></font><b>
|
||
typedef</b> std::map<std::string, <b>int</b>, std::less<std::string> > map_type;
|
||
|
||
boost::regex expression(<font color="#000080">"^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
|
||
"(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?[[:space:]]*)*(\\<\\w*\\>)"
|
||
"[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\\{|:[^;\\{()]*\\{)"</font>);
|
||
<b>
|
||
class</b> IndexClassesPred
|
||
{
|
||
map_type& m;
|
||
std::string::const_iterator base;
|
||
<b>public</b>:
|
||
IndexClassesPred(map_type& a, std::string::const_iterator b) : m(a), base(b) {}
|
||
<b>bool</b> <b>operator</b>()(<b>const</b> match_results<std::string::const_iterator, regex::alloc_type>& what)
|
||
{
|
||
<font color="#000080"> <i>// what[0] contains the whole string
|
||
</i> <i>// what[5] contains the class name.
|
||
</i> <i>// what[6] contains the template specialisation if any.
|
||
</i> <i>// add class name and position to map:
|
||
</i></font> m[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] =
|
||
what[5].first - base;
|
||
<b>return</b> <b>true</b>;
|
||
}
|
||
};
|
||
<b>
|
||
void</b> IndexClasses(map_type& m, <b>const</b> std::string& file)
|
||
{
|
||
std::string::const_iterator start, end;
|
||
start = file.begin();
|
||
end = file.end();
|
||
regex_grep(IndexClassesPred(m, start), start, end, expression);
|
||
} </pre>
|
||
|
||
<p><a href="example/snippets/regex_grep_example_2.cpp">Example</a>:
|
||
Use regex_grep to call a global callback function: </p>
|
||
|
||
<pre><font color="#008000">#include <string>
|
||
#include <map>
|
||
#include <boost/regex.hpp>
|
||
|
||
</font><font color="#000080"><i>// purpose:
|
||
// takes the contents of a file in the form of a string
|
||
// and searches for all the C++ class definitions, storing
|
||
// their locations in a map of strings/int's
|
||
</i></font><b>
|
||
typedef</b> std::map<std::string, <b>int</b>, std::less<std::string> > map_type;
|
||
|
||
boost::regex expression(<font color="#000080">"^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\\{|:[^;\\{()]*\\{)"</font>);
|
||
|
||
map_type class_index;
|
||
std::string::const_iterator base;
|
||
|
||
<b>bool</b> grep_callback(<b>const</b> boost::match_results<std::string::const_iterator, boost::regex::alloc_type>& what)
|
||
{
|
||
<font color="#000080"> <i>// what[0] contains the whole string
|
||
</i> <i>// what[5] contains the class name.
|
||
</i> <i>// what[6] contains the template specialisation if any.
|
||
</i> <i>// add class name and position to map:
|
||
</i></font> class_index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] =
|
||
what[5].first - base;
|
||
<b>return</b> <b>true</b>;
|
||
}
|
||
<b>
|
||
void</b> IndexClasses(<b>const</b> std::string& file)
|
||
{
|
||
std::string::const_iterator start, end;
|
||
start = file.begin();
|
||
end = file.end();
|
||
base = start;
|
||
regex_grep(grep_callback, start, end, expression, match_default);
|
||
}
|
||
</pre>
|
||
|
||
<p><a href="example/snippets/regex_grep_example_3.cpp">Example</a>:
|
||
use regex_grep to call a class member function, use the standard
|
||
library adapters <i>std::mem_fun</i> and <i>std::bind1st</i> to
|
||
convert the member function into a predicate: </p>
|
||
|
||
<pre><font color="#008000">#include <string>
|
||
#include <map>
|
||
#include <boost/regex.hpp>
|
||
#include <functional>
|
||
</font><font color="#000080"><i>
|
||
// purpose:
|
||
// takes the contents of a file in the form of a string
|
||
// and searches for all the C++ class definitions, storing
|
||
// their locations in a map of strings/int's
|
||
|
||
</i></font><b>typedef</b> std::map<std::string, <b>int</b>, std::less<std::string> > map_type;
|
||
<b>
|
||
class</b> class_index
|
||
{
|
||
boost::regex expression;
|
||
map_type index;
|
||
std::string::const_iterator base;
|
||
<b>bool</b> grep_callback(boost::match_results<std::string::const_iterator, boost::regex::alloc_type> what);
|
||
<b>public</b>:
|
||
<b> void</b> IndexClasses(<b>const</b> std::string& file);
|
||
class_index()
|
||
: index(),
|
||
expression(<font
|
||
color="#000080">"^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
|
||
"(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?"
|
||
"[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?"
|
||
"(\\{|:[^;\\{()]*\\{)"
|
||
</font> ){}
|
||
};
|
||
<b>
|
||
bool</b> class_index::grep_callback(boost::match_results<std::string::const_iterator, boost::regex::alloc_type> what)
|
||
{
|
||
<font color="#000080"> <i>// what[0] contains the whole string
|
||
</i> <i>// what[5] contains the class name.
|
||
</i> <i>// what[6] contains the template specialisation if any.
|
||
</i> <i>// add class name and position to map:
|
||
</i></font> index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] =
|
||
what[5].first - base;
|
||
<b>return</b> <b>true</b>;
|
||
}
|
||
|
||
<b>void</b> class_index::IndexClasses(<b>const</b> std::string& file)
|
||
{
|
||
std::string::const_iterator start, end;
|
||
start = file.begin();
|
||
end = file.end();
|
||
base = start;
|
||
regex_grep(std::bind1st(std::mem_fun(&class_index::grep_callback), <b>this</b>),
|
||
start,
|
||
end,
|
||
expression);
|
||
}
|
||
</pre>
|
||
|
||
<p><a href="example/snippets/regex_grep_example_4.cpp">Finally</a>,
|
||
C++ Builder users can use C++ Builder's closure type as a
|
||
callback argument: </p>
|
||
|
||
<pre><font color="#008000">#include <string>
|
||
#include <map>
|
||
#include <boost/regex.hpp>
|
||
#include <functional>
|
||
</font><font color="#000080"><i>
|
||
// purpose:
|
||
// takes the contents of a file in the form of a string
|
||
// and searches for all the C++ class definitions, storing
|
||
// their locations in a map of strings/int's
|
||
|
||
</i></font><b>typedef</b> std::map<std::string, <b>int</b>, std::less<std::string> > map_type;
|
||
<b>class</b> class_index
|
||
{
|
||
boost::regex expression;
|
||
map_type index;
|
||
std::string::const_iterator base;
|
||
<b>typedef</b> boost::match_results<std::string::const_iterator, boost::regex::alloc_type> arg_type;
|
||
<b>bool</b> grep_callback(<b>const</b> arg_type& what);
|
||
<b>public</b>:
|
||
<b>typedef</b> <b>bool</b> (<b>__closure</b>* grep_callback_type)(<b>const</b> arg_type&);
|
||
<b>void</b> IndexClasses(<b>const</b> std::string& file);
|
||
class_index()
|
||
: index(),
|
||
expression(<font
|
||
color="#000080">"^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
|
||
"(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?"
|
||
"[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?"
|
||
"(\\{|:[^;\\{()]*\\{)"
|
||
</font> ){}
|
||
};
|
||
|
||
<b>bool</b> class_index::grep_callback(<b>const</b> arg_type& what)
|
||
{
|
||
<font color="#000080"> <i>// what[0] contains the whole string </i>
|
||
<i>// what[5] contains the class name. </i>
|
||
<i>// what[6] contains the template specialisation if any. </i>
|
||
<i>// add class name and position to map: </i></font>
|
||
index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] =
|
||
what[5].first - base;
|
||
<b>return</b> <b>true</b>;
|
||
}
|
||
|
||
<b>void</b> class_index::IndexClasses(<b>const</b> std::string& file)
|
||
{
|
||
std::string::const_iterator start, end;
|
||
start = file.begin();
|
||
end = file.end();
|
||
base = start;
|
||
class_index::grep_callback_type cl = &(<b>this</b>->grep_callback);
|
||
regex_grep(cl,
|
||
start,
|
||
end,
|
||
expression);
|
||
} </pre>
|
||
|
||
<hr>
|
||
|
||
<h3> <a name="reg_format"></a>Algorithm regex_format</h3>
|
||
|
||
<p>#include <<a href="../../boost/regex.hpp">boost/regex.hpp</a>>
|
||
</p>
|
||
|
||
<p>The algorithm regex_format takes the results of a match and
|
||
creates a new string based upon a <a
|
||
href="format_string.htm#format_string">format string</a>,
|
||
regex_format can be used for search and replace operations: </p>
|
||
|
||
<pre><b>template</b> <<b>class</b> OutputIterator, <b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT>
|
||
OutputIterator regex_format(OutputIterator out,
|
||
<b>const</b> match_results<iterator, Allocator>& m,
|
||
<b>const</b> charT* fmt,
|
||
<b>unsigned</b> flags = 0);
|
||
<b>
|
||
template</b> <<b>class</b> OutputIterator, <b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT>
|
||
OutputIterator regex_format(OutputIterator out,
|
||
<b>const</b> match_results<iterator, Allocator>& m,
|
||
<b>const</b> std::basic_string<charT>& fmt,
|
||
<b>unsigned</b> flags = 0);</pre>
|
||
|
||
<p>The library also defines the following convenience variation
|
||
of regex_format, which returns the result directly as a string,
|
||
rather than outputting to an iterator [note - this version may
|
||
not be available, or may be available in a more limited form,
|
||
depending upon your compiler's capabilities]: </p>
|
||
|
||
<pre><b>template</b> <<b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT>
|
||
std::basic_string<charT> regex_format
|
||
(<b>const</b> match_results<iterator, Allocator>& m,
|
||
<b>const</b> charT* fmt,
|
||
<b>unsigned</b> flags = 0);
|
||
|
||
<b>template</b> <<b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT>
|
||
std::basic_string<charT> regex_format
|
||
(<b>const</b> match_results<iterator, Allocator>& m,
|
||
<b>const</b> std::basic_string<charT>& fmt,
|
||
<b>unsigned</b> flags = 0);</pre>
|
||
|
||
<p>Parameters to the main version of the function are passed as
|
||
follows: <br>
|
||
</p>
|
||
|
||
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
||
<tr>
|
||
<td valign="top" width="9%"> </td>
|
||
<td valign="top" width="39%">OutputIterator out</td>
|
||
<td valign="top" width="44%">An output iterator type, the
|
||
output string is sent to this iterator. Typically this
|
||
would be a std::ostream_iterator.</td>
|
||
<td valign="top" width="9%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="9%"> </td>
|
||
<td valign="top" width="39%"><b>const</b>
|
||
match_results<iterator, Allocator>& m</td>
|
||
<td valign="top" width="44%">An instance of
|
||
match_results<> obtained from one of the matching
|
||
algorithms above, and denoting what matched.</td>
|
||
<td valign="top" width="9%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="9%"> </td>
|
||
<td valign="top" width="39%"><b>const</b> charT* fmt</td>
|
||
<td valign="top" width="44%">A format string that
|
||
determines how the match is transformed into the new
|
||
string.</td>
|
||
<td valign="top" width="9%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="9%"> </td>
|
||
<td valign="top" width="39%"><b>unsigned</b> flags</td>
|
||
<td valign="top" width="44%">Optional flags which
|
||
describe how the format string is to be interpreted.</td>
|
||
<td valign="top" width="9%"> </td>
|
||
</tr>
|
||
</table>
|
||
|
||
<p><a name="format_flags"></a>Format flags are defined as follows:
|
||
<br>
|
||
</p>
|
||
|
||
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
||
<tr>
|
||
<td valign="top" width="9%"> </td>
|
||
<td valign="top" width="39%">format_all</td>
|
||
<td valign="top" width="43%">Enables all syntax options (perl-like
|
||
        plus extensions).</td>
|
||
<td valign="top" width="9%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="9%"> </td>
|
||
<td valign="top" width="39%">format_sed</td>
|
||
<td valign="top" width="43%">Allows only a sed-like
|
||
syntax.</td>
|
||
<td valign="top" width="9%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="9%"> </td>
|
||
<td valign="top" width="39%">format_perl</td>
|
||
<td valign="top" width="43%">Allows only a perl-like
|
||
syntax.</td>
|
||
<td valign="top" width="9%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="9%"> </td>
|
||
<td valign="top" width="39%">format_no_copy</td>
|
||
<td valign="top" width="43%">Disables copying of
|
||
unmatched sections to the output string during <a
|
||
href="#reg_merge">regex_merge</a> operations.</td>
|
||
<td valign="top" width="9%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td> </td>
|
||
<td>format_first_only</td>
|
||
        <td>When this flag is set only the first occurrence will
|
||
be replaced (applies to regex_merge only).</td>
|
||
<td> </td>
|
||
</tr>
|
||
</table>
|
||
|
||
<p><br>
|
||
</p>
|
||
|
||
<p>The format string syntax (and available options) is described
|
||
more fully under <a href="format_string.htm#format_string">format
|
||
strings</a>. </p>
|
||
|
||
<hr>
|
||
|
||
<h3><a name="reg_merge"></a>Algorithm regex_merge</h3>
|
||
|
||
<p>#include <<a href="../../boost/regex.hpp">boost/regex.hpp</a>>
|
||
</p>
|
||
|
||
<p>The algorithm regex_merge is a combination of <a
|
||
href="#reg_grep">regex_grep</a> and <a href="#reg_format">regex_format</a>.
|
||
That is, it greps through the string finding all the matches to
|
||
the regular expression, for each match it then calls <a
|
||
href="#reg_format">regex_format</a> to format the string and
|
||
sends the result to the output iterator. Sections of text that do
|
||
not match are copied to the output unchanged only if the flags
|
||
parameter does not have the flag <a href="#format_flags">format_no_copy</a>
|
||
set. If the flag <a href="#format_flags">format_first_only</a> is
|
||
set then only the first occurrence is replaced rather than all
|
||
occurrences.</p>
|
||
|
||
<pre><b>template</b> <<b>class</b> OutputIterator, <b>class</b> iterator, <b>class</b> traits, <b>class</b> Allocator, <b>class</b> charT>
|
||
OutputIterator regex_merge(OutputIterator out,
|
||
iterator first,
|
||
iterator last,
|
||
<b>const</b> reg_expression<charT, traits, Allocator>& e,
|
||
<b>const</b> charT* fmt,
|
||
<b> unsigned</b> <b>int</b> flags = match_default);
|
||
|
||
<b>template</b> <<b>class</b> OutputIterator, <b>class</b> iterator, <b>class</b> traits, <b>class</b> Allocator, <b>class</b> charT>
|
||
OutputIterator regex_merge(OutputIterator out,
|
||
iterator first,
|
||
iterator last,
|
||
<b>const</b> reg_expression<charT, traits, Allocator>& e,
|
||
                           <b>const</b> std::basic_string<charT>& fmt,
|
||
<b> unsigned</b> <b>int</b> flags = match_default);</pre>
|
||
|
||
<p>The library also defines the following convenience variation
|
||
of regex_merge, which returns the result directly as a string,
|
||
rather than outputting to an iterator [note - this version may
|
||
not be available, or may be available in a more limited form,
|
||
depending upon your compiler's capabilities]: </p>
|
||
|
||
<pre><b>template</b> <<b>class</b> traits, <b>class</b> Allocator, <b>class</b> charT>
|
||
std::basic_string<charT> regex_merge(<b>const</b> std::basic_string<charT>& text,
|
||
<b>const</b> reg_expression<charT, traits, Allocator>& e,
|
||
<b>const</b> charT* fmt,
|
||
<b> unsigned</b> <b>int</b> flags = match_default);
|
||
|
||
<b>template</b> <<b>class</b> traits, <b>class</b> Allocator, <b>class</b> charT>
|
||
std::basic_string<charT> regex_merge(<b>const</b> std::basic_string<charT>& text,
|
||
<b>const</b> reg_expression<charT, traits, Allocator>& e,
|
||
<b>const</b> std::basic_string<charT>& fmt,
|
||
<b> unsigned</b> <b>int</b> flags = match_default);</pre>
|
||
|
||
<p>Parameters to the main version of the function are passed as
|
||
follows: <br>
|
||
</p>
|
||
|
||
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
||
<tr>
|
||
<td valign="top" width="7%"> </td>
|
||
<td valign="top" width="40%">OutputIterator out</td>
|
||
<td valign="top" width="45%">An output iterator type, the
|
||
output string is sent to this iterator. Typically this
|
||
would be a std::ostream_iterator.</td>
|
||
<td valign="top" width="8%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="7%"> </td>
|
||
<td valign="top" width="40%">iterator first</td>
|
||
<td valign="top" width="45%">The start of the range of
|
||
text to grep (bidirectional-iterator).</td>
|
||
<td valign="top" width="8%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="7%"> </td>
|
||
<td valign="top" width="40%">iterator last</td>
|
||
<td valign="top" width="45%">The end of the range of text
|
||
to grep (bidirectional-iterator).</td>
|
||
<td valign="top" width="8%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="7%"> </td>
|
||
<td valign="top" width="40%"><b>const</b>
|
||
reg_expression<charT, traits, Allocator>& e</td>
|
||
<td valign="top" width="45%">The expression to search for.</td>
|
||
<td valign="top" width="8%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="7%"> </td>
|
||
<td valign="top" width="40%"><b>const</b> charT* fmt</td>
|
||
<td valign="top" width="45%">The format string to be
|
||
applied to sections of text that match.</td>
|
||
<td valign="top" width="8%"> </td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="7%"> </td>
|
||
<td valign="top" width="40%"><b>unsigned</b> <b>int</b>
|
||
flags = match_default</td>
|
||
<td valign="top" width="45%">Flags which determine how
|
||
the expression is matched - see <a href="#match_type">match_flags</a>,
|
||
and how the format string is interpreted - see <a
|
||
href="#format_flags">format_flags</a>.</td>
|
||
<td valign="top" width="8%"> </td>
|
||
</tr>
|
||
</table>
|
||
|
||
<p>Example: the following <a
|
||
href="example/snippets/regex_merge_example.cpp">example</a> takes
|
||
C/C++ source code as input, and outputs syntax highlighted HTML
|
||
code. </p>
|
||
|
||
<pre>
|
||
<font color="#008080">#include <fstream>
|
||
#include <sstream>
|
||
#include <string>
|
||
#include <iterator>
|
||
#include <boost/regex.hpp>
|
||
#include <fstream>
|
||
#include <iostream>
|
||
</font>
|
||
<font color="#000080"><i>// purpose:
|
||
// takes the contents of a file and transforms it to
|
||
// syntax highlighted code in html format
|
||
</i></font>
|
||
boost::regex e1, e2;
|
||
<b>extern</b> <b>const</b> <b>char</b>* expression_text;
|
||
<b>extern</b> <b>const</b> <b>char</b>* format_string;
|
||
<b>extern</b> <b>const</b> <b>char</b>* pre_expression;
|
||
<b>extern</b> <b>const</b> <b>char</b>* pre_format;
|
||
<b>extern</b> <b>const</b> <b>char</b>* header_text;
|
||
<b>extern</b> <b>const</b> <b>char</b>* footer_text;
|
||
|
||
<b>void</b> load_file(std::string& s, std::istream& is)
|
||
{
|
||
s.erase();
|
||
s.reserve(is.rdbuf()->in_avail());
|
||
<b>char</b> c;
|
||
<b>while</b>(is.get(c))
|
||
{
|
||
<b>if</b>(s.capacity() == s.size())
|
||
s.reserve(s.capacity() * <font color="#000080">3</font>);
|
||
s.append(<font color="#000080">1</font>, c);
|
||
}
|
||
}
|
||
|
||
<b>int</b> main(<b>int</b> argc, <b>const</b> <b>char</b>** argv)
|
||
{
|
||
try{
|
||
e1.assign(expression_text);
|
||
e2.assign(pre_expression);
|
||
<b>for</b>(<b>int</b> i = <font color="#000080">1</font>; i < argc; ++i)
|
||
{
|
||
std::cout << <font color="#0000FF">"Processing file "</font> << argv[i] << std::endl;
|
||
std::ifstream fs(argv[i]);
|
||
std::string in;
|
||
load_file(in, fs);
|
||
std::string out_name(std::string(argv[i]) + std::string(<font
|
||
color="#0000FF">".htm"</font>));
|
||
std::ofstream os(out_name.c_str());
|
||
os << header_text;
|
||
<font color="#000080"><i>// strip '<' and '>' first by outputting to a
|
||
</i></font> <font color="#000080"><i>// temporary string stream
|
||
</i></font> std::ostringstream t(std::ios::out | std::ios::binary);
|
||
std::ostream_iterator<<b>char</b>, <b>char</b>> oi(t);
|
||
boost::regex_merge(oi, in.begin(), in.end(), e2, pre_format);
|
||
<font color="#000080"><i>// then output to final output stream
|
||
</i></font> <font color="#000080"><i>// adding syntax highlighting:
|
||
</i></font> std::string s(t.str());
|
||
std::ostream_iterator<<b>char</b>, <b>char</b>> out(os);
|
||
boost::regex_merge(out, s.begin(), s.end(), e1, format_string);
|
||
os << footer_text;
|
||
}
|
||
}
|
||
<strong>catch</strong>(...)
|
||
{ <strong>return</strong> -1; }
|
||
<b>return</b> <font color="#000080">0</font>;
|
||
}
|
||
|
||
<b>extern</b> <b>const</b> <b>char</b>* pre_expression = <font
|
||
color="#0000FF">"(<)|(>)|\\r"</font>;
|
||
<b>extern</b> <b>const</b> <b>char</b>* pre_format = <font
|
||
color="#0000FF">"(?1<)(?2>)"</font>;
|
||
|
||
|
||
<b>const</b> <b>char</b>* expression_text = <font color="#000080"><i>// preprocessor directives: index 1
|
||
</i></font> <font color="#0000FF">"(^[[:blank:]]*#(?:[^\\\\\\n]|\\\\[^\\n[:punct:][:word:]]*[\\n[:punct:][:word:]])*)|"
|
||
</font> <font color="#000080"><i>// comment: index 2
|
||
</i></font> <font color="#0000FF">"(//[^\\n]*|/\\*.*?\\*/)|"
|
||
</font> <font color="#000080"><i>// literals: index 3
|
||
</i></font> <font color="#0000FF">"\\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\\>|"
|
||
</font> <font color="#000080"><i>// string literals: index 4
|
||
</i></font> <font color="#0000FF">"('(?:[^\\\\']|\\\\.)*'|\"(?:[^\\\\\"]|\\\\.)*\")|"
|
||
</font> <font color="#000080"><i>// keywords: index 5
|
||
</i></font> <font color="#0000FF">"\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import"
|
||
</font> <font color="#0000FF">"|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall"
|
||
</font> <font color="#0000FF">"|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool"
|
||
</font> <font color="#0000FF">"|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete"
|
||
</font> <font color="#0000FF">"|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto"
|
||
</font> <font color="#0000FF">"|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected"
|
||
</font> <font color="#0000FF">"|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast"
|
||
</font> <font color="#0000FF">"|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned"
|
||
</font> <font color="#0000FF">"|using|virtual|void|volatile|wchar_t|while)\\>"
|
||
</font> ;
|
||
|
||
<b>const</b> <b>char</b>* format_string = <font color="#0000FF">"(?1<font color=\"#008040\">$&</font>)"
|
||
</font> <font color="#0000FF">"(?2<I><font color=\"#000080\">$&</font></I>)"
|
||
</font> <font color="#0000FF">"(?3<font color=\"#0000A0\">$&</font>)"
|
||
</font> <font color="#0000FF">"(?4<font color=\"#0000FF\">$&</font>)"
|
||
</font> <font color="#0000FF">"(?5<B>$&</B>)"</font>;
|
||
|
||
<b>const</b> <b>char</b>* header_text = <font color="#0000FF">"<HTML>\n<HEAD>\n"
|
||
</font> <font color="#0000FF">"<TITLE>Auto-generated html formated source</TITLE>\n"
|
||
</font> <font color="#0000FF">"<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=windows-1252\">\n"
|
||
</font> <font color="#0000FF">"</HEAD>\n"
|
||
</font> <font color="#0000FF">"<BODY LINK=\"#0000ff\" VLINK=\"#800080\" BGCOLOR=\"#ffffff\">\n"
|
||
</font> <font color="#0000FF">"<P> </P>\n<PRE>"</font>;
|
||
|
||
<b>const</b> <b>char</b>* footer_text = <font color="#0000FF">"</PRE>\n</BODY>\n\n"</font>;</pre>
|
||
|
||
<hr>
|
||
|
||
<h3><a name="regex_split"></a>Algorithm regex_split</h3>
|
||
|
||
<p>#include <<a href="../../boost/regex.hpp">boost/regex.hpp</a>>
|
||
</p>
|
||
|
||
<p>Algorithm regex_split performs a similar operation to the perl
|
||
split operation, and comes in three overloaded forms: </p>
|
||
|
||
<pre><b>template</b> <<b>class</b> OutputIterator, <b>class</b> charT, <b>class</b> Traits1, <b>class</b> Alloc1, <b>class</b> Traits2, <b>class</b> Alloc2>
|
||
std::size_t regex_split(OutputIterator out,
|
||
std::basic_string<charT, Traits1, Alloc1>& s,
|
||
<b> const</b> reg_expression<charT, Traits2, Alloc2>& e,
|
||
<b> unsigned</b> flags,
|
||
std::size_t max_split);
|
||
|
||
<b>template</b> <<b>class</b> OutputIterator, <b>class</b> charT, <b>class</b> Traits1, <b>class</b> Alloc1, <b>class</b> Traits2, <b>class</b> Alloc2>
|
||
std::size_t regex_split(OutputIterator out,
|
||
std::basic_string<charT, Traits1, Alloc1>& s,
|
||
<b> const</b> reg_expression<charT, Traits2, Alloc2>& e,
|
||
<b>unsigned</b> flags = match_default);
|
||
|
||
<b>template</b> <<b>class</b> OutputIterator, <b>class</b> charT, <b>class</b> Traits1, <b>class</b> Alloc1>
|
||
std::size_t regex_split(OutputIterator out,
|
||
std::basic_string<charT, Traits1, Alloc1>& s);</pre>
|
||
|
||
<p>Each version takes an output-iterator for output, and a string
|
||
for input. If the expression contains no marked sub-expressions,
|
||
then the algorithm writes one string onto the output-iterator for
|
||
each section of input that does not match the expression. If the
|
||
expression does contain marked sub-expressions, then each time a
|
||
match is found, one string for each marked sub-expression will be
|
||
written to the output-iterator. No more than <i>max_split </i>strings
|
||
will be written to the output-iterator. Before returning, all the
|
||
input processed will be deleted from the string <i>s</i> (if <i>max_split
|
||
</i>is not reached then all of <i>s</i> will be deleted). Returns
|
||
the number of strings written to the output-iterator. If the
|
||
parameter <i>max_split</i> is not specified then it defaults to
|
||
UINT_MAX. If no expression is specified, then it defaults to
|
||
"\s+", and splitting occurs on whitespace. </p>
|
||
|
||
<p><a href="example/snippets/regex_split_example_1.cpp">Example</a>:
|
||
the following function will split the input string into a series
|
||
of tokens, and remove each token from the string <i>s</i>: </p>
|
||
|
||
<pre><b>unsigned</b> tokenise(std::list<std::string>& l, std::string& s)
|
||
{
|
||
<b> return</b> boost::regex_split(std::back_inserter(l), s);
|
||
}</pre>
|
||
|
||
<p><a href="example/snippets/regex_split_example_2.cpp">Example</a>:
|
||
the following short program will extract all of the URLs from an
|
||
html file, and print them out to <i>cout</i>: </p>
|
||
|
||
<pre><font color="#008000">#include <list>
|
||
#include <fstream>
|
||
#include <iostream>
|
||
#include <boost/regex.hpp>
|
||
</font>
|
||
boost::regex e(<font color="#000080">"<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\""</font>,
|
||
boost::regbase::normal | boost::regbase::icase);
|
||
|
||
<b>void</b> load_file(std::string& s, std::istream& is)
|
||
{
|
||
s.erase();
|
||
<font color="#000080">//
|
||
// attempt to grow string buffer to match file size,
|
||
// this doesn't always work...
|
||
</font> s.reserve(is.rdbuf()-&gt;in_avail());
|
||
<b>char</b> c;
|
||
<b>while</b>(is.get(c))
|
||
{
|
||
<font color="#000080">// use logarithmic growth strategy, in case
|
||
// in_avail (above) returned zero:
|
||
</font> <b>if</b>(s.capacity() == s.size())
|
||
s.reserve(s.capacity() * 3);
|
||
s.append(1, c);
|
||
}
|
||
}
|
||
|
||
|
||
<b>int</b> main(<b>int</b> argc, <b>char</b>** argv)
|
||
{
|
||
std::string s;
|
||
std::list<std::string> l;
|
||
|
||
<b>for</b>(<b>int</b> i = 1; i < argc; ++i)
|
||
{
|
||
std::cout << <font color="#000080">"Findings URL's in "</font> << argv[i] << <font
|
||
color="#000080">":"</font> << std::endl;
|
||
s.erase();
|
||
std::ifstream is(argv[i]);
|
||
load_file(s, is);
|
||
boost::regex_split(std::back_inserter(l), s, e);
|
||
<b>while</b>(l.size())
|
||
{
|
||
s = *(l.begin());
|
||
l.pop_front();
|
||
std::cout << s << std::endl;
|
||
}
|
||
}
|
||
<b>return</b> 0;
|
||
}</pre>
|
||
|
||
<hr>
|
||
|
||
<h3><a name="partial_matches"></a>Partial Matches</h3>
|
||
|
||
<p>The match-flag <code>match_partial</code> can be passed to the
|
||
following algorithms: <a href="#reg_match">regex_match</a>, <a
|
||
href="#reg_search">regex_search</a>, and <a href="#reg_grep">regex_grep</a>.
|
||
When used it indicates that partial as well as full matches
|
||
should be found. A partial match is one that matched one or more
|
||
characters at the end of the text input, but did not match all of
|
||
the regular expression (although it may have done so had more
|
||
input been available). Partial matches are typically used when
|
||
either validating data input (checking each character as it is
|
||
entered on the keyboard), or when searching texts that are either
|
||
too long to load into memory (or even into a memory mapped file),
|
||
or are of indeterminate length (for example the source may be a
|
||
socket or similar). Partial and full matches can be
|
||
differentiated as shown in the following table (the variable M
|
||
represents an instance of match_results<> as filled in by
|
||
regex_match, regex_search or regex_grep):<br>
|
||
</p>
|
||
|
||
<table border="0" cellpadding="7" cellspacing="0" width="638">
|
||
<tr>
|
||
<td valign="top" width="20%"> </td>
|
||
<td valign="top" width="20%">Result</td>
|
||
<td valign="top" width="20%">M[0].matched</td>
|
||
<td valign="top" width="20%">M[0].first</td>
|
||
<td valign="top" width="20%">M[0].second</td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="20%">No match</td>
|
||
<td valign="top" width="20%">False</td>
|
||
<td valign="top" width="20%">Undefined</td>
|
||
<td valign="top" width="20%">Undefined</td>
|
||
<td valign="top" width="20%">Undefined</td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="20%">Partial match</td>
|
||
<td valign="top" width="20%">True</td>
|
||
<td valign="top" width="20%">False</td>
|
||
<td valign="top" width="20%">Start of partial match.</td>
|
||
<td valign="top" width="20%">End of partial match (end of
|
||
text).</td>
|
||
</tr>
|
||
<tr>
|
||
<td valign="top" width="20%">Full match</td>
|
||
<td valign="top" width="20%">True</td>
|
||
<td valign="top" width="20%">True</td>
|
||
<td valign="top" width="20%">Start of full match.</td>
|
||
<td valign="top" width="20%">End of full match.</td>
|
||
</tr>
|
||
</table>
|
||
|
||
<p>The following <a
|
||
href="example/snippets/partial_regex_match.cpp">example</a> tests
|
||
to see whether the text could be a valid credit card number. As
|
||
the user presses a key, the character entered would be added to
|
||
the string being built up, and passed to <code>is_possible_card_number</code>.
|
||
If this returns true then the text could be a valid card number,
|
||
so the user interface's OK button would be enabled. If it returns
|
||
false, then this is not yet a valid card number, but could be
|
||
with more input, so the user interface would disable the OK
|
||
button. Finally, if the procedure throws an exception the input
|
||
could never become a valid number, and the inputted character
|
||
must be discarded, and a suitable error indication displayed to
|
||
the user.</p>
|
||
|
||
<pre>#include <string>
|
||
#include <iostream>
|
||
#include <boost/regex.hpp>
|
||
|
||
boost::regex e("(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})");
|
||
|
||
bool is_possible_card_number(const std::string& input)
|
||
{
|
||
//
|
||
// return false for partial match, true for full match, or throw for
|
||
// impossible match based on what we have so far...
|
||
boost::match_results<std::string::const_iterator> what;
|
||
if(0 == boost::regex_match(input, what, e, boost::match_default | boost::match_partial))
|
||
{
|
||
// the input so far could not possibly be valid so reject it:
|
||
throw std::runtime_error("Invalid data entered - this could not possibly be a valid card number");
|
||
}
|
||
// OK so far so good, but have we finished?
|
||
if(what[0].matched)
|
||
{
|
||
// excellent, we have a result:
|
||
return true;
|
||
}
|
||
// what we have so far is only a partial match...
|
||
return false;
|
||
}</pre>
|
||
|
||
<p>In the following <a
|
||
href="example/snippets/partial_regex_grep.cpp">example</a>, text
|
||
input is taken from a stream containing an unknown amount of
|
||
text; this example simply counts the number of HTML tags
|
||
encountered in the stream. The text is loaded into a buffer and
|
||
searched a part at a time; if a partial match is encountered,
|
||
then the partial match gets searched a second time as the start
|
||
of the next batch of text:</p>
|
||
|
||
<pre>#include <iostream>
|
||
#include <fstream>
|
||
#include <sstream>
|
||
#include <string>
|
||
#include <boost/regex.hpp>
|
||
|
||
// match some kind of html tag:
|
||
boost::regex e("<[^>]*>");
|
||
// count how many:
|
||
unsigned int tags = 0;
|
||
// saved position of partial match:
|
||
char* next_pos = 0;
|
||
|
||
bool grep_callback(const boost::match_results<char*>& m)
|
||
{
|
||
if(m[0].matched == false)
|
||
{
|
||
// save position and return:
|
||
next_pos = m[0].first;
|
||
}
|
||
else
|
||
++tags;
|
||
return true;
|
||
}
|
||
|
||
void search(std::istream& is)
|
||
{
|
||
char buf[4096];
|
||
next_pos = buf + sizeof(buf);
|
||
bool have_more = true;
|
||
while(have_more)
|
||
{
|
||
// how much do we copy forward from last try:
|
||
unsigned leftover = (buf + sizeof(buf)) - next_pos;
|
||
// and how much is left to fill:
|
||
unsigned size = next_pos - buf;
|
||
// copy forward whatever we have left:
|
||
memcpy(buf, next_pos, leftover);
|
||
// fill the rest from the stream:
|
||
unsigned read = is.readsome(buf + leftover, size);
|
||
// check to see if we've run out of text:
|
||
have_more = read == size;
|
||
// reset next_pos:
|
||
next_pos = buf + sizeof(buf);
|
||
// and then grep:
|
||
boost::regex_grep(grep_callback,
|
||
buf,
|
||
buf + read + leftover,
|
||
e,
|
||
boost::match_default | boost::match_partial);
|
||
}
|
||
}</pre>
|
||
|
||
<hr align="left">
|
||
|
||
<p><i>Copyright </i><a href="mailto:John_Maddock@compuserve.com"><i>Dr
|
||
John Maddock</i></a><i> 1998-2001 all rights reserved.</i> </p>
|
||
</body>
|
||
</html>
|