mirror of
https://github.com/boostorg/regex.git
synced 2025-07-05 08:36:31 +02:00
2272 lines
113 KiB
HTML
2272 lines
113 KiB
HTML
<html>
|
|
|
|
<head>
|
|
<meta http-equiv="Content-Type"
|
|
content="text/html; charset=iso-8859-1">
|
|
<meta name="Template"
|
|
content="C:\PROGRAM FILES\MICROSOFT OFFICE\OFFICE\html.dot">
|
|
<meta name="GENERATOR" content="Microsoft FrontPage Express 2.0">
|
|
<title>Regex++, template class and algorithm reference</title>
|
|
</head>
|
|
|
|
<body bgcolor="#FFFFFF" link="#0000FF" vlink="#800080">
|
|
|
|
<p> </p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td valign="top" width="50%"><h3 align="right"><img
|
|
src="../../c++boost.gif" alt="C++ Boost" width="276"
|
|
height="86"></h3>
|
|
</td>
|
|
<td valign="top" width="50%"><h2 align="center">Regex++,
|
|
Template Class and Algorithm Reference.</h2>
|
|
<p><i>(version 3.01, 18 April 2000)</i> </p>
|
|
<pre><i>Copyright (c) 1998-9
|
|
Dr John Maddock
|
|
|
|
Permission to use, copy, modify, distribute and sell this software
|
|
and its documentation for any purpose is hereby granted without fee,
|
|
provided that the above copyright notice appear in all copies and
|
|
that both that copyright notice and this permission notice appear
|
|
in supporting documentation. Dr John Maddock makes no representations
|
|
about the suitability of this software for any purpose.
|
|
It is provided "as is" without express or implied warranty.</i></pre>
|
|
</td>
|
|
</tr>
|
|
</table>
|
|
|
|
<hr>
|
|
|
|
<h3><a name="regbase"></a>class regbase</h3>
|
|
|
|
<p>#include <<a href="../../boost/regex.hpp">boost/regex.hpp</a>>
|
|
</p>
|
|
|
|
<p>Class regbase is the template argument independent base class
|
|
for reg_expression; the only public members are the <i>flag_type</i>
|
|
enumerated values that determine how regular expressions are
|
|
interpreted. </p>
|
|
|
|
<pre><b>class</b> regbase
|
|
{
|
|
<b>public</b>:
|
|
<b>enum</b> flag_type_
|
|
{
|
|
escape_in_lists = 1, <font
|
|
color="#000080">// '\\' special inside [...] </font>
|
|
char_classes = escape_in_lists << 1, <font
|
|
color="#000080"><i>// [[:CLASS:]] allowed
|
|
</i></font> intervals = char_classes << 1, <font
|
|
color="#000080"><i>// {x,y} allowed </i></font>
|
|
limited_ops = intervals << 1, <font
|
|
color="#000080"><i>// all of + ? and | are normal characters
|
|
</i></font> newline_alt = limited_ops << 1, <font
|
|
color="#000080"><i>// \n is the same as |
|
|
</i></font> bk_plus_qm = newline_alt << 1, <font
|
|
color="#000080"><i>// uses \+ and \? </i></font>
|
|
bk_braces = bk_plus_qm << 1, <font
|
|
color="#000080"><i>// uses \{ and \}
|
|
</i></font> bk_parens = bk_braces << 1, <font
|
|
color="#000080"><i>// uses \( and \) </i></font>
|
|
bk_refs = bk_parens << 1, <font
|
|
color="#000080"><i>// \d allowed </i></font>
|
|
bk_vbar = bk_refs << 1, <font
|
|
color="#000080"><i>// uses \|
|
|
</i></font> use_except = bk_vbar << 1, <font
|
|
color="#000080"><i>// exception on error
|
|
</i></font> failbit = use_except << 1, <font
|
|
color="#000080"><i>// error flag </i></font>
|
|
literal = failbit << 1, <font
|
|
color="#000080"><i>// all characters are literals
|
|
</i></font> icase = literal << 1, <font
|
|
color="#000080"><i>// characters are matched regardless of case
|
|
</i></font> nocollate = icase << 1, <font
|
|
color="#000080"><i>// don't use locale specific collation </i></font>
|
|
|
|
basic = char_classes | intervals | limited_ops | bk_braces | bk_parens | bk_refs,
|
|
extended = char_classes | intervals | bk_refs,
|
|
normal = escape_in_lists | char_classes | intervals | bk_refs | nocollate,
|
|
emacs = bk_braces | bk_parens | bk_refs | bk_vbar,
|
|
awk = extended | escape_in_lists,
|
|
grep = basic | newline_alt,
|
|
egrep = extended | newline_alt,
|
|
sed = basic,
|
|
perl = normal
|
|
};
|
|
<b>typedef</b> <b>unsigned</b> <b>int</b> flag_type;
|
|
}; </pre>
|
|
|
|
<p> <br>
|
|
<br>
|
|
</p>
|
|
|
|
<p>The enumerated type <i>regbase::flag_type</i> determines the
|
|
syntax rules for regular expression compilation, the various
|
|
flags have the following effects: <br>
|
|
</p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::escape_in_lists</td>
|
|
<td valign="top" width="45%">Allows the use of the escape
|
|
"\" character in sets of characters, for
|
|
example [\]] represents the set of characters containing
|
|
only "]". If this flag is not set then "\"
|
|
is an ordinary character inside sets.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::char_classes</td>
|
|
<td valign="top" width="45%">When this bit is set,
|
|
character classes [:classname:] are allowed inside
|
|
character set declarations, for example "[[:word:]]"
|
|
represents the set of all characters that belong to the
|
|
character class "word".</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase:: intervals</td>
|
|
<td valign="top" width="45%">When this bit is set,
|
|
repetition intervals are allowed, for example "a{2,4}"
|
|
represents a repeat of between 2 and 4 letter a's.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase:: limited_ops</td>
|
|
<td valign="top" width="45%">When this bit is set all of
|
|
"+", "?" and "|" are
|
|
ordinary characters in all situations.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase:: newline_alt</td>
|
|
<td valign="top" width="45%">When this bit is set, then
|
|
the newline character "\n" has the same effect
|
|
as the alternation operator "|".</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase:: bk_plus_qm</td>
|
|
<td valign="top" width="45%">When this bit is set then
|
|
"\+" represents the one or more repetition
|
|
operator and "\?" represents the zero or one
|
|
repetition operator. When this bit is not set then "+"
|
|
and "?" are used instead.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase:: bk_braces</td>
|
|
<td valign="top" width="45%">When this bit is set then
|
|
"\{" and "\}" are used for bounded
|
|
repetitions and "{" and "}" are
|
|
normal characters. This is the opposite of default
|
|
behaviour.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase:: bk_parens</td>
|
|
<td valign="top" width="45%">When this bit is set then
|
|
"\(" and "\)" are used to group sub-expressions
|
|
and "(" and ")" are ordinary
|
|
characters, this is the opposite of default behaviour.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase:: bk_refs</td>
|
|
<td valign="top" width="45%">When this bit is set then
|
|
back references are allowed.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase:: bk_vbar</td>
|
|
<td valign="top" width="45%">When this bit is set then
|
|
"\|" represents the alternation operator and
|
|
"|" is an ordinary character. This is the
|
|
opposite of default behaviour.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase:: use_except</td>
|
|
<td valign="top" width="45%">When this bit is set then a <a
|
|
href="#bad_expression">bad_expression</a> exception will
|
|
be thrown on error. Use of this flag is deprecated
|
|
- reg_expression will always throw on error.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase:: failbit</td>
|
|
<td valign="top" width="45%">This bit is set on error, if
|
|
regbase::use_except is not set, then this bit should be
|
|
checked to see if a regular expression is valid before
|
|
usage.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::literal</td>
|
|
<td valign="top" width="45%">All characters in the string
|
|
are treated as literals, there are no special characters
|
|
or escape sequences.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::icase</td>
|
|
<td valign="top" width="45%">All characters in the string
|
|
are matched regardless of case.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::nocollate</td>
|
|
<td valign="top" width="45%">Locale specific collation is
|
|
disabled when dealing with ranges in character set
|
|
declarations. For example when this bit is set the
|
|
expression [a-c] would match the characters a, b and c
|
|
only regardless of locale, whereas when this is not set,
|
|
then [a-c] matches any character which collates in the
|
|
range a to c.</td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::basic</td>
|
|
<td valign="top" width="45%">Equivalent to the POSIX
|
|
basic regular expression syntax: char_classes | intervals
|
|
| limited_ops | bk_braces | bk_parens | bk_refs.</td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="5%"> </td>
|
|
<td valign="top" width="45%">regbase::extended</td>
|
|
<td valign="top" width="45%">Equivalent to the POSIX
|
|
extended regular expression syntax: char_classes |
|
|
intervals | bk_refs.</td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
<td valign="top" width="45%" height="24">regbase::normal</td>
|
|
<td valign="top" width="45%" height="24">This is the
|
|
default setting, and represents how most people expect
|
|
the library to behave. Equivalent to the POSIX extended
|
|
syntax, but with locale specific collation disabled, and
|
|
escape characters inside set declarations enabled:
|
|
regbase::escape_in_lists | regbase::char_classes |
|
|
regbase::intervals | regbase::bk_refs | regbase::nocollate.</td>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
<td valign="top" width="45%" height="24">regbase::emacs</td>
|
|
<td valign="top" width="45%" height="24">Provides
|
|
compatibility with the emacs editor, equivalent to: bk_braces
|
|
| bk_parens | bk_refs | bk_vbar.</td>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
<td valign="top" width="45%" height="24">regbase::awk </td>
|
|
<td valign="top" width="45%" height="24">Provides
|
|
compatibility with the Unix utility Awk, the same as POSIX
|
|
extended regular expressions, but allows escapes inside
|
|
bracket-expressions (character sets). Equivalent to
|
|
extended | escape_in_lists.</td>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
<td valign="top" width="45%" height="24">regbase::grep</td>
|
|
<td valign="top" width="45%" height="24">Provides
|
|
compatibility with the Unix grep utility, the same as
|
|
POSIX basic regular expressions, but with the newline
|
|
character equivalent to the alternation operator. The
|
|
same as basic | newline_alt.</td>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
<td valign="top" width="45%" height="24">regbase::egrep</td>
|
|
<td valign="top" width="45%" height="24">Provides
|
|
compatibility with the Unix egrep utility, the same as
|
|
POSIX extended regular expressions, but with the newline
|
|
character equivalent to the alternation operator. The
|
|
same as extended | newline_alt.</td>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
<td valign="top" width="45%" height="24">regbase::sed</td>
|
|
<td valign="top" width="45%" height="24">Provides
|
|
compatibility with the Unix sed utility, the same as POSIX
|
|
basic regular expressions.</td>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
<td valign="top" width="45%" height="24">regbase::perl</td>
|
|
<td valign="top" width="45%" height="24">Provides
|
|
compatibility with the perl programming language, the
|
|
same as regbase::normal.</td>
|
|
<td valign="top" width="5%" height="24"> </td>
|
|
</tr>
|
|
</table>
|
|
|
|
<hr>
|
|
|
|
<h3><a name="bad_expression"></a>Exception classes.</h3>
|
|
|
|
<p>#include <<a href="../../boost/pattern_except.hpp">boost/pattern_except.hpp</a>>
|
|
</p>
|
|
|
|
<p>An instance of <i>bad_expression</i> is thrown whenever a bad
|
|
regular expression is encountered. </p>
|
|
|
|
<pre><b>namespace</b> boost{
|
|
|
|
<b>class</b> bad_pattern : <b>public</b> std::runtime_error
|
|
{
|
|
<b>public</b>:
|
|
<b>explicit</b> bad_pattern(<b>const</b> std::string& s) : std::runtime_error(s){};
|
|
};
|
|
|
|
<b>class</b> bad_expression : <b>public</b> bad_pattern
|
|
{
|
|
<b>public</b>:
|
|
bad_expression(<b>const</b> std::string& s) : bad_pattern(s) {}
|
|
};
|
|
|
|
|
|
} // namespace boost</pre>
|
|
|
|
<p>Footnotes: the class <i>bad_pattern </i>forms the base class
|
|
for all pattern-matching exceptions, of which <i>bad_expression</i>
|
|
is one. The choice of <i>std::runtime_error </i>as the base class
|
|
for <i>bad_pattern</i> is moot; depending upon how the library is
|
|
used, exceptions may be either logic errors (programmer supplied
|
|
expressions) or run time errors (user supplied expressions). <br>
|
|
</p>
|
|
|
|
<hr>
|
|
|
|
<h3><a name="reg_expression"></a>Class reg_expression</h3>
|
|
|
|
<p>#include <<a href="../../boost/regex.hpp">boost/regex.hpp</a>>
|
|
</p>
|
|
|
|
<p>The template class <i>reg_expression </i>encapsulates regular
|
|
expression parsing and compilation. The class derives from class <a
|
|
href="#regbase"><i>regbase</i></a> and takes three template
|
|
parameters: </p>
|
|
|
|
<p><b><i>charT</i></b>: determines the character type, i.e.
|
|
either char or wchar_t. </p>
|
|
|
|
<p><b><i>traits</i></b>: determines the behaviour of the
|
|
character type, for example whether character matching is case
|
|
sensitive or not, and which character class names are recognized.
|
|
A default traits class is provided: <a href="#regex_char_traits">regex_traits<charT></a>.
|
|
</p>
|
|
|
|
<p><b><i>Allocator</i></b>: the allocator class used to allocate
|
|
memory by the class. </p>
|
|
|
|
<p>For ease of use there are two typedefs that define the two
|
|
standard <i>reg_expression</i> instances; unless you want to use
|
|
custom allocators, you won't need to use anything other than
|
|
these: </p>
|
|
|
|
<pre><b>namespace</b> boost{
|
|
<b>template</b> <<b>class</b> charT, <b>class</b> traits = regex_traits<charT>, <b>class</b> Allocator = std::allocator<charT> >
|
|
<b>class</b> reg_expression;
|
|
<b>typedef</b> reg_expression<<b>char</b>> regex;
|
|
<b>typedef</b> reg_expression<<b>wchar_t</b>> wregex;
|
|
}</pre>
|
|
|
|
<p>The definition of <i>reg_expression</i> follows: it is based
|
|
very closely on class basic_string, and fulfils the requirements
|
|
for a container of <i>charT</i>. </p>
|
|
|
|
<pre><b>namespace</b> boost{
|
|
<b>template</b> <<b>class</b> charT, <b>class</b> traits = regex_traits<charT>, <b>class</b> Allocator = std::allocator<charT> >
|
|
<b>class</b> reg_expression : <b>public</b> regbase
|
|
{
|
|
<b>public</b>:
|
|
<font color="#000080"><i> // typedefs: </i></font>
|
|
<b> typedef</b> charT char_type;
|
|
<b>typedef</b> traits traits_type;
|
|
<font color="#000080"> <i>// locale_type </i>
|
|
<i>// placeholder for actual locale type used by the
|
|
</i> <i>// traits class to localise *this.
|
|
</i></font> <b>typedef</b> typename traits::locale_type locale_type;
|
|
<font color="#000080"> <i>// value_type </i></font>
|
|
<b>typedef</b> charT value_type;
|
|
<font color="#000080"> <i>// reference, const_reference </i></font>
|
|
<b>typedef</b> charT& reference;
|
|
<b>typedef</b> <b>const</b> charT& const_reference;
|
|
<font color="#000080"> <i>// iterator, const_iterator </i></font>
|
|
<b>typedef</b> <b>const</b> charT* const_iterator;
|
|
<b>typedef</b> const_iterator iterator; <font
|
|
color="#000080">
|
|
<i>// difference_type </i></font>
|
|
<b>typedef</b> <b>typename</b> Allocator::difference_type difference_type;
|
|
<font color="#000080"> <i>// size_type
|
|
</i></font> <b>typedef</b> <b>typename</b> Allocator::size_type size_type;
|
|
<font color="#000080"><i>// allocator_type </i></font>
|
|
<b> typedef</b> Allocator allocator_type;
|
|
<b>typedef</b> Allocator alloc_type;
|
|
<font color="#000080"> <i>// flag_type </i></font>
|
|
<b>typedef</b> jm_uintfast32_t flag_type;
|
|
<b>public</b>:
|
|
<font color="#000080"><em>// constructors</em></font>
|
|
<strong>explicit</strong> reg_expression(<b>const</b> Allocator& a = Allocator());
|
|
<strong>explicit</strong> reg_expression(<b>const</b> charT* p, flag_type f = regbase::normal, <b>const</b> Allocator& a = Allocator());
|
|
reg_expression(<b>const</b> charT* p1, <b>const</b> charT* p2, flag_type f = regbase::normal, <b>const</b> Allocator& a = Allocator());
|
|
reg_expression(<b>const</b> charT* p, size_type len, flag_type f, <b>const</b> Allocator& a = Allocator());
|
|
reg_expression(<b>const</b> reg_expression&);
|
|
<b> template</b> <<b>class</b> ST, <b>class</b> SA>
|
|
<strong>explicit</strong> reg_expression(<b>const</b> std::basic_string<charT, ST, SA>& p, flag_type f = regbase::normal, <b>const</b> Allocator& a = Allocator()); <b>
|
|
template</b> <<b>class</b> I>
|
|
reg_expression(I first, I last, flag_type f = regbase::normal, <b>const</b> Allocator& a = Allocator());
|
|
~reg_expression();
|
|
reg_expression& <b>operator</b>=(<b>const</b> reg_expression&);
|
|
reg_expression& <b>operator</b>=(<b>const</b> charT* ptr); <b>
|
|
template</b> <<b>class</b> ST, <b>class</b> SA>
|
|
reg_expression& <b>operator</b>=(<b>const</b> std::basic_string<charT, ST, SA>& p);
|
|
<font color="#000080"> <i>//
|
|
</i> <i>// assign:
|
|
</i></font> reg_expression& assign(<b>const</b> reg_expression& that);
|
|
reg_expression& assign(<b>const</b> charT* ptr, flag_type f = regbase::normal);
|
|
reg_expression& assign(<b>const</b> charT* first, <b>const</b> charT* last, flag_type f = regbase::normal);
|
|
<b> template</b> <<b>class</b> string_traits, <b>class</b> A>
|
|
reg_expression& assign(
|
|
<b>const</b> std::basic_string<charT, string_traits, A>& s,
|
|
flag_type f = regbase::normal);
|
|
<b>template</b> <<b>class</b> iterator>
|
|
reg_expression& assign(iterator first,
|
|
iterator last,
|
|
flag_type f = regbase::normal);
|
|
<font color="#000080"> <i>// </i>
|
|
<i>// allocator access:
|
|
</i></font> Allocator get_allocator()<b>const</b>;
|
|
<font color="#000080"> <i>// </i>
|
|
<i>// locale:
|
|
</i></font> locale_type imbue(<b>const</b> locale_type& l);
|
|
locale_type getloc()<b>const</b>; <font
|
|
color="#000080"><i>
|
|
// </i>
|
|
<i>// flags:
|
|
</i></font> flag_type getflags()<b>const</b>;
|
|
<font color="#000080"> <i>// </i>
|
|
<i>// str:
|
|
</i></font> std::basic_string<charT> str()<b>const</b>;
|
|
<font color="#000080"> <i>// </i>
|
|
<i>// begin, end:
|
|
</i></font> const_iterator begin()<b>const</b>;
|
|
const_iterator end()<b>const</b>; <font
|
|
color="#000080">
|
|
<i>// </i>
|
|
<i>// swap:
|
|
</i></font> <b>void</b> swap(reg_expression&)<b>throw</b>();
|
|
<font color="#000080"> <i>// </i>
|
|
<i>// size:
|
|
</i></font> size_type size()<b>const</b>;
|
|
<font color="#000080"> <i>// </i>
|
|
<i>// max_size:
|
|
</i></font> size_type max_size()<b>const</b>;
|
|
<font color="#000080"> <i>// </i>
|
|
<i>// empty:
|
|
</i></font> <b>bool</b> empty()<b>const</b>;
|
|
<b>unsigned</b> mark_count()<b>const</b>;
|
|
<b>bool</b> <b>operator</b>==(<b>const</b> reg_expression&)<b>const</b>;
|
|
<b>bool</b> <b>operator</b><(<b>const</b> reg_expression&)<b>const</b>;
|
|
};
|
|
} <font color="#000080"><i>// namespace boost </i></font></pre>
|
|
|
|
<p>Class reg_expression has the following public member functions:
|
|
<br>
|
|
</p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">reg_expression(Allocator a =
|
|
Allocator());</td>
|
|
<td valign="top" width="45%"> Constructs a default
|
|
instance of reg_expression without any expression.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">reg_expression(charT* p, <b>unsigned</b>
|
|
f = regbase::normal, Allocator a = Allocator());</td>
|
|
<td valign="top" width="45%"> Constructs an instance
|
|
of reg_expression from the expression denoted by the null
|
|
terminated string <b>p</b>, using the flags <b>f</b> to
|
|
determine regular expression syntax. See class <a
|
|
href="#regbase">regbase</a> for allowable flag values.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">reg_expression(charT* p1,
|
|
charT* p2, <b>unsigned</b> f = regbase::normal, Allocator
|
|
a = Allocator());</td>
|
|
<td valign="top" width="45%"> Constructs an instance
|
|
of reg_expression from the expression denoted by the pair of
|
|
iterators <b>p1</b> and <b>p2</b>, using the flags <b>f</b>
|
|
to determine regular expression syntax. See class <a
|
|
href="#regbase">regbase</a> for allowable flag values.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">reg_expression(charT* p,
|
|
size_type len, <b>unsigned</b> f, Allocator a = Allocator());</td>
|
|
<td valign="top" width="45%"> Constructs an instance
|
|
of reg_expression from the expression denoted by the
|
|
string <b>p</b> of length <b>len</b>, using the flags <b>f</b>
|
|
to determine regular expression syntax. See class <a
|
|
href="#regbase">regbase</a> for allowable flag values.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><b>template</b> <class ST,
|
|
class SA> <br>
|
|
reg_expression(<b>const</b> std::basic_string<charT,
|
|
ST, SA>& p, jm_uintfast32_t f = regbase::normal, <b>const</b>
|
|
Allocator& a = Allocator());</td>
|
|
<td valign="top" width="45%"> Constructs an instance
|
|
of reg_expression from the expression denoted by the
|
|
string <b>p</b>, using the flags <b>f</b> to determine
|
|
regular expression syntax. See class <a href="#regbase">regbase</a>
|
|
for allowable flag values. <p>Note - this member may not
|
|
be available depending upon your compiler capabilities.</p>
|
|
</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">template <class I> <br>
|
|
reg_expression(I first, I last, flag_type f = regbase::normal,
|
|
const Allocator& a = Allocator());</td>
|
|
<td valign="top" width="45%"> Constructs an instance
|
|
of reg_expression from the expression denoted by the pair of
|
|
iterators <b>first</b> and <b>last</b>, using the flags <b>f</b>
|
|
to determine regular expression syntax. See class <a
|
|
href="#regbase">regbase</a> for allowable flag values.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">reg_expression(<b>const</b>
|
|
reg_expression&);</td>
|
|
<td valign="top" width="45%">Copy constructor - copies an
|
|
existing regular expression.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">reg_expression& <b>operator</b>=(<b>const</b>
|
|
reg_expression&);</td>
|
|
<td valign="top" width="45%">Copies an existing regular
|
|
expression.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">reg_expression& <b>operator</b>=(<b>const</b>
|
|
charT* ptr);</td>
|
|
<td valign="top" width="45%">Equivalent to assign(ptr);</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">template <class ST, class
|
|
SA> <p>reg_expression& operator=(const std::basic_string<charT,
|
|
ST, SA>& p);</p>
|
|
</td>
|
|
<td valign="top" width="45%">Equivalent to assign(p);</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">reg_expression& assign(<b>const</b>
|
|
reg_expression& that);</td>
|
|
<td valign="top" width="45%">Copies the regular
|
|
expression contained by <b>that</b>, throws <a
|
|
href="#bad_expression">bad_expression</a> if <b>that</b>
|
|
does not contain a valid expression. Returns *this.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">reg_expression& assign(<b>const</b>
|
|
charT* p, flag_type f = regbase::normal);</td>
|
|
<td valign="top" width="45%">Compiles a regular
|
|
expression from the expression denoted by the null
|
|
terminated string <b>p</b>, using the flags <b>f</b> to
|
|
determine regular expression syntax. See class <a
|
|
href="#regbase">regbase</a> for allowable flag values.
|
|
Throws <a href="#bad_expression">bad_expression</a> if <b>p</b>
|
|
does not contain a valid expression. Returns *this.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">reg_expression& assign(<b>const</b>
|
|
charT* first, <b>const</b> charT* last, flag_type f =
|
|
regbase::normal);</td>
|
|
<td valign="top" width="45%">Compiles a regular
|
|
expression from the expression denoted by the pair of
|
|
iterators <b>first-last</b>, using the flags <b>f</b> to
|
|
determine regular expression syntax. See class <a
|
|
href="#regbase">regbase</a> for allowable flag values.
|
|
Throws <a href="#bad_expression">bad_expression</a> if <b>first-last</b>
|
|
does not contain a valid expression. Returns *this.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><b>template</b> <<b>class</b>
|
|
string_traits, <b>class</b> A> <br>
|
|
reg_expression& assign(<b>const</b> std::basic_string<charT,
|
|
string_traits, A>& s, flag_type f = regbase::normal);</td>
|
|
<td valign="top" width="45%">Compiles a regular
|
|
expression from the expression denoted by the string <b>s</b>,
|
|
using the flags <b>f</b> to determine regular expression
|
|
syntax. See class <a href="#regbase">regbase</a> for
|
|
allowable flag values. Throws <a href="#bad_expression">bad_expression</a>
|
|
if <b>s</b> does not contain a valid expression. Returns
|
|
*this.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">template <class iterator>
|
|
<br>
|
|
reg_expression& assign(iterator first, iterator last,
|
|
flag_type f = regbase::normal);</td>
|
|
<td valign="top" width="45%">Compiles a regular
|
|
expression from the expression denoted by the pair of
|
|
iterators <b>first-last</b>, using the flags <b>f</b> to
|
|
determine regular expression syntax. See class <a
|
|
href="#regbase">regbase</a> for allowable flag values.
|
|
Throws <a href="#bad_expression">bad_expression</a> if <b>first-last</b>
|
|
does not contain a valid expression. Returns *this.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">Allocator get_allocator()<b>const</b>;</td>
|
|
<td valign="top" width="45%">Returns the allocator used
|
|
by the expression.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">locale_type imbue(<b>const</b>
|
|
locale_type& l);</td>
|
|
<td valign="top" width="45%">Imbues the expression with
|
|
the specified locale, and invalidates the current
|
|
expression.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">locale_type getloc()<b>const</b>;</td>
|
|
<td valign="top" width="45%">Returns the locale used by
|
|
the expression.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">flag_type getflags()<b>const</b>;</td>
|
|
<td valign="top" width="45%">Returns the flags used to
|
|
compile the current expression.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">std::basic_string<charT>
|
|
str()<b>const</b>;</td>
|
|
<td valign="top" width="45%">Returns the current
|
|
expression as a string.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">const_iterator begin()<b>const</b>;</td>
|
|
<td valign="top" width="45%">Returns a pointer to the
|
|
first character of the current expression.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">const_iterator end()<b>const</b>;</td>
|
|
<td valign="top" width="45%">Returns a pointer to the end
|
|
of the current expression.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">size_type size()<b>const</b>;</td>
|
|
<td valign="top" width="45%">Returns the length of the
|
|
current expression.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">size_type max_size()<b>const</b>;</td>
|
|
<td valign="top" width="45%">Returns the maximum length
|
|
of a regular expression text.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><b>bool</b> empty()<b>const</b>;</td>
|
|
<td valign="top" width="45%">Returns true if the object
|
|
contains no valid expression.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><b>unsigned</b> mark_count()<b>const</b>
|
|
;</td>
|
|
<td valign="top" width="45%">Returns the number of sub-expressions
|
|
in the compiled regular expression. Note that this
|
|
includes the whole match (subexpression zero), so the
|
|
value returned is always >= 1.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
</table>
|
|
|
|
<hr>
|
|
|
|
<h3><a name="regex_char_traits"></a><i>Class regex_traits</i></h3>
|
|
|
|
<p>#include <<a href="../../boost/regex_traits.hpp">boost/regex_traits.hpp</a>>
|
|
</p>
|
|
|
|
<p><i>This is a preliminary version of the regular expression
|
|
traits class, and is subject to change</i>. </p>
|
|
|
|
<p>The purpose of the traits class is to make it easier to
|
|
customise the behaviour of <i>reg_expression </i>and the
|
|
associated matching algorithms. Custom traits classes can handle
|
|
special character sets or define additional character classes,
|
|
for example one could define [[:kanji:]] as the set of all (Unicode)
|
|
kanji characters. This library provides three traits classes and
|
|
a wrapper class <i>regex_traits</i>, which inherits from one of
|
|
these depending upon the default localisation model in use: class
|
|
<i>c_regex_traits</i> encapsulates the global C locale, class <i>w32_regex_traits</i>
|
|
encapsulates the global Win32 locale (only available on Win32
|
|
systems), and class <i>cpp_regex_traits</i> encapsulates the C++
|
|
locale (only provided if std::locale is supported): </p>
|
|
|
|
<pre>template <class charT> class c_regex_traits;
|
|
template<> class c_regex_traits<char> { /*details*/ };
|
|
template<> class c_regex_traits<wchar_t> { /*details*/ };
|
|
|
|
template <class charT> class w32_regex_traits;
|
|
template<> class w32_regex_traits<char> { /*details*/ };
|
|
template<> class w32_regex_traits<wchar_t> { /*details*/ };
|
|
|
|
template <class charT> class cpp_regex_traits;
|
|
template<> class cpp_regex_traits<char> { /*details*/ };
|
|
template<> class cpp_regex_traits<wchar_t> { /*details*/ };
|
|
|
|
template <class charT> class regex_traits : public base_type { /*details*/ };</pre>
|
|
|
|
<p>Where "<i>base_type</i>" defaults to <i>w32_regex_traits</i>
|
|
on Win32 systems, and <i>c_regex_traits</i> otherwise. The
|
|
default behaviour can be changed by defining one of BOOST_RE_LOCALE_C
|
|
(forces use of <i>c_regex_traits</i> by default), or BOOST_RE_LOCALE_CPP
|
|
(forces use of <i>cpp_regex_traits</i> by default). Alternatively
|
|
a specific traits class can be passed to the <i>reg_expression</i>
|
|
template. </p>
|
|
|
|
<p>The requirements for custom traits classes are <a
|
|
href="traits_class_ref.htm">documented separately here....</a> <br>
|
|
</p>
|
|
|
|
<hr>
|
|
|
|
<h3><a name="reg_match"></a><i>Class match_results</i></h3>
|
|
|
|
<p>#include <<a href="../../boost/regex.hpp">boost/regex.hpp</a>>
|
|
</p>
|
|
|
|
<p>Regular expressions are different from many simple pattern-matching
|
|
algorithms in that as well as finding an overall match they can
|
|
also produce sub-expression matches: each sub-expression being
|
|
delimited in the pattern by a pair of parentheses (...). There
|
|
has to be some method for reporting sub-expression matches back
|
|
to the user: this is achieved by defining a class <i>match_results</i>
|
|
that acts as an indexed collection of sub-expression matches,
|
|
each sub-expression match being contained in an object of type <i>sub_match</i>.
|
|
</p>
|
|
|
|
<pre><font color="#000080"><i>//
|
|
// class sub_match:
|
|
// denotes one sub-expression match.
|
|
//
|
|
</i></font><b>template</b> <<b>class</b> iterator>
|
|
<b>struct</b> sub_match
|
|
{
|
|
<b>typedef</b> <b>typename</b> std::iterator_traits<iterator>::value_type value_type;
|
|
<b>typedef</b> <b>typename</b> std::iterator_traits<iterator>::difference_type difference_type;
|
|
<b>typedef</b> iterator iterator_type;
|
|
|
|
iterator first;
|
|
iterator second;
|
|
<b>bool</b> matched;
|
|
|
|
<b>operator</b> std::basic_string<value_type>()<b>const</b>;
|
|
|
|
<b>bool</b> <b>operator</b>==(<b>const</b> sub_match& that)<b>const</b>;
|
|
<b>bool</b> <b>operator</b> !=(<b>const</b> sub_match& that)<b>const</b>;
|
|
difference_type length()<b>const</b>;
|
|
};
|
|
|
|
<font color="#000080">//
|
|
// class match_results:
|
|
// contains an indexed collection of matched sub-expressions.
|
|
//
|
|
</font><b>template</b> <<b>class</b> iterator, <b>class</b> Allocator = std::allocator<<strong>typename</strong> std::iterator_traits<iterator>::value_type > >
|
|
<b>class</b> match_results
|
|
{ <b>
|
|
public</b>:
|
|
<b>typedef</b> Allocator alloc_type;
|
|
<b>typedef</b> <b>typename</b> Allocator::<b>template</b> Rebind<iterator>::size_type size_type;
|
|
<b>typedef</b> <b>typename</b> std::iterator_traits<iterator>::value_type char_type;
|
|
<b>typedef</b> sub_match<iterator> value_type;
|
|
<b>typedef</b> <b>typename</b> std::iterator_traits<iterator>::difference_type difference_type;
|
|
<b>typedef</b> iterator iterator_type;
|
|
<strong>explicit</strong> match_results(<b>const</b> Allocator& a = Allocator());
|
|
match_results(<b>const</b> match_results& m);
|
|
match_results& <b>operator</b>=(<b>const</b> match_results& m);
|
|
~match_results();
|
|
size_type size()<b>const</b>;
|
|
<b>const</b> sub_match<iterator>& <b>operator</b>[](<b>int</b> n) <b>const</b>;
|
|
Allocator allocator()<b>const</b>;
|
|
difference_type length(<b>int</b> sub = 0)<b>const</b>;
|
|
difference_type position(<b>unsigned</b> <b>int</b> sub = 0)<b>const</b>;
|
|
<b>unsigned</b> <b>int</b> line()<b>const</b>;
|
|
iterator line_start()<b>const</b>;
|
|
std::basic_string<char_type> str(<b>int</b> sub = 0)<b>const</b>;
|
|
<b>void</b> swap(match_results& that);
|
|
<b>bool</b> <b>operator</b>==(<b>const</b> match_results& that)<b>const</b>;
|
|
<b>bool</b> <b>operator</b><(<b>const</b> match_results& that)<b>const</b>;
|
|
};</pre>
|
|
|
|
<pre><strong>typedef</strong> match_results<<strong>const</strong> <strong>char</strong>*> cmatch;
|
|
<strong>typedef</strong> match_results<<strong>const</strong> <strong>wchar_t</strong>*> wcmatch; </pre>
|
|
|
|
<p>Class match_results is used for reporting what matched a
|
|
regular expression, it is passed to the matching algorithms <a
|
|
href="#query_match">regex_match</a> and <a href="#reg_search">regex_search</a>,
|
|
and is used by <a href="#reg_grep">regex_grep</a> to notify the
|
|
callback function (or function object) what matched. Note that
|
|
the default allocator parameter has been chosen to match the
|
|
default allocator parameter to reg_expression. match_results has
|
|
the following public member functions: <br>
|
|
</p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">match_results(Allocator a =
|
|
Allocator());</td>
|
|
<td valign="top" width="45%">Constructs an instance of
|
|
match_results, using allocator instance a.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">match_results(const match_results&
|
|
m);</td>
|
|
<td valign="top" width="45%">Copy constructor.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">match_results& operator=(const
|
|
match_results& m);</td>
|
|
<td valign="top" width="45%">Assignment operator.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><b>const</b> sub_match<iterator>&
|
|
<b>operator</b>[](<b>int</b> n) <b>const</b>;</td>
|
|
<td valign="top" width="45%">Returns what matched, item 0
|
|
represents the whole string, item 1 the first sub-expression
|
|
and so on.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">Allocator allocator()<b>const</b>;</td>
|
|
<td valign="top" width="45%">Returns the allocator used
|
|
by the class.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">difference_type length(<b>unsigned
|
|
int</b> sub = 0)<b>const</b>;</td>
|
|
<td valign="top" width="45%">Returns the length of the
|
|
matched subexpression, defaults to the length of the
|
|
whole match, in effect this is equivalent to operator[](sub).second
|
|
- operator[](sub).first.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">difference_type position(<b>unsigned
|
|
int</b> sub = 0)<b>const</b>;</td>
|
|
<td valign="top" width="45%">Returns the position of the
|
|
matched sub-expression, defaults to the position of the
|
|
whole match. The returned value is the position of the
|
|
match relative to the start of the string.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%"><b>unsigned</b> <b>int</b>
|
|
line()<b>const</b>;</td>
|
|
<td valign="top" width="45%">Returns the index of the
|
|
line on which the match occurred, indices start with 1,
|
|
not zero. Equivalent to the number of newline characters
|
|
prior to operator[](0).first plus one.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">iterator line_start()<b>const;</b></td>
|
|
<td valign="top" width="45%">Returns an iterator denoting
|
|
the start of the line on which the match occurred.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">size_type size()<b>const;</b></td>
|
|
<td valign="top" width="45%">Returns how many sub-expressions
|
|
are present in the match, including sub-expression zero (the
|
|
whole match). Returns zero if no matches were found in
|
|
the search operation.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
</table>
|
|
|
|
<p><br>
|
|
</p>
|
|
|
|
<p>The operator[] member function needs further explanation: it
|
|
returns a const reference to a structure of type sub_match<iterator>,
|
|
which has the following public members: <br>
|
|
</p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%"><b>typedef</b> <b>typename</b>
|
|
std::iterator_traits<iterator>::value_type value_type;</td>
|
|
<td valign="top" width="44%">The type pointed to by the
|
|
iterators.</td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%"><b>typedef</b> <b>typename</b>
|
|
std::iterator_traits<iterator>::difference_type
|
|
difference_type;</td>
|
|
<td valign="top" width="44%">A type that represents the
|
|
difference between two iterators.</td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%"><b>typedef</b> iterator
|
|
iterator_type;</td>
|
|
<td valign="top" width="44%">The iterator type.</td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%">iterator first</td>
|
|
<td valign="top" width="44%">An iterator denoting the
|
|
position of the start of the match.</td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%">iterator second</td>
|
|
<td valign="top" width="44%">An iterator denoting the
|
|
position of the end of the match.</td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%"><b>bool</b> matched</td>
|
|
<td valign="top" width="44%">A Boolean value denoting
|
|
whether this sub-expression participated in the match.</td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%">difference_type length()<b>const;</b></td>
|
|
<td valign="top" width="44%">Returns the length of the
|
|
sub-expression match.</td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%"><b>operator</b> std::basic_string<value_type>
|
|
()<b>const</b>;</td>
|
|
<td valign="top" width="44%">Converts the sub-expression
|
|
match into an instance of std::basic_string<>. Note
|
|
that this member may be either absent, or present to a
|
|
more limited degree depending upon your compiler
|
|
capabilities.</td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
</table>
|
|
|
|
<p>Operator[] takes an integer as an argument that denotes the
|
|
sub-expression for which to return information, the argument can
|
|
take the following special values: <br>
|
|
</p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%">-2</td>
|
|
<td valign="top" width="44%">Returns everything from the
|
|
end of the match, to the end of the input string,
|
|
equivalent to $' in perl. If this is a null string, then:
|
|
<p>first == second </p>
|
|
<p>And </p>
|
|
<p>matched == false.</p>
|
|
</td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%">-1</td>
|
|
<td valign="top" width="44%">Returns everything from the
|
|
start of the input string (or the end of the last match
|
|
if this is a grep operation), to the start of this match.
|
|
Equivalent to $` in perl. If this is a null string, then:
|
|
<p>first == second </p>
|
|
<p>And </p>
|
|
<p>matched == false.</p>
|
|
</td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%">0</td>
|
|
<td valign="top" width="44%">Returns the whole of what
|
|
matched, equivalent to $& in perl. The matched
|
|
parameter is always true.</td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%">0 < N < size()</td>
|
|
<td valign="top" width="44%">Returns what matched sub-expression
|
|
N, if this sub-expression did not participate in the
|
|
match then <p>matched == false </p>
|
|
<p>otherwise: </p>
|
|
<p>matched == true.</p>
|
|
</td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="6%"> </td>
|
|
<td valign="top" width="44%">N < -2 or N >= size()</td>
|
|
<td valign="top" width="44%">Represents an out-of-range
|
|
non-existent sub-expression. Returns a "null"
|
|
match in which <p>first == second </p>
|
|
<p>And </p>
|
|
<p>matched == false.</p>
|
|
</td>
|
|
<td valign="top" width="5%"> </td>
|
|
</tr>
|
|
</table>
|
|
|
|
<p>Note that as well as being parameterised for an allocator,
|
|
match_results<> also takes an iterator type, this allows
|
|
any pair of iterators to be searched for a given regular
|
|
expression, provided the iterators have at least bi-directional
|
|
properties. <br>
|
|
</p>
|
|
|
|
<hr>
|
|
|
|
<h3><a name="query_match"></a>Algorithm regex_match</h3>
|
|
|
|
<p>#include <<a href="../../boost/regex.hpp">boost/regex.hpp</a>>
|
|
</p>
|
|
|
|
<p>The algorithm regex_match determines whether a given regular
|
|
expression matches a given sequence denoted by a pair of
|
|
iterators, the algorithm is defined as follows, note that the
|
|
result is true only if the expression matches the whole of the
|
|
input sequence, the main use of this function is data input
|
|
validation: </p>
|
|
|
|
<pre><b>template</b> <<b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits, <b>class</b> Allocator2>
|
|
<b>bool</b> regex_match(iterator first,
|
|
iterator last,
|
|
match_results<iterator, Allocator>& m,
|
|
<b>const</b> reg_expression<charT, traits, Allocator2>& e,
|
|
<b>unsigned</b> flags = match_default);</pre>
|
|
|
|
<p>The library also defines the following convenience versions,
|
|
which take either a const charT*, or a const std::basic_string<>&
|
|
in place of a pair of iterators [note - these versions may not be
|
|
available, or may be available in a more limited form, depending
|
|
upon your compiler's capabilities]: </p>
|
|
|
|
<pre><b>template</b> <<b>class</b> charT, <b>class</b> Allocator, <b>class</b> traits, <b>class</b> Allocator2>
|
|
<b>bool</b> regex_match(<b>const</b> charT* str,
|
|
match_results<<b>const</b> charT*, Allocator>& m,
|
|
<b>const</b> reg_expression<charT, traits, Allocator2>& e,
|
|
<b>unsigned</b> flags = match_default);
|
|
|
|
<b>template</b> <<b>class</b> ST, <b>class</b> SA, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits, <b>class</b> Allocator2>
|
|
<b>bool</b> regex_match(<b>const</b> std::basic_string<charT, ST, SA>& s,
|
|
match_results<<b>typename</b> std::basic_string<charT, ST, SA>::const_iterator, Allocator>& m,
|
|
<b>const</b> reg_expression<charT, traits, Allocator2>& e,
|
|
<b>unsigned</b> flags = match_default);</pre>
|
|
|
|
<p>Finally there is a set of convenience versions that simply
|
|
return true or false and do not indicate what matched: </p>
|
|
|
|
<pre><b>template</b> <<b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits, <b>class</b> Allocator2>
|
|
<b>bool</b> regex_match(iterator first,
|
|
iterator last,
|
|
<b> const</b> reg_expression<charT, traits, Allocator2>& e,
|
|
<b>unsigned</b> flags = match_default);
|
|
|
|
<b>template</b> <<b>class</b> charT, <b>class</b> Allocator, <b>class</b> traits, <b>class</b> Allocator2>
|
|
<b>bool</b> regex_match(<b>const</b> charT* str,
|
|
<b> const</b> reg_expression<charT, traits, Allocator2>& e,
|
|
<b>unsigned</b> flags = match_default);
|
|
|
|
<b>template</b> <<b>class</b> ST, <b>class</b> SA, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits, <b>class</b> Allocator2>
|
|
<b>bool</b> regex_match(<b>const</b> std::basic_string<charT, ST, SA>& s,
|
|
<b> const</b> reg_expression<charT, traits, Allocator2>& e,
|
|
<b>unsigned</b> flags = match_default);
|
|
</pre>
|
|
|
|
<p>The parameters for the main function version are as follows: <br>
|
|
</p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td width="30"> </td>
|
|
<td width="319">iterator first</td>
|
|
<td>Denotes the start of the range to be matched.</td>
|
|
<td width="30"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="319">iterator last</td>
|
|
<td valign="top" width="319">Denotes the end of the range
|
|
to be matched.</td>
|
|
<td> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="319">match_results<iterator,
|
|
Allocator>& m</td>
|
|
<td valign="top" width="319">An instance of match_results
|
|
in which what matched will be reported. On exit if a
|
|
match occurred then m[0] denotes the whole of the string
|
|
that matched, m[0].first must be equal to first, m[0].second
|
|
will be less than or equal to last. m[1] denotes the
|
|
first subexpression, m[2] the second subexpression and so
|
|
on. If no match occurred then m[0].first = m[0].second =
|
|
last.</td>
|
|
<td> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="319">const reg_expression<charT,
|
|
traits, Allocator2>& e</td>
|
|
<td valign="top" width="319">Contains the regular
|
|
expression to be matched.</td>
|
|
<td> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="319">unsigned flags = match_default</td>
|
|
<td valign="top" width="319">Determines the semantics
|
|
used for matching, a combination of one or more <a
|
|
href="#match_type">match_flags</a> enumerators.</td>
|
|
</tr>
|
|
</table>
|
|
|
|
<p>regex_match returns false if no match occurs or true if it
|
|
does. A match only occurs if it starts at <b>first</b> and
|
|
finishes at <b>last</b>. Example: the following <a
|
|
href="demo/snippets/snip1.cpp">example</a> processes an ftp
|
|
response: </p>
|
|
|
|
<pre><font color="#008000">#include <cstdlib>
|
|
#include <boost/regex.hpp>
|
|
#include <string>
|
|
#include <iostream>
|
|
|
|
</font><font color="#000000"><b>using namespace</b> boost;
|
|
|
|
</font>regex expression(<font color="#000080">"([0-9]+)(\\-| |$)(.*)"</font>);
|
|
|
|
<font color="#000080"><i>// process_ftp:
|
|
// on success returns the ftp response code, and fills
|
|
// msg with the ftp response message. </i></font><b>
|
|
int</b> process_ftp(<b>const</b> <b>char</b>* response, std::string* msg)
|
|
{
|
|
cmatch what;
|
|
<b>if</b>(regex_match(response, what, expression))
|
|
{ <font color="#000080">
|
|
<i>// what[0] contains the whole string
|
|
</i> <i>// what[1] contains the response code </i>
|
|
<i>// what[2] contains the separator character
|
|
</i> <i>// what[3] contains the text message. </i></font>
|
|
<b>if</b>(msg)
|
|
msg->assign(what[3].first, what[3].second);
|
|
<b>return</b> std::atoi(what[1].first);
|
|
} <font color="#000080">
|
|
<i>// failure did not match
|
|
</i></font> <b>if</b>(msg)
|
|
msg->erase();
|
|
<b>return</b> -1;
|
|
}</pre>
|
|
|
|
<p><a name="match_type"></a>The value of the flags parameter
|
|
passed to the algorithm must be a combination of one or more of
|
|
the following values: <br>
|
|
</p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">match_default</td>
|
|
<td valign="top" width="45%">The default value, indicates
|
|
that <b>first</b> represents the start of a line, the
|
|
start of a buffer, and (possibly) the start of a word.
|
|
Also implies that <b>last</b> represents the end of a
|
|
line, the end of the buffer and (possibly) the end of a
|
|
word. Implies that a dot sub-expression "."
|
|
will match both the newline character and a null.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">match_not_bol</td>
|
|
<td valign="top" width="45%">When this flag is set then <b>first</b>
|
|
does not represent the start of a new line.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">match_not_eol</td>
|
|
<td valign="top" width="45%">When this flag is set then <b>last</b>
|
|
does not represent the end of a line.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">match_not_bob</td>
|
|
<td valign="top" width="45%">When this flag is set then <b>first</b>
|
|
is not the beginning of a buffer.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">match_not_eob</td>
|
|
<td valign="top" width="45%">When this flag is set then <b>last</b>
|
|
does not represent the end of a buffer.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">match_not_bow</td>
|
|
<td valign="top" width="45%">When this flag is set then <b>first</b>
|
|
can never match the start of a word.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">match_not_eow</td>
|
|
<td valign="top" width="45%">When this flag is set then <b>last</b>
|
|
can never match the end of a word.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">match_not_dot_newline</td>
|
|
<td valign="top" width="45%">When this flag is set then a
|
|
dot expression "." can not match the newline
|
|
character.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="45%">match_not_dot_null</td>
|
|
<td valign="top" width="45%">When this flag is set then a
|
|
dot expression "." can not match a null
|
|
character.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%" height="75"> </td>
|
|
<td valign="top" width="45%" height="75">match_prev_avail</td>
|
|
<td valign="top" width="45%" height="75">When this flag
|
|
is set, then *--<b>first</b> is a valid expression and
|
|
the flags match_not_bol and match_not_bow have no effect,
|
|
since the value of the previous character can be used to
|
|
check these.</td>
|
|
<td width="5%" height="75"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%" height="15"> </td>
|
|
<td valign="top" width="45%" height="15">match_any</td>
|
|
<td valign="top" width="45%" height="15">When this flag
|
|
is set, then the first string matched is returned, rather
|
|
than the longest possible match. This flag can
|
|
significantly reduce the time taken to find a match, but
|
|
what matches is undefined.</td>
|
|
<td width="5%" height="15"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%" height="15"> </td>
|
|
<td valign="top" width="45%" height="15">match_not_null</td>
|
|
<td valign="top" width="45%" height="15">When this flag
|
|
is set, then the expression will never match a null
|
|
string.</td>
|
|
<td width="5%" height="15"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td width="5%" height="15"> </td>
|
|
<td valign="top" width="45%" height="15">match_continuous</td>
|
|
<td valign="top" width="45%" height="15">When this flag
|
|
is set, then during a grep operation, each successive
|
|
match must start from where the previous match finished.</td>
|
|
<td width="5%" height="15"> </td>
|
|
</tr>
|
|
</table>
|
|
|
|
<hr>
|
|
|
|
<h3><a name="reg_search"></a>Algorithm regex_search</h3>
|
|
|
|
<p> #include <<a href="../../boost/regex.hpp">boost/regex.hpp</a>>
|
|
</p>
|
|
|
|
<p>The algorithm regex_search will search a range denoted by a
|
|
pair of iterators for a given regular expression. The algorithm
|
|
uses various heuristics to reduce the search time by only
|
|
checking for a match if a match could conceivably start at that
|
|
position. The algorithm is defined as follows: </p>
|
|
|
|
<pre><b>template</b> <<b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits, <b>class</b> Allocator2>
|
|
<b>bool</b> regex_search(iterator first,
|
|
iterator last,
|
|
match_results<iterator, Allocator>& m,
|
|
<b>const</b> reg_expression<charT, traits, Allocator2>& e,
|
|
<b>unsigned</b> flags = match_default);</pre>
|
|
|
|
<p>The library also defines the following convenience versions,
|
|
which take either a const charT*, or a const std::basic_string<>&
|
|
in place of a pair of iterators [note - these versions may not be
|
|
available, or may be available in a more limited form, depending
|
|
upon your compiler's capabilities]: </p>
|
|
|
|
<pre><b>template</b> <<b>class</b> charT, <b>class</b> Allocator, <b>class</b> traits, <b>class</b> Allocator2>
|
|
<b>bool</b> regex_search(<b>const</b> charT* str,
|
|
match_results<<b>const</b> charT*, Allocator>& m,
|
|
<b>const</b> reg_expression<charT, traits, Allocator2>& e,
|
|
<b>unsigned</b> flags = match_default);
|
|
|
|
<b>template</b> <<b>class</b> ST, <b>class</b> SA, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits, <b>class</b> Allocator2>
|
|
<b>bool</b> regex_search(<b>const</b> std::basic_string<charT, ST, SA>& s,
|
|
match_results<<b>typename</b> std::basic_string<charT, ST, SA>::const_iterator, Allocator>& m,
|
|
<b>const</b> reg_expression<charT, traits, Allocator2>& e,
|
|
<b>unsigned</b> flags = match_default);</pre>
|
|
|
|
<p>The parameters for the main function version are as follows: <br>
|
|
</p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="50%">iterator first</td>
|
|
<td valign="top" width="50%">The starting position of the
|
|
range to search.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="50%">iterator last</td>
|
|
<td valign="top" width="50%">The ending position of the
|
|
range to search.</td>
|
|
<td> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="50%">match_results<iterator,
|
|
Allocator>& m</td>
|
|
<td valign="top" width="50%">An instance of match_results
|
|
in which what matched will be reported. On exit if a
|
|
match occurred then m[0] denotes the whole of the string
|
|
that matched, m[0].first and m[0].second will be less
|
|
than or equal to last. m[1] denotes the first sub-expression,
|
|
m[2] the second sub-expression and so on. If no match
|
|
occurred then m[0].first = m[0].second = last.</td>
|
|
<td> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="50%">const reg_expression<charT,
|
|
traits, Allocator2>& e</td>
|
|
<td valign="top" width="50%">The regular expression to
|
|
search for.</td>
|
|
<td> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="50%">unsigned flags = match_default</td>
|
|
<td valign="top" width="50%">The flags that determine
|
|
what gets matched, a combination of one or more <a
|
|
href="#match_type">match_flags</a> enumerators.</td>
|
|
<td> </td>
|
|
</tr>
|
|
</table>
|
|
|
|
<p><br>
|
|
</p>
|
|
|
|
<p>Example: the following <a href="demo/snippets/snip2.cpp">example</a>,
|
|
takes the contents of a file in the form of a string, and
|
|
searches for all the C++ class declarations in the file. The code
|
|
will work regardless of the way that std::string is implemented,
|
|
for example it could easily be modified to work with the SGI rope
|
|
class, which uses a non-contiguous storage strategy. </p>
|
|
|
|
<pre><font color="#008000">#include <string>
|
|
#include <map>
|
|
#include <boost/regex.hpp>
|
|
</font><font color="#000080"><i>
|
|
// purpose:
|
|
// takes the contents of a file in the form of a string
|
|
// and searches for all the C++ class definitions, storing
|
|
// their locations in a map of strings/int's </i></font><b>
|
|
typedef</b> std::map<std::string, <b>int</b>, std::less<std::string> > map_type;
|
|
|
|
boost::regex expression("^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\\{|:[^;\\{()]*\\{)"); <b>
|
|
|
|
void</b> IndexClasses(map_type& m, <b>const</b> std::string& file)
|
|
{
|
|
std::string::const_iterator start, end;
|
|
start = file.begin();
|
|
end = file.end();
|
|
boost::match_results<std::string::const_iterator> what;
|
|
<b>unsigned</b> <b>int</b> flags = boost::match_default;
|
|
<b>while</b>(regex_search(start, end, what, expression, flags))
|
|
{ <font color="#000080">
|
|
<i>// what[0] contains the whole string
|
|
</i> <i>// what[5] contains the class name.
|
|
</i> <i>// what[6] contains the template specialisation if any.
|
|
</i> <i>// add class name and position to map: </i></font>
|
|
m[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] =
|
|
what[5].first - file.begin();
|
|
<font color="#000080"><i>// update search position:
|
|
</i></font> start = what[0].second;
|
|
<font color="#000080"><i>// update flags: </i></font>
|
|
flags |= boost::match_prev_avail;
|
|
flags |= boost::match_not_bob;
|
|
}
|
|
}
|
|
</pre>
|
|
|
|
<hr>
|
|
|
|
<h3><a name="reg_grep"></a>Algorithm regex_grep</h3>
|
|
|
|
<p>#include <<a href="../../boost/regex.hpp">boost/regex.hpp</a>>
|
|
</p>
|
|
|
|
<p> Regex_grep allows you to search through an iterator
|
|
range and locate all the (non-overlapping) matches with a given
|
|
regular expression. The function is declared as: </p>
|
|
|
|
<pre><b>template</b> <<b>class</b> Predicate, <b>class</b> iterator, <b>class</b> charT, <b>class</b> traits, <b>class</b> Allocator>
|
|
<b>unsigned</b> <b>int</b> regex_grep(Predicate foo,
|
|
iterator first,
|
|
iterator last,
|
|
<b> const</b> reg_expression<charT, traits, Allocator>& e,
|
|
<b> unsigned</b> flags = match_default);</pre>
|
|
|
|
<p>The library also defines the following convenience versions,
|
|
which take either a const charT*, or a const std::basic_string<>&
|
|
in place of a pair of iterators [note - these versions may not be
|
|
available, or may be available in a more limited form, depending
|
|
upon your compiler's capabilities]: </p>
|
|
|
|
<pre><b>template</b> <<b>class</b> Predicate, <b>class</b> charT, <b>class</b> Allocator, <b>class</b> traits>
|
|
<b>unsigned</b> <b>int</b> regex_grep(Predicate foo,
|
|
<b>const</b> charT* str,
|
|
<b>const</b> reg_expression<charT, traits, Allocator>& e,
|
|
<b>unsigned</b> flags = match_default);
|
|
|
|
<b>template</b> <<b>class</b> Predicate, <b>class</b> ST, <b>class</b> SA, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits>
|
|
<b>unsigned</b> <b>int</b> regex_grep(Predicate foo,
|
|
<b>const</b> std::basic_string<charT, ST, SA>& s,
|
|
<b>const</b> reg_expression<charT, traits, Allocator>& e,
|
|
<b>unsigned</b> flags = match_default);</pre>
|
|
|
|
<p>The parameters for the primary version of regex_grep have the
|
|
following meanings: <br>
|
|
</p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td width="5%"> </td>
|
|
<td valign="top" width="50%">foo</td>
|
|
<td valign="top" width="50%">A predicate function object
|
|
or function pointer, see below for more information.</td>
|
|
<td width="5%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="50%">first</td>
|
|
<td valign="top" width="50%">The start of the range to
|
|
search.</td>
|
|
<td> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="50%">last</td>
|
|
<td valign="top" width="50%">The end of the range to
|
|
search.</td>
|
|
<td> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="50%">e</td>
|
|
<td valign="top" width="50%">The regular expression to
|
|
search for.</td>
|
|
<td> </td>
|
|
</tr>
|
|
<tr>
|
|
<td> </td>
|
|
<td valign="top" width="50%">flags</td>
|
|
<td valign="top" width="50%">The flags that determine how
|
|
matching is carried out, a combination of one or more of the <a href="#match_type">match_flags</a>
|
|
enumerators.</td>
|
|
<td> </td>
|
|
</tr>
|
|
</table>
|
|
|
|
<p> The algorithm finds all of the non-overlapping matches
|
|
of the expression e, for each match it fills a <a
|
|
href="#reg_match">match_results</a><iterator, Allocator>
|
|
structure, which contains information on what matched, and calls
|
|
the predicate foo, passing the match_results<iterator,
|
|
Allocator> as a single argument. If the predicate returns true,
|
|
then the grep operation continues, otherwise it terminates
|
|
without searching for further matches. </p>
|
|
|
|
<p>The general form of the predicate is: </p>
|
|
|
|
<pre><b>struct</b> grep_predicate
|
|
{
|
|
<b> bool</b> <b>operator</b>()(<b>const</b> match_results<iterator_type, expression_type::alloc_type>& m);
|
|
};</pre>
|
|
|
|
<p>For example the regular expression "a*b" would find
|
|
one match in the string "aaaaab" and two in the string
|
|
"aaabb". </p>
|
|
|
|
<p>Remember this algorithm can be used for a lot more than
|
|
implementing a version of grep, the predicate can be and do
|
|
anything that you want, grep utilities would output the results
|
|
to the screen, another program could index a file based on a
|
|
regular expression and store a set of bookmarks in a list, or a
|
|
text file conversion utility would output to file. The results of
|
|
one regex_grep can even be chained into another regex_grep to
|
|
create recursive parsers. </p>
|
|
|
|
<p><a href="demo/snippets/snip3.cpp">Example</a>: convert the
|
|
example from <i>regex_search</i> to use <i>regex_grep</i> instead:
|
|
</p>
|
|
|
|
<pre><font color="#008000">#include <string>
|
|
#include <map>
|
|
#include <boost/regex.hpp>
|
|
|
|
</font><font color="#000080"><i>// IndexClasses:
|
|
// takes the contents of a file in the form of a string
|
|
// and searches for all the C++ class definitions, storing
|
|
// their locations in a map of strings/int's </i></font><b>
|
|
|
|
typedef</b> std::map<std::string, <b>int</b>, std::less<std::string> > map_type;
|
|
|
|
boost::regex expression(<font color="#000080">"^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
|
|
"(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?[[:space:]]*)*(\\<\\w*\\>)"
|
|
"[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\\{|:[^;\\{()]*\\{)"</font>); <b>
|
|
|
|
class</b> IndexClassesPred
|
|
{
|
|
map_type& m;
|
|
std::string::const_iterator base; <b>
|
|
public</b>:
|
|
IndexClassesPred(map_type& a, std::string::const_iterator b) : m(a), base(b) {}
|
|
   <b>bool</b> <b>operator</b>()(<b>const</b> boost::match_results<std::string::const_iterator, boost::regex::alloc_type>& what)
|
|
{
|
|
<font color="#000080"> <i>// what[0] contains the whole string </i>
|
|
<i>// what[5] contains the class name. </i>
|
|
<i>// what[6] contains the template specialisation if any.
|
|
</i> <i>// add class name and position to map: </i></font>
|
|
m[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] =
|
|
what[5].first - base;
|
|
<b>return</b> <b>true</b>;
|
|
}
|
|
}; <b>
|
|
|
|
void</b> IndexClasses(map_type& m, <b>const</b> std::string& file)
|
|
{
|
|
std::string::const_iterator start, end;
|
|
start = file.begin();
|
|
end = file.end();
|
|
   regex_grep(IndexClassesPred(m, start), start, end, expression);
|
|
} </pre>
|
|
|
|
<p><a href="demo/snippets/snip5.cpp">Example</a>: Use regex_grep
|
|
to call a global callback function: </p>
|
|
|
|
<pre><font color="#008000">#include <string>
|
|
#include <map>
|
|
#include <boost/regex.hpp>
|
|
|
|
</font><font color="#000080"><i>// purpose:
|
|
// takes the contents of a file in the form of a string
|
|
// and searches for all the C++ class definitions, storing
|
|
// their locations in a map of strings/int's </i></font><b>
|
|
|
|
typedef</b> std::map<std::string, <b>int</b>, std::less<std::string> > map_type;
|
|
|
|
boost::regex expression(<font color="#000080">"^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\\{|:[^;\\{()]*\\{)"</font>);
|
|
|
|
map_type class_index;
|
|
std::string::const_iterator base;
|
|
|
|
<b>bool</b> grep_callback(<b>const</b> boost::match_results<std::string::const_iterator, boost::regex::alloc_type>& what)
|
|
{
|
|
<font color="#000080"> <i>// what[0] contains the whole string </i>
|
|
<i>// what[5] contains the class name. </i>
|
|
<i>// what[6] contains the template specialisation if any.
|
|
</i> <i>// add class name and position to map: </i></font>
|
|
class_index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] =
|
|
what[5].first - base;
|
|
<b>return</b> <b>true</b>;
|
|
}
|
|
<b>
|
|
void</b> IndexClasses(<b>const</b> std::string& file)
|
|
{
|
|
std::string::const_iterator start, end;
|
|
start = file.begin();
|
|
end = file.end();
|
|
base = start;
|
|
regex_grep(grep_callback, start, end, expression, match_default);
|
|
}
|
|
</pre>
|
|
|
|
<p><a href="demo/snippets/snip6.cpp">Example</a>: use regex_grep
|
|
to call a class member function, use the standard library
|
|
adapters <i>std::mem_fun</i> and <i>std::bind1st</i> to convert
|
|
the member function into a predicate: </p>
|
|
|
|
<pre><font color="#008000">#include <string>
|
|
#include <map>
|
|
#include <boost/regex.hpp>
|
|
#include <functional> </font><font color="#000080"><i>
|
|
|
|
// purpose:
|
|
// takes the contents of a file in the form of a string
|
|
// and searches for all the C++ class definitions, storing
|
|
// their locations in a map of strings/int's
|
|
|
|
</i></font><b>typedef</b> std::map<std::string, <b>int</b>, std::less<std::string> > map_type; <b>
|
|
|
|
class</b> class_index
|
|
{
|
|
boost::regex expression;
|
|
map_type index;
|
|
std::string::const_iterator base;
|
|
<b>bool</b> grep_callback(boost::match_results<std::string::const_iterator, boost::regex::alloc_type> what);
|
|
<b>public</b>:
|
|
<b> void</b> IndexClasses(<b>const</b> std::string& file);
|
|
class_index()
|
|
: index(),
|
|
expression(<font
|
|
color="#000080">"^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
|
|
"(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?"
|
|
"[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?"
|
|
"(\\{|:[^;\\{()]*\\{)"
|
|
</font> ){}
|
|
}; <b>
|
|
|
|
bool</b> class_index::grep_callback(boost::match_results<std::string::const_iterator, boost::regex::alloc_type> what)
|
|
{
|
|
<font color="#000080"> <i>// what[0] contains the whole string </i>
|
|
<i>// what[5] contains the class name. </i>
|
|
<i>// what[6] contains the template specialisation if any.
|
|
</i> <i>// add class name and position to map: </i></font>
|
|
index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] =
|
|
what[5].first - base;
|
|
<b>return</b> <b>true</b>;
|
|
}
|
|
|
|
<b>void</b> class_index::IndexClasses(<b>const</b> std::string& file)
|
|
{
|
|
std::string::const_iterator start, end;
|
|
start = file.begin();
|
|
end = file.end();
|
|
base = start;
|
|
regex_grep(std::bind1st(std::mem_fun(&class_index::grep_callback), <b>this</b>),
|
|
start,
|
|
end,
|
|
expression);
|
|
}
|
|
</pre>
|
|
|
|
<p><a href="demo/snippets/snip7.cpp">Finally</a>, C++ Builder
|
|
users can use C++ Builder's closure type as a callback argument: </p>
|
|
|
|
<pre><font color="#008000">#include <string>
|
|
#include <map>
|
|
#include <boost/regex.hpp>
|
|
#include <functional> </font><font color="#000080"><i>
|
|
|
|
// purpose:
|
|
// takes the contents of a file in the form of a string
|
|
// and searches for all the C++ class definitions, storing
|
|
// their locations in a map of strings/int's
|
|
|
|
</i></font><b>typedef</b> std::map<std::string, <b>int</b>, std::less<std::string> > map_type;
|
|
<b>class</b> class_index
|
|
{
|
|
boost::regex expression;
|
|
map_type index;
|
|
std::string::const_iterator base;
|
|
<b>typedef</b> boost::match_results<std::string::const_iterator, boost::regex::alloc_type> arg_type;
|
|
<b>bool</b> grep_callback(<b>const</b> arg_type& what); <b>
|
|
public</b>:
|
|
<b>typedef</b> <b>bool</b> (<b>__closure</b>* grep_callback_type)(<b>const</b> arg_type&);
|
|
<b>void</b> IndexClasses(<b>const</b> std::string& file);
|
|
class_index()
|
|
: index(),
|
|
expression(<font
|
|
color="#000080">"^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
|
|
"(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?"
|
|
"[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?"
|
|
"(\\{|:[^;\\{()]*\\{)"
|
|
</font> ){}
|
|
};
|
|
|
|
<b>bool</b> class_index::grep_callback(<b>const</b> arg_type& what)
|
|
{ <font color="#000080">
|
|
<i>// what[0] contains the whole string </i> <i>
|
|
// what[5] contains the class name. </i> <i>
|
|
// what[6] contains the template specialisation if any. </i>
|
|
<i>// add class name and position to map: </i></font>
|
|
index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] =
|
|
what[5].first - base;
|
|
<b>return</b> <b>true</b>;
|
|
}
|
|
|
|
<b>void</b> class_index::IndexClasses(<b>const</b> std::string& file)
|
|
{
|
|
std::string::const_iterator start, end;
|
|
start = file.begin();
|
|
end = file.end();
|
|
base = start;
|
|
class_index::grep_callback_type cl = &(<b>this</b>->grep_callback);
|
|
regex_grep(cl,
|
|
start,
|
|
end,
|
|
expression);
|
|
}
|
|
</pre>
|
|
|
|
<hr>
|
|
|
|
<h3> <a name="reg_format"></a>Algorithm regex_format</h3>
|
|
|
|
<p>#include <<a href="../../boost/regex.hpp">boost/regex.hpp</a>>
|
|
</p>
|
|
|
|
<p>The algorithm regex_format takes the results of a match and
|
|
creates a new string based upon a <a
|
|
href="format_string.htm#format_string">format string</a>, regex_format
|
|
can be used for search and replace operations: </p>
|
|
|
|
<pre><b>template</b> <<b>class</b> OutputIterator, <b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT>
|
|
OutputIterator regex_format(OutputIterator out,
|
|
<b>const</b> match_results<iterator, Allocator>& m,
|
|
<b>const</b> charT* fmt,
|
|
<b>unsigned</b> flags = 0);
|
|
<b>
|
|
template</b> <<b>class</b> OutputIterator, <b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT>
|
|
OutputIterator regex_format(OutputIterator out,
|
|
<b>const</b> match_results<iterator, Allocator>& m,
|
|
<b>const</b> std::basic_string<charT>& fmt,
|
|
<b>unsigned</b> flags = 0);</pre>
|
|
|
|
<p>The library also defines the following convenience variation
|
|
of regex_format, which returns the result directly as a string,
|
|
rather than outputting to an iterator [note - this version may
|
|
not be available, or may be available in a more limited form,
|
|
depending upon your compiler's capabilities]: </p>
|
|
|
|
<pre><b>template</b> <<b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT>
|
|
std::basic_string<charT> regex_format
|
|
(<b>const</b> match_results<iterator, Allocator>& m,
|
|
<b>const</b> charT* fmt,
|
|
<b>unsigned</b> flags = 0);
|
|
|
|
<b>template</b> <<b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT>
|
|
std::basic_string<charT> regex_format
|
|
(<b>const</b> match_results<iterator, Allocator>& m,
|
|
<b>const</b> std::basic_string<charT>& fmt,
|
|
<b>unsigned</b> flags = 0);</pre>
|
|
|
|
<p>Parameters to the main version of the function are passed as
|
|
follows: <br>
|
|
</p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td valign="top" width="9%"> </td>
|
|
<td valign="top" width="39%">OutputIterator out</td>
|
|
<td valign="top" width="44%">An output iterator type, the
|
|
output string is sent to this iterator. Typically this
|
|
would be a std::ostream_iterator.</td>
|
|
<td valign="top" width="9%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="9%"> </td>
|
|
<td valign="top" width="39%"><b>const</b> match_results<iterator,
|
|
Allocator>& m</td>
|
|
<td valign="top" width="44%">An instance of match_results<>
|
|
obtained from one of the matching algorithms above, and
|
|
denoting what matched.</td>
|
|
<td valign="top" width="9%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="9%"> </td>
|
|
<td valign="top" width="39%"><b>const</b> charT* fmt</td>
|
|
<td valign="top" width="44%">A format string that
|
|
determines how the match is transformed into the new
|
|
string.</td>
|
|
<td valign="top" width="9%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="9%"> </td>
|
|
<td valign="top" width="39%"><b>unsigned</b> flags</td>
|
|
<td valign="top" width="44%">Optional flags which
|
|
describe how the format string is to be interpreted.</td>
|
|
<td valign="top" width="9%"> </td>
|
|
</tr>
|
|
</table>
|
|
|
|
<p><a name="format_flags"></a>Format flags are defined as follows:
|
|
<br>
|
|
</p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td valign="top" width="9%"> </td>
|
|
<td valign="top" width="39%">format_all</td>
|
|
<td valign="top" width="43%">Enables all syntax options (perl-like
|
|
        plus extensions).</td>
|
|
<td valign="top" width="9%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="9%"> </td>
|
|
<td valign="top" width="39%">format_sed</td>
|
|
<td valign="top" width="43%">Allows only a sed-like
|
|
syntax.</td>
|
|
<td valign="top" width="9%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="9%"> </td>
|
|
<td valign="top" width="39%">format_perl</td>
|
|
<td valign="top" width="43%">Allows only a perl-like
|
|
syntax.</td>
|
|
<td valign="top" width="9%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="9%"> </td>
|
|
<td valign="top" width="39%">format_no_copy</td>
|
|
<td valign="top" width="43%">Disables copying of
|
|
unmatched sections to the output string during <a
|
|
href="#reg_merge">regex_merge</a> operations.</td>
|
|
<td valign="top" width="9%"> </td>
|
|
</tr>
|
|
</table>
|
|
|
|
<p><br>
|
|
</p>
|
|
|
|
<p>The format string syntax (and available options) is described
|
|
more fully under <a href="format_string.htm#format_string">format
|
|
strings</a>. <br>
|
|
</p>
|
|
|
|
<hr>
|
|
|
|
<h3><a name="reg_merge"></a>Algorithm regex_merge</h3>
|
|
|
|
<p>#include <<a href="../../boost/regex.hpp">boost/regex.hpp</a>>
|
|
</p>
|
|
|
|
<p>The algorithm regex_merge is a combination of <a
|
|
href="#reg_grep">regex_grep</a> and <a href="#reg_format">regex_format</a>.
|
|
That is, it greps through the string finding all the matches to
|
|
the regular expression, for each match it then calls regex_format
|
|
to format the string and sends the result to the output iterator.
|
|
Sections of text that do not match are copied to the output
|
|
unchanged only if the flags parameter does not have the flag <a
|
|
href="#format_flags">format_no_copy</a> set. </p>
|
|
|
|
<pre><b>template</b> <<b>class</b> OutputIterator, <b>class</b> iterator, <b>class</b> traits, <b>class</b> Allocator, <b>class</b> charT>
|
|
OutputIterator regex_merge(OutputIterator out,
|
|
iterator first,
|
|
iterator last,
|
|
<b>const</b> reg_expression<charT, traits, Allocator>& e,
|
|
<b>const</b> charT* fmt,
|
|
<b> unsigned</b> <b>int</b> flags = match_default);
|
|
|
|
<b>template</b> <<b>class</b> OutputIterator, <b>class</b> iterator, <b>class</b> traits, <b>class</b> Allocator, <b>class</b> charT>
|
|
OutputIterator regex_merge(OutputIterator out,
|
|
iterator first,
|
|
iterator last,
|
|
<b>const</b> reg_expression<charT, traits, Allocator>& e,
|
|
                           <b>const</b> std::basic_string<charT>& fmt,
|
|
<b> unsigned</b> <b>int</b> flags = match_default);</pre>
|
|
|
|
<p>The library also defines the following convenience variation
|
|
of regex_merge, which returns the result directly as a string,
|
|
rather than outputting to an iterator [note - this version may
|
|
not be available, or may be available in a more limited form,
|
|
depending upon your compiler's capabilities]: </p>
|
|
|
|
<pre><b>template</b> <<b>class</b> traits, <b>class</b> Allocator, <b>class</b> charT>
|
|
std::basic_string<charT> regex_merge(<b>const</b> std::basic_string<charT>& text,
|
|
<b>const</b> reg_expression<charT, traits, Allocator>& e,
|
|
<b>const</b> charT* fmt,
|
|
<b> unsigned</b> <b>int</b> flags = match_default);
|
|
|
|
<b>template</b> <<b>class</b> traits, <b>class</b> Allocator, <b>class</b> charT>
|
|
std::basic_string<charT> regex_merge(<b>const</b> std::basic_string<charT>& text,
|
|
<b>const</b> reg_expression<charT, traits, Allocator>& e,
|
|
<b>const</b> std::basic_string<charT>& fmt,
|
|
<b> unsigned</b> <b>int</b> flags = match_default);</pre>
|
|
|
|
<p>Parameters to the main version of the function are passed as
|
|
follows: <br>
|
|
</p>
|
|
|
|
<table border="0" cellpadding="7" cellspacing="0" width="100%">
|
|
<tr>
|
|
<td valign="top" width="7%"> </td>
|
|
<td valign="top" width="40%">OutputIterator out</td>
|
|
<td valign="top" width="45%">An output iterator type, the
|
|
output string is sent to this iterator. Typically this
|
|
would be a std::ostream_iterator.</td>
|
|
<td valign="top" width="8%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="7%"> </td>
|
|
<td valign="top" width="40%">iterator first</td>
|
|
<td valign="top" width="45%">The start of the range of
|
|
text to grep.</td>
|
|
<td valign="top" width="8%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="7%"> </td>
|
|
<td valign="top" width="40%">iterator last</td>
|
|
<td valign="top" width="45%">The end of the range of text
|
|
to grep.</td>
|
|
<td valign="top" width="8%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="7%"> </td>
|
|
<td valign="top" width="40%"><b>const</b> reg_expression<charT,
|
|
traits, Allocator>& e</td>
|
|
<td valign="top" width="45%">The expression to search for.</td>
|
|
<td valign="top" width="8%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="7%"> </td>
|
|
<td valign="top" width="40%"><b>const</b> charT* fmt</td>
|
|
<td valign="top" width="45%">The format string to be
|
|
applied to sections of text that match.</td>
|
|
<td valign="top" width="8%"> </td>
|
|
</tr>
|
|
<tr>
|
|
<td valign="top" width="7%"> </td>
|
|
<td valign="top" width="40%"><b>unsigned</b> <b>int</b>
|
|
flags = match_default</td>
|
|
<td valign="top" width="45%">Flags which determine how
|
|
the expression is matched - see <a href="#match_type">match_flags</a>,
|
|
and how the format string is interpreted - see <a
|
|
href="#format_flags">format_flags</a>.</td>
|
|
<td valign="top" width="8%"> </td>
|
|
</tr>
|
|
</table>
|
|
|
|
<p>Example: the following <a href="demo/snippets/snip4.cpp">example</a>
|
|
takes C/C++ source code as input, and outputs syntax highlighted
|
|
HTML code. </p>
|
|
|
|
<pre><font color="#008040">#include <iostream></font>
|
|
<font color="#008040">#include <fstream></font>
|
|
<font color="#008040">#include <sstream></font>
|
|
<font color="#008040">#include <string></font>
|
|
<font color="#008040">#include <iterator></font>
|
|
<font color="#008040">#include <boost/regex.hpp></font>
|
|
<font color="#008040">#include <fstream></font>
|
|
<font color="#008040">#include <iostream></font>
|
|
|
|
<font color="#000080"><i>// purpose:</i></font>
|
|
<font color="#000080"><i>// takes the contents of a file and transforms it into</i></font>
|
|
<font color="#000080"><i>// syntax highlighted code in html format</i></font>
|
|
|
|
boost::regex e1, e2;
|
|
<b>extern</b> <b>const</b> <b>char</b>* expression_text;
|
|
<b>extern</b> <b>const</b> <b>char</b>* format_string;
|
|
<b>extern</b> <b>const</b> <b>char</b>* pre_expression;
|
|
<b>extern</b> <b>const</b> <b>char</b>* pre_format;
|
|
<b>extern</b> <b>const</b> <b>char</b>* header_text;
|
|
<b>extern</b> <b>const</b> <b>char</b>* footer_text;
|
|
|
|
<b>void</b> load_file(std::string& s, std::istream& is)
|
|
{
|
|
s.erase();
|
|
s.reserve(is.rdbuf()->in_avail());
|
|
<b>char</b> c;
|
|
<b>while</b>(is.get(c))
|
|
{
|
|
<b>if</b>(s.capacity() == s.size())
|
|
s.reserve(s.capacity() * <font color="#000080">3</font>);
|
|
s.append(<font color="#000080">1</font>, c);
|
|
}
|
|
}
|
|
|
|
<b>int</b> main(<b>int</b> argc, <b>const</b> <b>char</b>** argv)
|
|
{
|
|
e1.set_expression(expression_text);
|
|
e2.set_expression(pre_expression);
|
|
<b>for</b>(<b>int</b> i = <font color="#0000A0">1</font>; i < argc; ++i)
|
|
{
|
|
std::cout << <font color="#0000FF">"Processing file "</font> << argv[i] << std::endl;
|
|
std::ifstream fs(argv[i]);
|
|
std::string in;
|
|
load_file(in, fs);
|
|
std::string out_name(std::string(argv[i]) + std::string(<font
|
|
color="#0000FF">".htm"</font>));
|
|
std::ofstream os(out_name.c_str());
|
|
os << header_text;
|
|
<font color="#000080"><i>// strip '<' and '>' first by outputting to a</i></font>
|
|
<font color="#000080"><i>// temporary string stream</i></font>
|
|
std::ostringstream t(std::ios::out | std::ios::binary);
|
|
std::ostream_iterator<<b>char</b>, <b>char</b>> oi(t);
|
|
regex_merge(oi, in.begin(), in.end(), e2, pre_format);
|
|
<font color="#000080"><i>// then output to final output stream</i></font>
|
|
<font color="#000080"><i>// adding syntax highlighting:</i></font>
|
|
std::string s(t.str());
|
|
std::ostream_iterator<<b>char</b>, <b>char</b>> out(os);
|
|
regex_merge(out, s.begin(), s.end(), e1, format_string);
|
|
os << footer_text;
|
|
}
|
|
<b>return</b> <font color="#0000A0">0</font>;
|
|
}
|
|
|
|
<b>extern</b> <b>const</b> <b>char</b>* pre_expression = <font
|
|
color="#0000FF">"(<)|(>)|\\r"</font>;
|
|
<b>extern</b> <b>const</b> <b>char</b>* pre_format = <font
|
|
            color="#0000FF">"(?1&lt;)(?2&gt;)"</font>;
|
|
|
|
|
|
<b>const</b> <b>char</b>* expression_text = <font color="#000080"><i>// preprocessor directives: index 1</i></font>
|
|
<font color="#0000FF">"(^[[:blank:]]*#([^\\n]*\\\\[[:space:]]+)*[^\\n]*)|"</font>
|
|
<font color="#000080"><i>// comment: index 3</i></font>
|
|
<font color="#0000FF">"(//[^\\n]*|/\\*([^*]|\\*+[^*/])*\\*+/)|"</font>
|
|
<font color="#000080"><i>// literals: index 5</i></font>
|
|
<font color="#0000FF">"\\<([+-]?((0x[[:xdigit:]]+)|(([[:digit:]]*\\.)?[[:digit:]]+([eE][+-]?[[:digit:]]+)?))u?((int(8|16|32|64))|L)?)\\>|"</font>
|
|
<font color="#000080"><i>// string literals: index 14</i></font>
|
|
<font color="#0000FF">"('([^\\\\']|\\\\.)*'|\"([^\\\\\"]|\\\\.)*\")|"</font>
|
|
<font color="#000080"><i>// keywords: index 17</i></font>
|
|
<font color="#0000FF">"\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import"</font>
|
|
<font color="#0000FF">"|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall"</font>
|
|
<font color="#0000FF">"|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool"</font>
|
|
<font color="#0000FF">"|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete"</font>
|
|
<font color="#0000FF">"|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto"</font>
|
|
<font color="#0000FF">"|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected"</font>
|
|
<font color="#0000FF">"|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast"</font>
|
|
<font color="#0000FF">"|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned"</font>
|
|
<font color="#0000FF">"|using|virtual|void|volatile|wchar_t|while)\\>"</font>
|
|
;
|
|
|
|
<b>const</b> <b>char</b>* format_string = <font color="#0000FF">"(?1<font color=\"#008040\">$&</font>)"</font>
|
|
<font color="#0000FF">"(?3<I><font color=\"#000080\">$&</font></I>)"</font>
|
|
<font color="#0000FF">"(?5<font color=\"#0000A0\">$&</font>)"</font>
|
|
<font color="#0000FF">"(?14<font color=\"#0000FF\">$&</font>)"</font>
|
|
<font color="#0000FF">"(?17<B>$&</B>)"</font>;
|
|
|
|
<b>const</b> <b>char</b>* header_text = <font color="#0000FF">"<HTML>\n<HEAD>\n"</font>
|
|
<font color="#0000FF">"<TITLE>Auto-generated html formated source</TITLE>\n"</font>
|
|
<font color="#0000FF">"<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=windows-1252\">\n"</font>
|
|
<font color="#0000FF">"</HEAD>\n"</font>
|
|
<font color="#0000FF">"<BODY LINK=\"#0000ff\" VLINK=\"#800080\" BGCOLOR=\"#ffff99\">\n"</font>
|
|
<font color="#0000FF">"<P> </P>\n<PRE>"</font>;
|
|
|
|
<b>const</b> <b>char</b>* footer_text = <font color="#0000FF">"</PRE>\n</BODY>\n\n"</font>;
|
|
|
|
</pre>
|
|
|
|
<hr>
|
|
|
|
<h3><a name="regex_split"></a>Algorithm regex_split</h3>
|
|
|
|
<p>#include <<a href="../../boost/regex.hpp">boost/regex.hpp</a>>
|
|
</p>
|
|
|
|
<p>Algorithm regex_split performs a similar operation to the perl
|
|
split operation, and comes in three overloaded forms: </p>
|
|
|
|
<pre><b>template</b> <<b>class</b> OutputIterator, <b>class</b> charT, <b>class</b> Traits1, <b>class</b> Alloc1, <b>class</b> Traits2, <b>class</b> Alloc2>
|
|
std::size_t regex_split(OutputIterator out,
|
|
std::basic_string<charT, Traits1, Alloc1>& s,
|
|
<b> const</b> reg_expression<charT, Traits2, Alloc2>& e,
|
|
<b> unsigned</b> flags,
|
|
std::size_t max_split);
|
|
|
|
<b>template</b> <<b>class</b> OutputIterator, <b>class</b> charT, <b>class</b> Traits1, <b>class</b> Alloc1, <b>class</b> Traits2, <b>class</b> Alloc2>
|
|
std::size_t regex_split(OutputIterator out,
|
|
std::basic_string<charT, Traits1, Alloc1>& s,
|
|
<b> const</b> reg_expression<charT, Traits2, Alloc2>& e,
|
|
<b>unsigned</b> flags = match_default);
|
|
|
|
<b>template</b> <<b>class</b> OutputIterator, <b>class</b> charT, <b>class</b> Traits1, <b>class</b> Alloc1>
|
|
std::size_t regex_split(OutputIterator out,
|
|
std::basic_string<charT, Traits1, Alloc1>& s);</pre>
|
|
|
|
<p>Each version takes an output-iterator for output, and a string
|
|
for input. If the expression contains no marked sub-expressions,
|
|
then the algorithm writes one string onto the output-iterator for
|
|
each section of input that does not match the expression. If the
|
|
expression does contain marked sub-expressions, then each time a
|
|
match is found, one string for each marked sub-expression will be
|
|
written to the output-iterator. No more than <i>max_split </i>strings
|
|
will be written to the output-iterator. Before returning, all the
|
|
input processed will be deleted from the string <i>s</i> (if <i>max_split
|
|
</i>is not reached then all of <i>s</i> will be deleted). Returns
|
|
the number of strings written to the output-iterator. If the
|
|
parameter <i>max_split</i> is not specified then it defaults to
|
|
UINT_MAX. If no expression is specified, then it defaults to
|
|
"\s+", and splitting occurs on whitespace. </p>
|
|
|
|
<p><a href="demo/snippets/snip8.cpp">Example</a>: the following
|
|
function will split the input string into a series of tokens, and
|
|
remove each token from the string <i>s</i>: </p>
|
|
|
|
<pre><b>unsigned</b> tokenise(std::list<std::string>& l, std::string& s)
|
|
{
|
|
<b> return</b> boost::regex_split(std::back_inserter(l), s);
|
|
}</pre>
|
|
|
|
<p><a href="demo/snippets/snip9.cpp">Example</a>: the following
|
|
short program will extract all of the URLs from an HTML file, and
|
|
print them out to <i>cout</i>: </p>
|
|
|
|
<pre><font color="#008000">#include <list>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <boost/regex.hpp>
|
|
</font>
|
|
boost::regex e(<font color="#000080">"<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\""</font>,
|
|
boost::regbase::normal | boost::regbase::icase);
|
|
|
|
<b>void</b> load_file(std::string& s, std::istream& is)
|
|
{
|
|
s.erase();
|
|
<font color="#000080">//
|
|
// attempt to grow string buffer to match file size,
|
|
// this doesn't always work...</font>
|
|
   s.reserve(is.rdbuf()->in_avail());
|
|
<b>char</b> c;
|
|
<b>while</b>(is.get(c))
|
|
{
|
|
      <font color="#000080">// use logarithmic growth strategy, in case
|
|
// in_avail (above) returned zero:</font>
|
|
<b>if</b>(s.capacity() == s.size())
|
|
s.reserve(s.capacity() * 3);
|
|
s.append(1, c);
|
|
}
|
|
}
|
|
|
|
|
|
<b>int</b> main(<b>int</b> argc, <b>char</b>** argv)
|
|
{
|
|
std::string s;
|
|
std::list<std::string> l;
|
|
|
|
<b>for</b>(<b>int</b> i = 1; i < argc; ++i)
|
|
{
|
|
std::cout << <font color="#000080">"Findings URL's in "</font> << argv[i] << <font
|
|
color="#000080">":"</font> << std::endl;
|
|
s.erase();
|
|
std::ifstream is(argv[i]);
|
|
load_file(s, is);
|
|
boost::regex_split(std::back_inserter(l), s, e);
|
|
<b>while</b>(l.size())
|
|
{
|
|
s = *(l.begin());
|
|
l.pop_front();
|
|
std::cout << s << std::endl;
|
|
}
|
|
}
|
|
<b>return</b> 0;
|
|
}</pre>
|
|
|
|
<hr>
|
|
|
|
<p><i>Copyright </i><a href="mailto:John_Maddock@compuserve.com"><i>Dr
|
|
John Maddock</i></a><i> 1998-2000 all rights reserved.</i> </p>
|
|
</body>
|
|
</html>
|