forked from boostorg/regex
Compare commits
1 Commits
overflow_f
...
boost-1.34
Author | SHA1 | Date | |
---|---|---|---|
36e73c1aac |
@ -1,81 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: regex_error</title>
|
||||
<meta name="generator" content="HTML Tidy, see www.w3.org">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<p></p>
|
||||
<table id="Table1" cellspacing="1" cellpadding="1" width="100%" border="0">
|
||||
<tr>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<td width="353">
|
||||
<h1 align="center">Boost.Regex</h1>
|
||||
<h2 align="center">class regex_error</h2>
|
||||
</td>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<hr>
|
||||
<h3>Synopsis</h3>
|
||||
<p>#include <<a href="../../../boost/regex/pattern_except.hpp">boost/regex/pattern_except.hpp</a>></p>
|
||||
<p>The class <code>regex_error</code> defines the type of objects thrown as
|
||||
exceptions to report errors during the conversion from a string representing a
|
||||
regular expression to a finite state machine. </p>
|
||||
<pre>
|
||||
<b>namespace</b> boost{
|
||||
|
||||
<b>class</b> regex_error : <b>public</b> std::runtime_error
|
||||
{
|
||||
<b>public</b>:
|
||||
<b>explicit</b> regex_error(<b>const</b> std::string& s, <a href="error_type.html">regex_constants::error_type err</a>, std::ptrdiff_t pos);
|
||||
<b>explicit</b> regex_error(<a href="error_type.html">boost::regex_constants::error_type err</a>);
|
||||
<a href="error_type.html">boost::regex_constants::error_type</a> code()<b>const</b>;
|
||||
std::ptrdiff_t position()<b>const</b>;
|
||||
};
|
||||
|
||||
typedef regex_error bad_pattern; // for backwards compatibility
|
||||
typedef regex_error bad_expression; // for backwards compatibility
|
||||
|
||||
} // namespace boost
|
||||
</pre>
|
||||
<h3>Description</h3>
|
||||
<pre>
|
||||
regex_error(<b>const</b> std::string& s, <a href="error_type.html">regex_constants::error_type</a> err, std::ptrdiff_t pos);
|
||||
regex_error(<a href="error_type.html">boost::regex_constants::error_type err</a>);</pre>
|
||||
<p><b>Effects:</b> Constructs an object of class <code>regex_error</code>.</p>
|
||||
<pre>
|
||||
<a href="error_type.html">boost::regex_constants::error_type</a> code()<b>const</b>;</pre>
|
||||
<p><b>Effects:</b> returns the error code that represents the parsing error that occurred.</p>
|
||||
<pre>
|
||||
std::ptrdiff_t position()<b>const</b>; </pre>
|
||||
<p><b>Effects:</b> returns the location in the expression where parsing stopped.</p>
|
||||
<P>Footnotes: the choice of <code>std::runtime_error</code> as the base class for <code>
|
||||
regex_error</code> is moot; depending upon how the library is used
|
||||
exceptions may be either logic errors (programmer supplied expressions) or run
|
||||
time errors (user supplied expressions). The library previously used <code>bad_pattern</code>
|
||||
and <code>bad_expression</code> for errors; these have been replaced by the
|
||||
single class <code>regex_error</code> to keep the library in synchronization
|
||||
with the standardization proposal.</P>
|
||||
<p></p>
|
||||
<hr>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 June 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -1,906 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: basic_regex</title>
|
||||
<meta name="generator" content="HTML Tidy, see www.w3.org">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<table id="Table1" cellspacing="1" cellpadding="1" width="100%" border="0">
|
||||
<tr>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<td width="353">
|
||||
<h1 align="center">Boost.Regex</h1>
|
||||
<h2 align="center">basic_regex</h2>
|
||||
</td>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<hr>
|
||||
<h3>Synopsis</h3>
|
||||
<pre>
|
||||
#include <<a href="../../../boost/regex.hpp">boost/regex.hpp</a>>
|
||||
</pre>
|
||||
<p>The template class <em>basic_regex</em> encapsulates regular expression parsing
|
||||
and compilation. The class takes two template parameters:</p>
|
||||
<p><b><i>charT</i></b>: determines the character type, i.e. either char or
|
||||
wchar_t; see <EM><A href="concepts.html#charT">charT concept</A></EM>.</p>
|
||||
<p><b><i>traits</i></b>: determines the behavior of the character type, for
|
||||
example which character class names are recognized. A default traits class is
|
||||
provided: <a href="regex_traits.html">regex_traits<charT></a>. See
|
||||
also <EM><A href="concepts.html#traits">traits concept</A></EM>.</p>
|
||||
<p>For ease of use there are two typedefs that define the two standard <i>basic_regex</i>
|
||||
instances; unless you want to use custom traits classes or non-standard
|
||||
character types, you won't need to use anything other than these:</p>
|
||||
<pre>
|
||||
<b>namespace</b> boost{
|
||||
<b>template</b> <<b>class</b> charT, <b>class</b> traits = regex_traits<charT> >
|
||||
<b>class</b> basic_regex;
|
||||
<b>typedef</b> basic_regex<<b>char</b>> regex;
|
||||
<b>typedef</b> basic_regex<<b>wchar_t</b>> wregex;
|
||||
}
|
||||
</pre>
|
||||
<p>The definition of <i>basic_regex</i> follows: it is based very closely on class
|
||||
basic_string, and fulfils the requirements for a constant-container of <i>charT</i>.</p>
|
||||
<pre>
|
||||
namespace boost{
|
||||
|
||||
template <class charT, class traits = regex_traits<charT> >
|
||||
class basic_regex {
|
||||
public:
|
||||
// types:
|
||||
typedef charT value_type;
|
||||
typedef implementation-specific const_iterator;
|
||||
typedef const_iterator iterator;
|
||||
typedef charT& reference;
|
||||
typedef const charT& const_reference;
|
||||
typedef std::ptrdiff_t difference_type;
|
||||
typedef std::size_t size_type;
|
||||
typedef regex_constants::syntax_option_type flag_type;
|
||||
typedef typename traits::locale_type locale_type;
|
||||
|
||||
// constants:
|
||||
// main option selection:
|
||||
static const regex_constants::syntax_option_type normal = regex_constants::normal;
|
||||
static const regex_constants::syntax_option_type ECMAScript = normal;
|
||||
static const regex_constants::syntax_option_type JavaScript = normal;
|
||||
static const regex_constants::syntax_option_type JScript = normal;
|
||||
static const regex_constants::syntax_option_type basic = regex_constants::basic;
|
||||
static const regex_constants::syntax_option_type extended = regex_constants::extended;
|
||||
static const regex_constants::syntax_option_type awk = regex_constants::awk;
|
||||
static const regex_constants::syntax_option_type grep = regex_constants::grep;
|
||||
static const regex_constants::syntax_option_type egrep = regex_constants::egrep;
|
||||
static const regex_constants::syntax_option_type sed = regex_constants::sed;
|
||||
static const regex_constants::syntax_option_type perl = regex_constants::perl;
|
||||
static const regex_constants::syntax_option_type literal = regex_constants::literal;
|
||||
// modifiers specific to perl expressions:
|
||||
static const regex_constants::syntax_option_type no_mod_m = regex_constants::no_mod_m;
|
||||
static const regex_constants::syntax_option_type no_mod_s = regex_constants::no_mod_s;
|
||||
static const regex_constants::syntax_option_type mod_s = regex_constants::mod_s;
|
||||
static const regex_constants::syntax_option_type mod_x = regex_constants::mod_x;
|
||||
// modifiers specific to POSIX basic expressions:
|
||||
static const regex_constants::syntax_option_type bk_plus_qm = regex_constants::bk_plus_qm;
|
||||
static const regex_constants::syntax_option_type bk_vbar = regex_constants::bk_vbar;
|
||||
static const regex_constants::syntax_option_type no_char_classes = regex_constants::no_char_classes;
|
||||
static const regex_constants::syntax_option_type no_intervals = regex_constants::no_intervals;
|
||||
// common modifiers:
|
||||
static const regex_constants::syntax_option_type nosubs = regex_constants::nosubs;
|
||||
static const regex_constants::syntax_option_type optimize = regex_constants::optimize;
|
||||
static const regex_constants::syntax_option_type collate = regex_constants::collate;
|
||||
static const regex_constants::syntax_option_type newline_alt = regex_constants::newline_alt;
|
||||
static const regex_constants::syntax_option_type no_except = regex_constants::no_except;
|
||||
|
||||
// construct/copy/destroy:
|
||||
explicit <A href="#c1">basic_regex</A> ();
|
||||
explicit <A href="#c2">basic_regex</A>(const charT* p, flag_type f = regex_constants::normal);
|
||||
<A href="#c3">basic_regex</A>(const charT* p1, const charT* p2, flag_type f = regex_constants::normal);
|
||||
<A href="#c4">basic_regex</A>(const charT* p, size_type len, flag_type f);
|
||||
<A href="#c5">basic_regex</A>(const basic_regex&);
|
||||
template <class ST, class SA>
|
||||
explicit <A href="#c6">basic_regex</A>(const basic_string<charT, ST, SA>& p, flag_type f = regex_constants::normal);
|
||||
template <class InputIterator>
|
||||
<A href="#c7">basic_regex</A>(InputIterator first, InputIterator last, flag_type f = regex_constants::normal);
|
||||
|
||||
~basic_regex();
|
||||
basic_regex& <A href="#o1">operator</A>=(const basic_regex&);
|
||||
basic_regex& <A href="#o2">operator</A>= (const charT* ptr);
|
||||
template <class ST, class SA>
|
||||
basic_regex& <A href="#o3">operator</A>= (const basic_string<charT, ST, SA>& p);
|
||||
// iterators:
|
||||
const_iterator <A href="#m1">begin</A>() const;
|
||||
const_iterator <A href="#m2">end</A>() const;
|
||||
// capacity:
|
||||
size_type <A href="#m3">size</A>() const;
|
||||
size_type <A href="#m4">max_size</A>() const;
|
||||
bool <A href="#m5">empty</A>() const;
|
||||
unsigned <A href="#m6">mark_count</A>()const;
|
||||
//
|
||||
// modifiers:
|
||||
basic_regex& <A href="#a1">assign</A>(const basic_regex& that);
|
||||
basic_regex& <A href="#a2">assign</A>(const charT* ptr, flag_type f = regex_constants::normal);
|
||||
basic_regex& <A href="#a3">assign</A>(const charT* ptr, unsigned int len, flag_type f);
|
||||
template <class string_traits, class A>
|
||||
basic_regex& <A href="#a4">assign</A>(const basic_string<charT, string_traits, A>& s,
|
||||
flag_type f = regex_constants::normal);
|
||||
template <class InputIterator>
|
||||
basic_regex& <A href="#a5">assign</A>(InputIterator first, InputIterator last,
|
||||
flag_type f = regex_constants::normal);
|
||||
|
||||
// const operations:
|
||||
flag_type <A href="#m8">flags</A>() const;
|
||||
int <A href="#m8b">status</A>()const;
|
||||
basic_string<charT> <A href="#m9">str</A>() const;
|
||||
int <A href="#m10">compare</A>(basic_regex&) const;
|
||||
// locale:
|
||||
locale_type <A href="#m11">imbue</A>(locale_type loc);
|
||||
locale_type <A href="#m12">getloc</A>() const;
|
||||
// swap
|
||||
void <A href="#m13">swap</A>(basic_regex&) throw();
|
||||
};
|
||||
|
||||
template <class charT, class traits>
|
||||
bool <A href="#o4">operator</A> == (const basic_regex<charT, traits>& lhs,
|
||||
const basic_regex<charT, traits>& rhs);
|
||||
template <class charT, class traits>
|
||||
bool <A href="#o5">operator</A> != (const basic_regex<charT, traits>& lhs,
|
||||
const basic_regex<charT, traits>& rhs);
|
||||
template <class charT, class traits>
|
||||
bool <A href="#o7">operator</A> < (const basic_regex<charT, traits>& lhs,
|
||||
const basic_regex<charT, traits>& rhs);
|
||||
template <class charT, class traits>
|
||||
bool <A href="#o8">operator</A> <= (const basic_regex<charT, traits>& lhs,
|
||||
const basic_regex<charT, traits>& rhs);
|
||||
template <class charT, class traits>
|
||||
bool <A href="#o9">operator</A> >= (const basic_regex<charT, traits>& lhs,
|
||||
const basic_regex<charT, traits>& rhs);
|
||||
template <class charT, class traits>
|
||||
bool <A href="#o10">operator</A> > (const basic_regex<charT, traits>& lhs,
|
||||
const basic_regex<charT, traits>& rhs);
|
||||
|
||||
template <class charT, class io_traits, class re_traits>
|
||||
basic_ostream<charT, io_traits>&
|
||||
<A href="#o11">operator</A> << (basic_ostream<charT, io_traits>& os,
|
||||
const basic_regex<charT, re_traits>& e);
|
||||
|
||||
template <class charT, class traits>
|
||||
void <A href="#o12">swap</A>(basic_regex<charT, traits>& e1,
|
||||
basic_regex<charT, traits>& e2);
|
||||
|
||||
typedef basic_regex<char> regex;
|
||||
typedef basic_regex<wchar_t> wregex;
|
||||
|
||||
} // namespace boost
|
||||
</pre>
|
||||
<h3>Description</h3>
|
||||
<p>Class <em>basic_regex</em> has the following public member functions:</p>
|
||||
<h4>basic_regex constants</h4>
|
||||
<pre>
|
||||
// main option selection:
|
||||
static const regex_constants::syntax_option_type normal = regex_constants::normal;
|
||||
static const regex_constants::syntax_option_type ECMAScript = normal;
|
||||
static const regex_constants::syntax_option_type JavaScript = normal;
|
||||
static const regex_constants::syntax_option_type JScript = normal;
|
||||
static const regex_constants::syntax_option_type basic = regex_constants::basic;
|
||||
static const regex_constants::syntax_option_type extended = regex_constants::extended;
|
||||
static const regex_constants::syntax_option_type awk = regex_constants::awk;
|
||||
static const regex_constants::syntax_option_type grep = regex_constants::grep;
|
||||
static const regex_constants::syntax_option_type egrep = regex_constants::egrep;
|
||||
static const regex_constants::syntax_option_type sed = regex_constants::sed;
|
||||
static const regex_constants::syntax_option_type perl = regex_constants::perl;
|
||||
static const regex_constants::syntax_option_type literal = regex_constants::literal;
|
||||
// modifiers specific to perl expressions:
|
||||
static const regex_constants::syntax_option_type no_mod_m = regex_constants::no_mod_m;
|
||||
static const regex_constants::syntax_option_type no_mod_s = regex_constants::no_mod_s;
|
||||
static const regex_constants::syntax_option_type mod_s = regex_constants::mod_s;
|
||||
static const regex_constants::syntax_option_type mod_x = regex_constants::mod_x;
|
||||
// modifiers specific to POSIX basic expressions:
|
||||
static const regex_constants::syntax_option_type bk_plus_qm = regex_constants::bk_plus_qm;
|
||||
static const regex_constants::syntax_option_type bk_vbar = regex_constants::bk_vbar;
|
||||
static const regex_constants::syntax_option_type no_char_classes = regex_constants::no_char_classes;
|
||||
static const regex_constants::syntax_option_type no_intervals = regex_constants::no_intervals;
|
||||
// common modifiers:
|
||||
static const regex_constants::syntax_option_type nosubs = regex_constants::nosubs;
|
||||
static const regex_constants::syntax_option_type optimize = regex_constants::optimize;
|
||||
static const regex_constants::syntax_option_type collate = regex_constants::collate;
|
||||
static const regex_constants::syntax_option_type newline_alt = regex_constants::newline_alt;
|
||||
</pre>
|
||||
<p>The static constant members are provided as synonyms for the constants declared
|
||||
in namespace <code>boost::regex_constants</code>; for each constant of type <code><A href="syntax_option_type.html">
|
||||
syntax_option_type</A></code> declared in namespace <code>boost::regex_constants</code>
|
||||
then a constant with the same name, type and value is declared within the scope
|
||||
of <code>basic_regex</code>.</p>
|
||||
<h4>basic_regex constructors</h4>
|
||||
<pre><A name=c1></A> basic_regex();
|
||||
</pre>
|
||||
<P><b>Effects:</b> Constructs an object of class <code>basic_regex</code>. The
|
||||
postconditions of this function are indicated in the table:</P>
|
||||
<div align="center">
|
||||
<center>
|
||||
<table id="Table2" cellspacing="1" cellpadding="7" width="624" border="1">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p><b>Element</b></p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p><b>Value</b></p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>empty()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>true</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>size()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>0</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>str()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>basic_string<charT>()</p>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</center>
|
||||
</div>
|
||||
<pre><A name=c2><BR> basic_regex(const charT* p, flag_type f = regex_constants::normal);
|
||||
|
||||
</pre>
|
||||
<P><b>Requires:</b> <i>p</i> shall not be a null pointer.</P>
|
||||
<p><b>Throws:</b> <code>bad_expression</code> if <i>p</i> is not a valid regular
|
||||
expression, unless the flag no_except is set in <EM>f</EM>.</p>
|
||||
<p><b>Effects:</b> Constructs an object of class <code>basic_regex</code>; the
|
||||
object's internal finite state machine is constructed from the regular
|
||||
expression contained in the null-terminated string <i>p</i>, and interpreted
|
||||
according to the <a href="syntax_option_type.html">option flags</a> specified
|
||||
in <i>f</i>. The postconditions of this function are indicated in the table:</p>
|
||||
<div align="center">
|
||||
<center>
|
||||
<table id="Table3" cellspacing="1" cellpadding="7" width="624" border="1">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p><b>Element</b></p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p><b>Value</b></p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>empty()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>false</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>size()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>char_traits<charT>::length(p)</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>str()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>basic_string<charT>(p)</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>flags()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>f</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>mark_count()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>The number of marked sub-expressions within the expression.</p>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</center>
|
||||
</div>
|
||||
<pre>
|
||||
|
||||
</pre>
|
||||
<PRE><A name=c3></A>basic_regex(const charT* p1, const charT* p2, flag_type f = regex_constants::normal);</PRE>
|
||||
<p><b>Requires:</b> <i>p1</i> and <i>p2</i> are not null pointers, <code>p1 < p2</code>.</p>
|
||||
<p><b>Throws:</b> <code>bad_expression</code> if [p1,p2) is not a valid regular
|
||||
expression, unless the flag no_except is set in <EM>f</EM>.</p>
|
||||
<p><b>Effects:</b> Constructs an object of class <code>basic_regex</code>; the
|
||||
object's internal finite state machine is constructed from the regular
|
||||
expression contained in the sequence of characters [p1,p2), and interpreted
|
||||
according to the <a href="syntax_option_type.html">option flags</a> specified in <i>f</i>.
|
||||
The postconditions of this function are indicated in the table:</p>
|
||||
<div align="center">
|
||||
<center>
|
||||
<table id="Table4" cellspacing="1" cellpadding="7" width="624" border="1">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p><b>Element</b></p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p><b>Value</b></p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>empty()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>false</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>size()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>std::distance(p1,p2)</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>str()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>basic_string<charT>(p1,p2)</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>flags()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>f</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>mark_count()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>The number of marked sub-expressions within the expression.</p>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</center>
|
||||
</div>
|
||||
<pre>
|
||||
|
||||
</pre>
|
||||
<pre><A name=c4></A>basic_regex(const charT* p, size_type len, flag_type f);
|
||||
</pre>
|
||||
<p><b>Requires:</b> <i>p</i> shall not be a null pointer, <code>len < max_size()</code>.</p>
|
||||
<p><b>Throws:</b> <code>bad_expression</code> if <i>p</i> is not a valid regular
|
||||
expression, unless the flag no_except is set in <EM>f</EM>.</p>
|
||||
<p><b>Effects:</b> Constructs an object of class <code>basic_regex</code>; the
|
||||
object's internal finite state machine is constructed from the regular
|
||||
expression contained in the sequence of characters [p, p+len), and interpreted
|
||||
according to the <a href="syntax_option_type.html">option flags</a> specified in <i>f</i>.
|
||||
The postconditions of this function are indicated in the table:</p>
|
||||
<div align="center">
|
||||
<center>
|
||||
<table id="Table5" cellspacing="1" cellpadding="7" width="624" border="1">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p><b>Element</b></p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p><b>Value</b></p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>empty()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>false</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>size()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>len</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>str()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>basic_string<charT>(p, len)</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>flags()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>f</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>mark_count()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>The number of marked sub-expressions within the expression.</p>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</center>
|
||||
</div>
|
||||
<pre><A name=c5></A><BR>basic_regex(const basic_regex& e);
|
||||
</pre>
|
||||
<p><b>Effects:</b> Constructs an object of class <code>basic_regex</code> as a
|
||||
copy of the object <i>e</i>. The postconditions of this function are indicated
|
||||
in the table:</p>
|
||||
<div align="center">
|
||||
<center>
|
||||
<table id="Table6" cellspacing="1" cellpadding="7" width="624" border="1">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p><b>Element</b></p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p><b>Value</b></p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>empty()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>e.empty()</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>size()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>e.size()</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>str()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>e.str()</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>flags()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>e.flags()</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>mark_count()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>e.mark_count()</p>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</center>
|
||||
</div>
|
||||
<pre><BR>
|
||||
template <class ST, class SA>
|
||||
<A name=c6></A>basic_regex(const basic_string<charT, ST, SA>& s, flag_type f = regex_constants::normal);
|
||||
</pre>
|
||||
<p><b>Throws:</b> <code>bad_expression</code> if <i>s</i> is not a valid regular
|
||||
expression, unless the flag no_except is set in <EM>f</EM>.</p>
|
||||
<p><b>Effects:</b> Constructs an object of class <code>basic_regex</code>; the
|
||||
object's internal finite state machine is constructed from the regular
|
||||
expression contained in the string <i>s</i>, and interpreted according to the <a href="syntax_option_type.html">
|
||||
option flags</a> specified in <i>f</i>. The postconditions of this function
|
||||
are indicated in the table:</p>
|
||||
<div align="center">
|
||||
<center>
|
||||
<table id="Table7" cellspacing="1" cellpadding="7" width="624" border="1">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p><b>Element</b></p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p><b>Value</b></p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>empty()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>false</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>size()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>s.size()</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>str()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>s</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>flags()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>f</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>mark_count()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>The number of marked sub-expressions within the expression.</p>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</center>
|
||||
</div>
|
||||
<pre><BR>
|
||||
template <class ForwardIterator>
|
||||
<A name=c7></A>basic_regex(ForwardIterator first, ForwardIterator last, flag_type f = regex_constants::normal);
|
||||
</pre>
|
||||
<p><b>Throws:</b> <code>bad_expression</code> if the sequence <i>[first, last)</i>
|
||||
is not a valid regular expression, unless the flag no_except is set in <EM>f</EM>.</p>
|
||||
<p><b>Effects:</b> Constructs an object of class <code>basic_regex</code>; the
|
||||
object's internal finite state machine is constructed from the regular
|
||||
expression contained in the sequence of characters [first, last), and
|
||||
interpreted according to the <a href="syntax_option_type.html">option flags</a>
|
||||
specified in <i>f</i>. The postconditions of this function are indicated in the
|
||||
table:</p>
|
||||
<div align="center">
|
||||
<center>
|
||||
<table id="Table8" cellspacing="1" cellpadding="7" width="624" border="1">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p><b>Element</b></p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p><b>Value</b></p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>empty()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>false</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>size()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>distance(first,last)</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>str()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>basic_string<charT>(first,last)</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>flags()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>f</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>mark_count()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>The number of marked sub-expressions within the expression.</p>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</center>
|
||||
</div>
|
||||
<pre><A name=o1></A>
|
||||
basic_regex& operator=(const basic_regex& e);
|
||||
</pre>
|
||||
<p><b>Effects:</b> Returns the result of <code>assign(e.str(), e.flags())</code>.</p>
|
||||
<pre><A name=o2></A>basic_regex& operator=(const charT* ptr);
|
||||
</pre>
|
||||
<p><b>Requires:</b> <i>ptr</i> shall not be a null pointer.</p>
|
||||
<p><b>Effects:</b> Returns the result of <code>assign(ptr)</code>.</p>
|
||||
<pre><A name=o3></A>
|
||||
template <class ST, class SA>
|
||||
basic_regex& operator=(const basic_string<charT, ST, SA>& p);
|
||||
</pre>
|
||||
<p><b>Effects:</b> Returns the result of <code>assign(p)</code>.</p>
|
||||
<h4>basic_regex iterators</h4>
|
||||
<pre><A name=m1>
|
||||
const_iterator begin() const;
|
||||
</pre>
|
||||
<p><b>Effects:</b> Returns a starting iterator to a sequence of characters
|
||||
representing the regular expression.</p>
|
||||
<pre><A name=m2>
|
||||
const_iterator end() const;
|
||||
</pre>
|
||||
<p><b>Effects:</b> Returns a termination iterator to a sequence of characters
|
||||
representing the regular expression.</p>
|
||||
<h4>basic_regex capacity</h4>
|
||||
<pre><A name=m3>
|
||||
size_type size() const;
|
||||
</pre>
|
||||
<p><b>Effects:</b> Returns the length of the sequence of characters representing
|
||||
the regular expression.</p>
|
||||
<pre><A name=m4>
|
||||
size_type max_size() const;
|
||||
</pre>
|
||||
<p><b>Effects:</b> Returns the maximum length of the sequence of characters
|
||||
representing the regular expression.</p>
|
||||
<pre><A name=m5></A>
|
||||
bool empty() const;
|
||||
</pre>
|
||||
<p><b>Effects:</b> Returns <b>true</b> if the object does not contain a valid
|
||||
regular expression, otherwise <b>false</b>.</p>
|
||||
<pre><A name=m6></A>unsigned mark_count() const;
|
||||
</pre>
|
||||
<p><b>Effects:</b> Returns the number of marked sub-expressions within the regular
|
||||
expression.</p>
|
||||
<h4>basic_regex assign</h4>
|
||||
<pre><A name=a1></A>
|
||||
basic_regex& assign(const basic_regex& that);
|
||||
</pre>
|
||||
<p><b>Effects:</b> Returns <code>assign(that.str(), that.flags())</code>.</p>
|
||||
<pre><A name=a2></A>
|
||||
basic_regex& assign(const charT* ptr, flag_type f = regex_constants::normal);
|
||||
</pre>
|
||||
<p><b>Effects:</b> Returns <code>assign(string_type(ptr), f)</code>.</p>
|
||||
<PRE><A name=a3></A>basic_regex& assign(const charT* ptr, unsigned int len, flag_type f);</PRE>
|
||||
<P><B>Effects:</B> Returns <CODE>assign(string_type(ptr, len), f)</CODE>.</P>
|
||||
<PRE><A name=a4></A>template <class string_traits, class A>
|
||||
<A name=a6></A>basic_regex& assign(const basic_string<charT, string_traits, A>& s,
|
||||
flag_type f = regex_constants::normal);
|
||||
</PRE>
|
||||
<p><b>Throws:</b> <code>bad_expression</code> if <i>s</i> is not a valid regular
|
||||
expression, unless the flag no_except is set in <EM>f</EM>.</p>
|
||||
<p><b>Returns:</b> <code>*this</code>.</p>
|
||||
<p><b>Effects:</b> Assigns the regular expression contained in the string <i>s</i>,
|
||||
interpreted according to the <a href="syntax_option_type.html">option flags</a> specified
|
||||
in <i>f</i>. The postconditions of this function are indicated in the table:</p>
|
||||
<div align="center">
|
||||
<center>
|
||||
<table id="Table9" cellspacing="1" cellpadding="7" width="624" border="1">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p><b>Element</b></p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p><b>Value</b></p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>empty()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>false</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>size()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>s.size()</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>str()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>s</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>flags()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>f</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>mark_count()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>The number of marked sub-expressions within the expression.</p>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</center>
|
||||
</div>
|
||||
<pre>
|
||||
|
||||
</pre>
|
||||
<pre><A name=a5></A>template <class InputIterator>
|
||||
basic_regex& assign(InputIterator first, InputIterator last,
|
||||
flag_type f = regex_constants::normal);
|
||||
</pre>
|
||||
<p><b>Requires:</b> The type InputIterator corresponds to the Input Iterator
|
||||
requirements (24.1.1).</p>
|
||||
<p><b>Effects:</b> Returns <code>assign(string_type(first, last), f)</code>.</p>
|
||||
<h4>basic_regex constant operations</h4>
|
||||
<pre><A name=m7></A><A name=m8></A>flag_type flags() const;
|
||||
</pre>
|
||||
<p><b>Effects:</b> Returns a copy of the regular expression syntax flags that were
|
||||
passed to the object's constructor, or the last call to <code>assign</code>.</p>
|
||||
<CODE>
|
||||
<PRE><A name=m8b></A>int status() const;</PRE>
|
||||
</CODE>
|
||||
<P><STRONG>Effects</STRONG>: Returns zero if the expression contains a valid
|
||||
regular expression, otherwise an <A href="error_type.html">error code</A>.
|
||||
This member function is retained for use in environments that cannot use
|
||||
exception handling.</P>
|
||||
<pre><A name=m9></A>basic_string<charT> str() const;
|
||||
</pre>
|
||||
<p><b>Effects:</b> Returns a copy of the character sequence passed to the object's
|
||||
constructor, or the last call to <code>assign</code>.</p>
|
||||
<pre><A name=m10></A>int compare(basic_regex& e)const;
|
||||
</pre>
|
||||
<p><b>Effects:</b> If <code>flags() == e.flags()</code> then returns <code>str().compare(e.str())</code>,
|
||||
otherwise returns <code>flags() - e.flags()</code>.</p>
|
||||
<h4>basic_regex locale</h4>
|
||||
<pre><A name=m11></A>locale_type imbue(locale_type l);
|
||||
</pre>
|
||||
<p><b>Effects:</b> Returns the result of <code>traits_inst.imbue(l)</code> where <code>
|
||||
traits_inst</code> is a (default initialized) instance of the template
|
||||
parameter <code>traits</code> stored within the object. Calls to imbue
|
||||
invalidate any currently contained regular expression.</p>
|
||||
<p><b>Postcondition:</b> <code>empty() == true</code>.</p>
|
||||
<pre><A name=m12></A>
|
||||
locale_type getloc() const;
|
||||
</pre>
|
||||
<p><b>Effects:</b> Returns the result of <code>traits_inst.getloc()</code> where <code>
|
||||
traits_inst</code> is a (default initialized) instance of the template
|
||||
parameter <code>traits</code> stored within the object.</p>
|
||||
<h4>basic_regex swap</h4>
|
||||
<pre><A name=m13></A>
|
||||
void swap(basic_regex& e) throw();
|
||||
</pre>
|
||||
<p><b>Effects:</b> Swaps the contents of the two regular expressions.</p>
|
||||
<p><b>Postcondition:</b> <code>*this</code> contains the characters that were in <i>e</i>,
|
||||
<i>e</i> contains the regular expression that was in <code>*this</code>.</p>
|
||||
<p><b>Complexity:</b> constant time.</p>
|
||||
<h4>basic_regex non-member functions</h4>
|
||||
<h5>basic_regex non-member comparison operators </h5>
|
||||
<P>Comparisons between basic_regex objects are provided on an experimental basis:
|
||||
please note that these are likely to be removed from the standard library
|
||||
proposal, so use with care if you are writing portable code.</P>
|
||||
<pre><A name=o4></A>
|
||||
template <class charT, class traits>
|
||||
bool operator == (const basic_regex<charT, traits>& lhs,
|
||||
const basic_regex<charT, traits>& rhs);
|
||||
</pre>
|
||||
<p><b>Effects:</b> Returns <code>lhs.compare(rhs) == 0</code>.</p>
|
||||
<pre><A name=o5></A>
|
||||
template <class charT, class traits>
|
||||
bool operator != (const basic_regex<charT, traits>& lhs,
|
||||
const basic_regex<charT, traits>& rhs);
|
||||
</pre>
|
||||
<p><b>Effects:</b> Returns <code>lhs.compare(rhs) != 0</code>.</p>
|
||||
<pre><A name=o7></A>
|
||||
template <class charT, class traits>
|
||||
bool operator < (const basic_regex<charT, traits>& lhs,
|
||||
const basic_regex<charT, traits>& rhs);
|
||||
</pre>
|
||||
<p><b>Effects:</b> Returns <code>lhs.compare(rhs) < 0</code>.</p>
|
||||
<pre><A name=o8></A>
|
||||
template <class charT, class traits>
|
||||
bool operator <= (const basic_regex<charT, traits>& lhs,
|
||||
const basic_regex<charT, traits>& rhs);
|
||||
</pre>
|
||||
<p><b>Effects:</b> Returns <code>lhs.compare(rhs) <= 0</code>.</p>
|
||||
<pre><A name=o9></A>
|
||||
template <class charT, class traits>
|
||||
bool operator >= (const basic_regex<charT, traits>& lhs,
|
||||
const basic_regex<charT, traits>& rhs);
|
||||
</pre>
|
||||
<p><b>Effects:</b> Returns <code>lhs.compare(rhs) >= 0</code>.</p>
|
||||
<pre><A name=o10></A>
|
||||
template <class charT, class traits>
|
||||
bool operator > (const basic_regex<charT, traits>& lhs,
|
||||
const basic_regex<charT, traits>& rhs);
|
||||
</pre>
|
||||
<p><b>Effects:</b> Returns <code>lhs.compare(rhs) > 0</code>.</p>
|
||||
<h5>basic_regex inserter.</h5>
|
||||
<P>The basic_regex stream inserter is provided on an experimental basis, and
|
||||
outputs the textual representation of the expression to the stream:</P>
|
||||
<pre><A name=o11></A>
|
||||
template <class charT, class io_traits, class re_traits>
|
||||
basic_ostream<charT, io_traits>&
|
||||
operator << (basic_ostream<charT, io_traits>& os,
|
||||
const basic_regex<charT, re_traits>& e);
|
||||
</pre>
|
||||
<p><b>Effects:</b> Returns (os << e.str()).</p>
|
||||
<h5>basic_regex non-member swap</h5>
|
||||
<pre><A name=o12></A>
|
||||
template <class charT, class traits>
|
||||
void swap(basic_regex<charT, traits>& lhs,
|
||||
basic_regex<charT, traits>& rhs);
|
||||
</pre>
|
||||
<p><b>Effects:</b> calls <code>lhs.swap(rhs)</code>.</p>
|
||||
<hr>
|
||||
<p>Revised 7 Aug
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,254 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Understanding Captures</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Understanding Captures</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>Captures are the iterator ranges that are "captured" by marked sub-expressions
|
||||
as a regular expression gets matched. Each marked sub-expression can
|
||||
result in more than one capture, if it is matched more than once. This
|
||||
document explains how captures and marked sub-expressions in Boost.Regex are
|
||||
represented and accessed.</P>
|
||||
<H2>Marked sub-expressions</H2>
|
||||
<P>Every time a Perl regular expression contains a parenthesis group (), it spits
|
||||
out an extra field, known as a marked sub-expression, for example the
|
||||
expression:</P>
|
||||
<PRE>(\w+)\W+(\w+)</PRE>
|
||||
<P>
|
||||
Has two marked sub-expressions (known as $1 and $2 respectively), in addition
|
||||
the complete match is known as $&, everything before the first match as $`,
|
||||
and everything after the match as $'. So if the above expression is
|
||||
searched for within "@abc def--", then we obtain:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<P>
|
||||
<TABLE id="Table2" cellSpacing="1" cellPadding="1" width="300" border="0">
|
||||
<TR>
|
||||
<TD>
|
||||
<P dir="ltr" style="MARGIN-RIGHT: 0px">$`</P>
|
||||
</TD>
|
||||
<TD>"@"</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>$&</TD>
|
||||
<TD>"abc def"</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>$1</TD>
|
||||
<TD>"abc"</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>$2</TD>
|
||||
<TD>"def"</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>$'</TD>
|
||||
<TD>"--"</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
</BLOCKQUOTE>
|
||||
<P>In Boost.Regex all these are accessible via the <A href="match_results.html">match_results</A>
|
||||
class that gets filled in when calling one of the matching algorithms (<A href="regex_search.html">regex_search</A>,
|
||||
<A href="regex_match.html">regex_match</A>, or <A href="regex_iterator.html">regex_iterator</A>).
|
||||
So given:</P>
|
||||
<PRE>boost::match_results<IteratorType> m;</PRE>
|
||||
<P>The Perl and Boost.Regex equivalents are as follows:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<P>
|
||||
<TABLE id="Table3" cellSpacing="1" cellPadding="1" width="300" border="0">
|
||||
<TR>
|
||||
<TD><STRONG>Perl</STRONG></TD>
|
||||
<TD><STRONG>Boost.Regex</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>$`</TD>
|
||||
<TD>m.prefix()</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>$&</TD>
|
||||
<TD>m[0]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>$n</TD>
|
||||
<TD>m[n]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>$'</TD>
|
||||
<TD>m.suffix()</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
</BLOCKQUOTE>
|
||||
<P>
|
||||
<P>In Boost.Regex each sub-expression match is represented by a <A href="sub_match.html">
|
||||
sub_match</A> object, this is basically just a pair of iterators denoting
|
||||
the start and end position of the sub-expression match, but there are some
|
||||
additional operators provided so that objects of type sub_match behave a lot
|
||||
like a std::basic_string: for example they are implicitly <A href="sub_match.html#m3">
|
||||
convertible to a basic_string</A>, they can be <A href="sub_match.html#o21">compared
|
||||
to a string</A>, <A href="sub_match.html#o81">added to a string</A>, or <A href="sub_match.html#oi">
|
||||
streamed out to an output stream</A>.</P>
|
||||
<H2>Unmatched Sub-Expressions</H2>
|
||||
<P>When a regular expression match is found there is no need for all of the marked
|
||||
sub-expressions to have participated in the match, for example the expression:</P>
|
||||
<P>(abc)|(def)</P>
|
||||
<P>can match either $1 or $2, but never both at the same time. In
|
||||
Boost.Regex you can determine which sub-expressions matched by accessing the <A href="sub_match.html#m1">
|
||||
sub_match::matched</A> data member.</P>
|
||||
<H2>Repeated Captures</H2>
|
||||
<P>When a marked sub-expression is repeated, then the sub-expression gets
|
||||
"captured" multiple times, however normally only the final capture is
|
||||
available, for example if</P>
|
||||
<PRE>(?:(\w+)\W+)+</PRE>
|
||||
<P>is matched against</P>
|
||||
<PRE>one fine day</PRE>
|
||||
<P>Then $1 will contain the string "day", and all the previous captures will have
|
||||
been forgotten.</P>
|
||||
<P>However, Boost.Regex has an experimental feature that allows all the capture
|
||||
information to be retained - this is accessed either via the <A href="match_results.html#m17">
|
||||
match_results::captures</A> member function or the <A href="sub_match.html#m8">sub_match::captures</A>
|
||||
member function. These functions return a container that contains a
|
||||
sequence of all the captures obtained during the regular expression
|
||||
matching. The following example program shows how this information may be
|
||||
used:</P>
|
||||
<PRE>#include <boost/regex.hpp>
|
||||
#include <iostream>
|
||||
|
||||
|
||||
void print_captures(const std::string& regx, const std::string& text)
|
||||
{
|
||||
boost::regex e(regx);
|
||||
boost::smatch what;
|
||||
std::cout << "Expression: \"" << regx << "\"\n";
|
||||
std::cout << "Text: \"" << text << "\"\n";
|
||||
if(boost::regex_match(text, what, e, boost::match_extra))
|
||||
{
|
||||
unsigned i, j;
|
||||
std::cout << "** Match found **\n Sub-Expressions:\n";
|
||||
for(i = 0; i < what.size(); ++i)
|
||||
std::cout << " $" << i << " = \"" << what[i] << "\"\n";
|
||||
std::cout << " Captures:\n";
|
||||
for(i = 0; i < what.size(); ++i)
|
||||
{
|
||||
std::cout << " $" << i << " = {";
|
||||
for(j = 0; j < what.captures(i).size(); ++j)
|
||||
{
|
||||
if(j)
|
||||
std::cout << ", ";
|
||||
else
|
||||
std::cout << " ";
|
||||
std::cout << "\"" << what.captures(i)[j] << "\"";
|
||||
}
|
||||
std::cout << " }\n";
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "** No Match found **\n";
|
||||
}
|
||||
}
|
||||
|
||||
int main(int , char* [])
|
||||
{
|
||||
print_captures("(([[:lower:]]+)|([[:upper:]]+))+", "aBBcccDDDDDeeeeeeee");
|
||||
print_captures("(.*)bar|(.*)bah", "abcbar");
|
||||
print_captures("(.*)bar|(.*)bah", "abcbah");
|
||||
print_captures("^(?:(\\w+)|(?>\\W+))*$", "now is the time for all good men to come to the aid of the party");
|
||||
return 0;
|
||||
}</PRE>
|
||||
<P>Which produces the following output:</P>
|
||||
<PRE>Expression: "(([[:lower:]]+)|([[:upper:]]+))+"
|
||||
Text: "aBBcccDDDDDeeeeeeee"
|
||||
** Match found **
|
||||
Sub-Expressions:
|
||||
$0 = "aBBcccDDDDDeeeeeeee"
|
||||
$1 = "eeeeeeee"
|
||||
$2 = "eeeeeeee"
|
||||
$3 = "DDDDD"
|
||||
Captures:
|
||||
$0 = { "aBBcccDDDDDeeeeeeee" }
|
||||
$1 = { "a", "BB", "ccc", "DDDDD", "eeeeeeee" }
|
||||
$2 = { "a", "ccc", "eeeeeeee" }
|
||||
$3 = { "BB", "DDDDD" }
|
||||
Expression: "(.*)bar|(.*)bah"
|
||||
Text: "abcbar"
|
||||
** Match found **
|
||||
Sub-Expressions:
|
||||
$0 = "abcbar"
|
||||
$1 = "abc"
|
||||
$2 = ""
|
||||
Captures:
|
||||
$0 = { "abcbar" }
|
||||
$1 = { "abc" }
|
||||
$2 = { }
|
||||
Expression: "(.*)bar|(.*)bah"
|
||||
Text: "abcbah"
|
||||
** Match found **
|
||||
Sub-Expressions:
|
||||
$0 = "abcbah"
|
||||
$1 = ""
|
||||
$2 = "abc"
|
||||
Captures:
|
||||
$0 = { "abcbah" }
|
||||
$1 = { }
|
||||
$2 = { "abc" }
|
||||
Expression: "^(?:(\w+)|(?>\W+))*$"
|
||||
Text: "now is the time for all good men to come to the aid of the party"
|
||||
** Match found **
|
||||
Sub-Expressions:
|
||||
$0 = "now is the time for all good men to come to the aid of the party"
|
||||
$1 = "party"
|
||||
Captures:
|
||||
$0 = { "now is the time for all good men to come to the aid of the party" }
|
||||
$1 = { "now", "is", "the", "time", "for", "all", "good", "men", "to", "come", "to", "the", "aid", "of", "the", "party" }
|
||||
</PRE>
|
||||
<P>Unfortunately enabling this feature has an impact on performance (even if you
|
||||
don't use it), and a much bigger impact if you do use it, therefore to use this
|
||||
feature you need to:</P>
|
||||
<UL>
|
||||
<LI>
|
||||
Define BOOST_REGEX_MATCH_EXTRA for all translation units including the library
|
||||
source (the best way to do this is to uncomment this define in <A href="../../../boost/regex/user.hpp">
|
||||
boost/regex/user.hpp</A>
|
||||
and then rebuild everything).</LI>
|
||||
<LI>
|
||||
Pass the <A href="match_flag_type.html">match_extra flag</A> to the particular
|
||||
algorithms where you actually need the captures information (<A href="regex_search.html">regex_search</A>,
|
||||
<A href="regex_match.html">regex_match</A>, or <A href="regex_iterator.html">regex_iterator</A>).
|
||||
</LI>
|
||||
</UL>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
12 Dec 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,326 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Character Class Names</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Character Class Names.</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<H3>Contents</H3>
|
||||
<dl class="index">
|
||||
<dt><a href="#always">Character Classes that are Always Supported</a></dt> <dt><a href="#unicode">
|
||||
Character classes that are supported by Unicode Regular Expressions</a></dt>
|
||||
</dl>
|
||||
<H3><A name="always"></A>Character Classes that are Always Supported</H3>
|
||||
<P>The following character class names are always supported by Boost.Regex:</P>
|
||||
<P>
|
||||
<TABLE id="Table2" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Name</STRONG></TD>
|
||||
<TD><STRONG>POSIX-standard name</STRONG></TD>
|
||||
<TD><STRONG>Description</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>alnum</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any alpha-numeric character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>alpha</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any alphabetic character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>blank</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any whitespace character that is not a line separator.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>cntrl</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any control character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>d</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Any decimal digit.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>digit</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any decimal digit.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>graph</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any graphical character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>l</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Any lower case character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>lower</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any lower case character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>print</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any printable character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>punct</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any punctuation character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>s</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Any whitespace character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>space</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any whitespace character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>unicode</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Any extended character whose code point is above 255 in value.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>u</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Any upper case character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>upper</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any upper case character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>w</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Any word character (alphanumeric characters plus the underscore).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>word</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Any word character (alphanumeric characters plus the underscore).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>xdigit</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Any hexadecimal digit character.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>
|
||||
<H3><A name="unicode"></A>Character classes that are supported by Unicode Regular
|
||||
Expressions</H3>
|
||||
<P>The following character classes are only supported by <A href="icu_strings.html">Unicode
|
||||
Regular Expressions</A>: that is those that use the u32regex type. The
|
||||
names used are the same as those from <A href="http://www.unicode.org/versions/Unicode4.0.0/ch04.pdf#G124142">
|
||||
Chapter 4 of the Unicode standard</A>.</P>
|
||||
<table width="100%" ID="Table3">
|
||||
<tr>
|
||||
<td><b>Short Name</b></td>
|
||||
<td><b>Long Name</b></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
</td>
|
||||
<td>ASCII</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
</td>
|
||||
<td>Any</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
</td>
|
||||
<td>Assigned</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>C*</td>
|
||||
<td>Other</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Cc</td>
|
||||
<td>Control</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Cf</td>
|
||||
<td>Format</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Cn</td>
|
||||
<td>Not Assigned</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Co</td>
|
||||
<td>Private Use</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Cs</td>
|
||||
<td>Surrogate</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>L*</td>
|
||||
<td>Letter</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Ll</td>
|
||||
<td>Lowercase Letter</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Lm</td>
|
||||
<td>Modifier Letter</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Lo</td>
|
||||
<td>Other Letter</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Lt</td>
|
||||
<td>Titlecase</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Lu</td>
|
||||
<td>Uppercase Letter</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>M*</td>
|
||||
<td>Mark</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mc</td>
|
||||
<td>Spacing Combining Mark</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Me</td>
|
||||
<td>Enclosing Mark</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mn</td>
|
||||
<td>Non-Spacing Mark</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>N*</td>
|
||||
<td>Number</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Nd</td>
|
||||
<td>Decimal Digit Number</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Nl</td>
|
||||
<td>Letter Number</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>No</td>
|
||||
<td>Other Number</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>P*</td>
|
||||
<td>Punctuation</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Pc</td>
|
||||
<td>Connector Punctuation</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Pd</td>
|
||||
<td>Dash Punctuation</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Pe</td>
|
||||
<td>Close Punctuation</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Pf</td>
|
||||
<td>Final Punctuation</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Pi</td>
|
||||
<td>Initial Punctuation</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Po</td>
|
||||
<td>Other Punctuation</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Ps</td>
|
||||
<td>Open Punctuation</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>S*</td>
|
||||
<td>Symbol</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Sc</td>
|
||||
<td>Currency Symbol</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Sk</td>
|
||||
<td>Modifier Symbol</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Sm</td>
|
||||
<td>Math Symbol</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>So</td>
|
||||
<td>Other Symbol</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Z*</td>
|
||||
<td>Separator</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Zl</td>
|
||||
<td>Line Separator</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Zp</td>
|
||||
<td>Paragraph Separator</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Zs</td>
|
||||
<td>Space Separator</td>
|
||||
</tr>
|
||||
</table>
|
||||
<HR>
|
||||
<P></P>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
10 Jan 2005
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 2004-5</i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,368 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Collating Element Names</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Collating Element Names</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<H3>Contents</H3>
|
||||
<dl class="index">
|
||||
<dt><A href="#digraphs">Digraphs</A></dt>
|
||||
<dt><A href="#posix">POSIX Symbolic Names</A></dt>
|
||||
<dt><A href="#unicode">Unicode Symbolic Names</A></dt>
|
||||
</dl>
|
||||
<H3><A name="digraphs"></A>Digraphs</H3>
|
||||
<P>The following are treated as valid digraphs when used as a collating name:</P>
|
||||
<P>"ae", "Ae", "AE", "ch", "Ch", "CH", "ll", "Ll", "LL", "ss", "Ss", "SS", "nj",
|
||||
"Nj", "NJ", "dz", "Dz", "DZ", "lj", "Lj", "LJ".</P>
|
||||
<H3><A name="posix"></A>POSIX Symbolic Names</H3>
|
||||
<P>The following symbolic names are recognised as valid collating element names,
|
||||
in addition to any single character:</P>
|
||||
<P>
|
||||
<TABLE id="Table2" cellSpacing="1" cellPadding="1" width="50%" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Name</STRONG></TD>
|
||||
<TD><STRONG>Character</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>NUL</TD>
|
||||
<TD>\x00</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>SOH</TD>
|
||||
<TD>\x01</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>STX</TD>
|
||||
<TD>\x02</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>ETX</TD>
|
||||
<TD>\x03</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>EOT</TD>
|
||||
<TD>\x04</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>ENQ</TD>
|
||||
<TD>\x05</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>ACK</TD>
|
||||
<TD>\x06</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>alert</TD>
|
||||
<TD>\x07</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>backspace</TD>
|
||||
<TD>\x08</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>tab</TD>
|
||||
<TD>\t</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>newline</TD>
|
||||
<TD>\n</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>vertical-tab</TD>
|
||||
<TD>\v</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>form-feed</TD>
|
||||
<TD>\f</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>carriage-return</TD>
|
||||
<TD>\r</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>SO</TD>
|
||||
<TD>\xE</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>SI</TD>
|
||||
<TD>\xF</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>DLE</TD>
|
||||
<TD>\x10</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>DC1</TD>
|
||||
<TD>\x11</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>DC2</TD>
|
||||
<TD>\x12</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>DC3</TD>
|
||||
<TD>\x13</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>DC4</TD>
|
||||
<TD>\x14</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>NAK</TD>
|
||||
<TD>\x15</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>SYN</TD>
|
||||
<TD>\x16</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>ETB</TD>
|
||||
<TD>\x17</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>CAN</TD>
|
||||
<TD>\x18</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>EM</TD>
|
||||
<TD>\x19</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>SUB</TD>
|
||||
<TD>\x1A</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>ESC</TD>
|
||||
<TD>\x1B</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>IS4</TD>
|
||||
<TD>\x1C</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>IS3</TD>
|
||||
<TD>\x1D</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>IS2</TD>
|
||||
<TD>\x1E</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>IS1</TD>
|
||||
<TD>\x1F</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>space</TD>
|
||||
<TD>\x20</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>exclamation-mark</TD>
|
||||
<TD>!</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>quotation-mark</TD>
|
||||
<TD>"</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>number-sign</TD>
|
||||
<TD>#</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>dollar-sign</TD>
|
||||
<TD>$</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>percent-sign</TD>
|
||||
<TD>%</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>ampersand</TD>
|
||||
<TD>&amp;</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>apostrophe</TD>
|
||||
<TD>'</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>left-parenthesis</TD>
|
||||
<TD>(</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>right-parenthesis</TD>
|
||||
<TD>)</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>asterisk</TD>
|
||||
<TD>*</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>plus-sign</TD>
|
||||
<TD>+</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>comma</TD>
|
||||
<TD>,</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>hyphen</TD>
|
||||
<TD>-</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>period</TD>
|
||||
<TD>.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>slash</TD>
|
||||
<TD>/</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>zero</TD>
|
||||
<TD>0</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>one</TD>
|
||||
<TD>1</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>two</TD>
|
||||
<TD>2</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>three</TD>
|
||||
<TD>3</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>four</TD>
|
||||
<TD>4</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>five</TD>
|
||||
<TD>5</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>six</TD>
|
||||
<TD>6</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>seven</TD>
|
||||
<TD>7</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>eight</TD>
|
||||
<TD>8</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>nine</TD>
|
||||
<TD>9</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>colon</TD>
|
||||
<TD>:</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>semicolon</TD>
|
||||
<TD>;</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>less-than-sign</TD>
|
||||
<TD>&lt;</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>equals-sign</TD>
|
||||
<TD>=</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>greater-than-sign</TD>
|
||||
<TD>&gt;</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>question-mark</TD>
|
||||
<TD>?</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>commercial-at</TD>
|
||||
<TD>@</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>left-square-bracket</TD>
|
||||
<TD>[</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>backslash</TD>
|
||||
<TD>\</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>right-square-bracket</TD>
|
||||
<TD>]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>circumflex</TD>
|
||||
<TD>^</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>underscore</TD>
|
||||
<TD>_</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>grave-accent</TD>
|
||||
<TD>`</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>left-curly-bracket</TD>
|
||||
<TD>{</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>vertical-line</TD>
|
||||
<TD>|</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>right-curly-bracket</TD>
|
||||
<TD>}</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>tilde</TD>
|
||||
<TD>~</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>DEL</TD>
|
||||
<TD>\x7F</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>
|
||||
<H3><A name="unicode"></A>Named Unicode Characters</H3>
|
||||
<P>When using <A href="icu_strings.html">Unicode aware regular expressions</A> (with
|
||||
the <EM>u32regex </EM>type), all the normal symbolic names for Unicode
|
||||
characters (those given in UnicodeData.txt) are recognised.</P>
|
||||
<P>
|
||||
<HR>
|
||||
</P>
|
||||
<P></P>
|
||||
<p>Revised 12 Jan 2005
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan --></p>
|
||||
<p><i>&copy; Copyright John Maddock 2004-2005</i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,453 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Index</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Concepts</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<H3><A name="charT"></A>charT requirements</H3>
|
||||
<P>Type charT used as a template argument to <A href="basic_regex.html">class template
|
||||
basic_regex</A>, must have a trivial default constructor, copy constructor,
|
||||
assignment operator, and destructor. In addition the following
|
||||
requirements must be met for objects; c of type charT, c1 and c2 of type charT
|
||||
const, and i of type int:</P>
|
||||
<P>
|
||||
<TABLE id="Table2" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Expression</STRONG></TD>
|
||||
<TD><STRONG>Return type</STRONG></TD>
|
||||
<TD><STRONG>Assertion / Note / Pre- / Post-condition</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>charT c</TD>
|
||||
<TD>charT</TD>
|
||||
<TD>Default constructor (must be trivial).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>charT c(c1)</TD>
|
||||
<TD>charT</TD>
|
||||
<TD>Copy constructor (must be trivial).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>c1 = c2</TD>
|
||||
<TD>charT</TD>
|
||||
<TD>Assignment operator (must be trivial).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>c1 == c2</TD>
|
||||
<TD>bool</TD>
|
||||
<TD>true if c1 has the same value as c2.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>c1 != c2</TD>
|
||||
<TD>bool</TD>
|
||||
<TD>true if c1 and c2 are not equal.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>c1 < c2</TD>
|
||||
<TD>bool</TD>
|
||||
<TD>true if the value of c1 is less than c2.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>c1 > c2</TD>
|
||||
<TD>bool</TD>
|
||||
<TD>true if the value of c1 is greater than c2.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>c1 <= c2</TD>
|
||||
<TD>bool</TD>
|
||||
<TD>true if c1 is less than or equal to c2.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>c1 >= c2</TD>
|
||||
<TD>bool</TD>
|
||||
<TD>true if c1 is greater than or equal to c2.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>intmax_t i = c1</TD>
|
||||
<TD>int</TD>
|
||||
<TD>
|
||||
<P>charT must be convertible to an integral type.</P>
|
||||
<P>Note: type charT is not required to support this operation, if the traits class
|
||||
used supports the full Boost-specific interface, rather than the minimal
|
||||
standardised-interface (see traits class requirements below).</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>charT c(i);</TD>
|
||||
<TD>charT</TD>
|
||||
<TD>charT must be constructable from an integral type.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H3><A name="traits"></A>traits requirements</H3>
|
||||
<P>There are two sets of requirements for the traits template argument to
|
||||
basic_regex: a minimal interface (which is part of the regex standardization
|
||||
proposal), and an optional Boost-specific enhanced interface.</P>
|
||||
<H4>Minimal requirements.</H4>
|
||||
<P>In the following table X denotes a traits class defining types and functions
|
||||
for the character container type charT; u is an object of type X; v is an
|
||||
object of type const X; p is a value of type const charT*; I1 and I2 are Input
|
||||
Iterators; c is a value of type const charT; s is an object of type
|
||||
X::string_type; cs is an object of type const X::string_type; b is a value of
|
||||
type bool; I is a value of type int; F1 and F2 are values of type const charT*;
|
||||
and loc is an object of type X::locale_type.</P>
|
||||
<P>
|
||||
<TABLE id="Table3" cellSpacing="1" cellPadding="7" width="100%" border="1">
|
||||
<TR>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P><STRONG>Expression</STRONG></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P><STRONG>Return type</STRONG></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">
|
||||
<P><STRONG>Assertion / Note
|
||||
<BR>
|
||||
Pre / Post condition</STRONG></P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>X::char_type</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>charT</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">
|
||||
<P>The character container type used in the implementation of class template <CODE>basic_regex</CODE>.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>X::size_type</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P> </P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">
|
||||
<P>An unsigned integer type, capable of holding the length of a null-terminated
|
||||
string of charT's.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>X::string_type</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>std::basic_string&lt;charT&gt; or std::vector&lt;charT&gt;</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">
|
||||
<P> </P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>X::locale_type</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>Implementation defined</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">
|
||||
<P>A copy constructible type that represents the locale used by the traits class.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>X::char_class_type</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>Implementation defined</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">
|
||||
<P>A bitmask type representing a particular character classification. Multiple
|
||||
values of this type can be bitwise-or'ed together to obtain a new valid value.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>X::length(p)</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>X::size_type</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">
|
||||
<P>Yields the smallest <CODE>i</CODE> such that <CODE>p[i] == 0</CODE>. Complexity
|
||||
is linear in <CODE>i</CODE>.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>v.translate(c)</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>X::char_type</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">
|
||||
<P>Returns a character such that for any character d that is to be considered
|
||||
equivalent to c then v.translate(c) == v.translate(d).</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>v.translate_nocase(c)</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="28%">X::char_type</TD>
|
||||
<TD vAlign="top" width="45%">For all characters C that are to be considered
|
||||
equivalent to c when comparisons are to be performed without regard to case,
|
||||
then v.translate_nocase(c) == v.translate_nocase(C).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>v.transform(F1, F2)</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>X::string_type</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">
|
||||
<P>Returns a sort key for the character sequence designated by the iterator range
|
||||
[F1, F2) such that if the character sequence [G1, G2) sorts before the
|
||||
character sequence [H1, H2) then v.transform(G1, G2) < v.transform(H1,
|
||||
H2). </P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>v.transform_primary(F1, F2)</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>X::string_type</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">
|
||||
<P>Returns a sort key for the character sequence designated by the iterator range
|
||||
[F1, F2) such that if the character sequence [G1, G2) sorts before the
|
||||
character sequence [H1, H2) when character case is not considered then
|
||||
v.transform_primary(G1, G2) &lt; v.transform_primary(H1, H2).</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>v.lookup_classname(F1, F2)</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>X::char_class_type</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">
|
||||
<P>Converts the character sequence designated by the iterator range [F1,F2) into a
|
||||
bitmask type that can subsequently be passed to isctype. Values returned from
|
||||
lookup_classname can be safely bitwise or'ed together. Returns 0 if the
|
||||
character sequence is not the name of a character class recognized by X. The
|
||||
value returned shall be independent of the case of the characters in the
|
||||
sequence.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>v.lookup_collatename(F1, F2)</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>X::string_type</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">
|
||||
<P>Returns a sequence of characters that represents the collating element
|
||||
consisting of the character sequence designated by the iterator range [F1, F2).
|
||||
Returns an empty string if the character sequence is not a valid collating
|
||||
element.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>v.isctype(c, v.lookup_classname (F1, F2))</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>bool</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">
|
||||
<P>Returns true if character c is a member of the character class designated by
|
||||
the iterator range [F1, F2), false otherwise.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>v.value(c, I)</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>int</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">
|
||||
<P>Returns the value represented by the digit c in base I if the character c is a
|
||||
valid digit in base I; otherwise returns -1. [Note: the value of I will only be
|
||||
8, 10, or 16. -end note]</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>u.imbue(loc)</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>X::locale_type</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">
|
||||
<P>Imbues <CODE>u</CODE> with the locale <CODE>loc</CODE>, returns the previous
|
||||
locale used by u if any. </P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>v.getloc()</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>X::locale_type</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">
|
||||
<P>Returns the current locale used by <CODE>v</CODE> if any. </P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>v.error_string(i)</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="28%">
|
||||
<P>std::string</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">
|
||||
<P>Returns a human readable error string for the error condition <CODE>i</CODE>,
|
||||
where <CODE>i</CODE> is one of the values enumerated by type <CODE>regex_constants::error_type</CODE>.
|
||||
If the value <CODE>i</CODE> is not recognized then returns the string "Unknown
|
||||
error" or a localized equivalent.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H4>Additional Optional Requirements</H4>
|
||||
<P>The following additional requirements are strictly optional, however in order
|
||||
for basic_regex to take advantage of these additional interfaces, all of the
|
||||
following requirements must be met; basic_regex will detect the presence or
|
||||
absence of member <EM>boost_extensions_tag </EM>and configure itself
|
||||
appropriately.</P>
|
||||
<P>
|
||||
<TABLE id="Table4" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Expression</STRONG></TD>
|
||||
<TD width="231"><STRONG>Result</STRONG></TD>
|
||||
<TD>
|
||||
<P><STRONG>Assertion / Note
|
||||
<BR>
|
||||
Pre / Post condition</STRONG></P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>X::boost_extensions_tag</TD>
|
||||
<TD width="231">An unspecified type.</TD>
|
||||
<TD>When present, all of the extensions listed in this table must be present.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>
|
||||
<P>v.syntax_type(c)</P>
|
||||
</TD>
|
||||
<TD width="231"><A href="../../../boost/regex/v4/syntax_type.hpp">regex_constants::syntax_type</A></TD>
|
||||
<TD>
|
||||
<P>Returns a symbolic value of type <CODE>regex_constants::syntax_type </CODE>that
|
||||
signifies the meaning of character <CODE>c</CODE> within the regular expression
|
||||
grammar.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>v.escape_syntax_type(c)</TD>
|
||||
<TD width="231"><A href="../../../boost/regex/v4/syntax_type.hpp">regex_constants::escape_syntax_type</A></TD>
|
||||
<TD>
|
||||
<P>Returns a symbolic value of type <CODE>regex_constants::escape_syntax_type</CODE>,
|
||||
that signifies the meaning of character <CODE>c</CODE> within the regular
|
||||
expression grammar, when <CODE>c</CODE> has been preceded by an escape
|
||||
character. Precondition: if <CODE>b</CODE> is the character preceding <CODE>c</CODE>
|
||||
in the expression being parsed then: <CODE>v.syntax_type(b) == syntax_escape</CODE></P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>
|
||||
<P>v.translate(c, b)</P>
|
||||
</TD>
|
||||
<TD width="231">X::char_type</TD>
|
||||
<TD>
|
||||
<P>Returns a character <CODE>d</CODE> such that: for any character <CODE>d</CODE> that
|
||||
is to be considered equivalent to <CODE>c</CODE> then <CODE>v.translate(c,false)==v.translate(d,false)</CODE>.
|
||||
Likewise for all characters <CODE>C</CODE> that are to be considered equivalent
|
||||
to <CODE>c</CODE> when comparisons are to be performed without regard to case,
|
||||
then <CODE>v.translate(c,true)==v.translate(C,true)</CODE>.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>
|
||||
<P>v.toi(I1, I2, i)</P>
|
||||
</TD>
|
||||
<TD width="231">An integer type capable of holding either a charT or an int.</TD>
|
||||
<TD>
|
||||
<P>Behaves as follows: if <CODE>p==q</CODE> or if <CODE>*p </CODE>is not a digit
|
||||
character then returns -1. Otherwise performs formatted numeric input on the
|
||||
sequence [p,q) and returns the result as an int. Postcondition: either <CODE>p ==
|
||||
q</CODE> or <CODE>*p</CODE> is a non-digit character.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>
|
||||
<P>v.error_string(i)</P>
|
||||
</TD>
|
||||
<TD width="231">std::string</TD>
|
||||
<TD>
|
||||
<P>Returns a human readable error string for the error condition <CODE>i</CODE>,
|
||||
where <CODE>i</CODE> is one of the values enumerated by type <CODE><A href="error_type.html">
|
||||
regex_constants::error_type</A></CODE>. If the value <CODE>i</CODE>
|
||||
is not recognized then returns the string "Unknown error" or a localized
|
||||
equivalent.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>v.tolower(c)</TD>
|
||||
<TD width="231">X::char_type</TD>
|
||||
<TD>Converts c to lower case, used for Perl-style \l and \L formatting operations.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>v.toupper(c)</TD>
|
||||
<TD width="231">X::char_type</TD>
|
||||
<TD>Converts c to upper case, used for Perl-style \u and \U formatting operations.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H3><A name="iterator"></A>Iterator Requirements</H3>
|
||||
<P>
|
||||
<P>The regular expression algorithms (and iterators) all require a
|
||||
Bidirectional-Iterator.</P>
|
||||
<P>
|
||||
<HR>
|
||||
</P>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 June 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,155 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Configuration and setup</title>
|
||||
<meta name="generator" content="HTML Tidy, see www.w3.org">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<table id="Table1" cellspacing="1" cellpadding="1" width="100%" border="0">
|
||||
<tr>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<td width="353">
|
||||
<h1 align="center">Boost.Regex</h1>
|
||||
<h2 align="center">Configuration and setup</h2>
|
||||
</td>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<hr>
|
||||
<h2>Contents</h2>
|
||||
<dl class="index">
|
||||
<dt><a href="#compiler">Compiler setup</a> <dt><a href="#locale">Locale and traits class
|
||||
selection</a> <dt><a href="#linkage">Linkage Options</a> <dt><a href="#algorithm">Algorithm
|
||||
Selection</a> <dt><a href="#tuning">Algorithm Tuning</a></dt>
|
||||
</dl>
|
||||
<h3><a name="compiler"></a>Compiler setup.</h3>
|
||||
<p>You shouldn't need to do anything special to configure boost.regex for use with
|
||||
your compiler - the <a href="../../config/index.html">boost.config</a> subsystem
|
||||
should already take care of it, if you do have problems (or you are using a
|
||||
particularly obscure compiler or platform) then <a href="../../config/index.html">boost.config</a> has
|
||||
a <a href="../../config/config.htm#config_script">configure</a> script.</p>
|
||||
<h3><a name="locale"></a>Locale and traits class selection.</h3>
|
||||
<p>The following macros (see <a href="../../../boost/regex/user.hpp">user.hpp</a>)
|
||||
control how boost.regex interacts with the user's locale:</p>
|
||||
<table id="Table2" cellspacing="1" cellpadding="1" width="100%" border="1">
|
||||
<tr>
|
||||
<td width="265">BOOST_REGEX_USE_C_LOCALE</td>
|
||||
<td>
|
||||
Forces boost.regex to use the global C locale in its traits class support: this
|
||||
is now deprecated in favour of the C++ locale.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="265">BOOST_REGEX_USE_CPP_LOCALE</td>
|
||||
<td>Forces boost.regex to use std::locale in its default traits class, regular
|
||||
expressions can then be imbued with an instance specific locale.
|
||||
This is the default behaviour on non-Windows platforms.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="265">BOOST_REGEX_NO_W32</td>
|
||||
<td>Tells boost.regex not to use any Win32 API's even when available (implies
|
||||
BOOST_REGEX_USE_CPP_LOCALE unless BOOST_REGEX_USE_C_LOCALE is set).</td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<h3><a name="linkage"></a>Linkage Options</h3>
|
||||
<table id="Table3" cellspacing="1" cellpadding="1" width="100%" border="1">
|
||||
<tr>
|
||||
<td>BOOST_REGEX_DYN_LINK</td>
|
||||
<td>For Microsoft and Borland C++ builds, this tells boost.regex that it should
|
||||
link to the dll build of the boost.regex. By default boost.regex will
|
||||
link to its static library build, even if the dynamic C runtime library is in
|
||||
use.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>BOOST_REGEX_NO_LIB</td>
|
||||
<td>For Microsoft and Borland C++ builds, this tells boost.regex that it should
|
||||
not automatically select the library to link to.</td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<h3><a name="algorithm"></a>Algorithm Selection</h3>
|
||||
<table id="Table4" cellspacing="1" cellpadding="1" width="100%" border="1">
|
||||
<tr>
|
||||
<td width="253">BOOST_REGEX_RECURSIVE</td>
|
||||
<td>Tells boost.regex to use a stack-recursive matching algorithm. This is
|
||||
generally the fastest option (although there is very little in it), but can
|
||||
cause stack overflow in extreme cases, on Win32 this can be handled safely, but
|
||||
this is not the case on other platforms.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="253">BOOST_REGEX_NON_RECURSIVE</td>
|
||||
<td>Tells boost.regex to use a non-stack recursive matching algorithm, this can be
|
||||
slightly slower than the alternative, but is always safe no matter how
|
||||
pathological the regular expression. This is the default on non-Win32
|
||||
platforms.</td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<h3><a name="tuning"></a>Algorithm Tuning</h3>
|
||||
<p>The following option applies only if BOOST_REGEX_RECURSIVE is set.</p>
|
||||
<table id="Table6" cellspacing="1" cellpadding="1" width="100%" border="1">
|
||||
<tr>
|
||||
<td>BOOST_REGEX_HAS_MS_STACK_GUARD</td>
|
||||
<td>Tells boost.regex that Microsoft style __try - __except blocks are supported,
|
||||
and can be used to safely trap stack overflow.</td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<p>The following options apply only if BOOST_REGEX_NON_RECURSIVE is set.</p>
|
||||
<table id="Table5" cellspacing="1" cellpadding="1" width="100%" border="1">
|
||||
<tr>
|
||||
<td>BOOST_REGEX_BLOCKSIZE</td>
|
||||
<td>In non-recursive mode, boost.regex uses largish blocks of memory to act as a
|
||||
stack for the state machine, the larger the block size then the fewer
|
||||
allocations that will take place. This defaults to 4096 bytes, which is
|
||||
large enough to match the vast majority of regular expressions without
|
||||
further allocations, however, you can choose smaller or larger values depending
|
||||
upon your platform's characteristics.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>BOOST_REGEX_MAX_BLOCKS</td>
|
||||
<td>Tells boost.regex how many blocks of size BOOST_REGEX_BLOCKSIZE it is
|
||||
permitted to use. If this value is exceeded then boost.regex will stop
|
||||
trying to find a match and throw a std::runtime_error. Defaults to 1024,
|
||||
don't forget to tweak this value if you alter BOOST_REGEX_BLOCKSIZE by much.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>BOOST_REGEX_MAX_CACHE_BLOCKS</td>
|
||||
<td>Tells boost.regex how many memory blocks to store in its internal cache -
|
||||
memory blocks are taken from this cache rather than by calling ::operator
|
||||
new. Generally speeking this can be an order of magnitude faster than
|
||||
calling ::operator new each time a memory block is required, but has the
|
||||
downside that boost.regex can end up caching a large chunk of memory (by
|
||||
default up to 16 blocks each of BOOST_REGEX_BLOCKSIZE size). If memory is
|
||||
tight then try defining this to 0 (disables all caching), or if that is too
|
||||
slow, then a value of 1 or 2, may be sufficient. On the other hand, on
|
||||
large multi-processor, multi-threaded systems, you may find that a higher value
|
||||
is in order.</td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<hr>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
23 June 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,87 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Contacts</title>
|
||||
<meta name="generator" content="HTML Tidy, see www.w3.org">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<p></p>
|
||||
<table id="Table1" cellspacing="1" cellpadding="1" width="100%" border="0">
|
||||
<tr>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../boost.png" width="277" border="0"></a></h3>
|
||||
</td>
|
||||
<td width="353">
|
||||
<h1 align="center">Boost.Regex</h1>
|
||||
<h2 align="center">Contacts and Acknowledgements</h2>
|
||||
</td>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></a></h3>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<BR>
|
||||
<BR>
|
||||
<HR>
|
||||
<P>The author can be contacted at john@johnmaddock.co.uk; the home page for
|
||||
this library is at <A href="http://www.boost.org">www.boost.org</A>.</P>
|
||||
<P>I am indebted to <A href="http://www.cs.princeton.edu/~rs/">Robert Sedgewick's
|
||||
"Algorithms in C++" </A>for forcing me to think about algorithms and their
|
||||
performance, and to the folks at <A href="http://www.boost.org">boost</A> for
|
||||
forcing me to <I>think</I>, period.</P>
|
||||
<P><A href="http://www.boost-consulting.com">Eric Niebler</A>, author of the <A href="http://research.microsoft.com/projects/greta">
|
||||
GRETA regular expression component</A>, has shared several important ideas,
|
||||
in a series of long discussions.</P>
|
||||
<P>Pete Becker, of <A href="http://www.dinkumware.com/">Dinkumware Ltd</A>, has
|
||||
helped enormously with the standardisation proposal language.</P>
|
||||
<P>The following people have all contributed useful comments or fixes: Dave
|
||||
Abrahams, Mike Allison, Edan Ayal, Jayashree Balasubramanian, Jan B&ouml;lsche,
|
||||
Beman Dawes, Paul Baxter, David Bergman, David Dennerline, Edward Diener, Peter
|
||||
Dimov, Robert Dunn, Fabio Forno, Tobias Gabrielsson, Rob Gillen, Marc Gregoire,
|
||||
Chris Hecker, Nick Hodapp, Jesse Jones, Martin Jost, Boris Krasnovskiy, Jan
|
||||
Hermelink, Max Leung, Wei-hao Lin, Jens Maurer, Richard Peters, Heiko Schmidt,
|
||||
Jason Shirk, Gerald Slacik, Scobie Smith, Mike Smyth, Alexander Sokolovsky,
|
||||
Herv&eacute; Poirier, Michael Raykh, Marc Recht, Scott VanCamp, Bruno Voigt, Alexey
|
||||
Voinov, Jerry Waldorf, Rob Ward, Lealon Watts, John Wismar, Thomas Witt and
|
||||
Yuval Yosef. I am also grateful to the manuals supplied with the Henry Spencer,
|
||||
Perl and GNU regular expression libraries - wherever possible I have tried to
|
||||
maintain compatibility with these libraries and with the POSIX standard - the
|
||||
code however is entirely my own, including any bugs! I can absolutely guarantee
|
||||
that I will not fix any bugs I don't know about, so if you have any comments or
|
||||
spot any bugs, please get in touch.</P>
|
||||
<P>Useful further information can be found at:</P>
|
||||
<P>Short tutorials on regular expressions can be <A href="http://etext.lib.virginia.edu/helpsheets/regex.html">
|
||||
found here</A> and <A href="http://www.linuxpcug.org/lessons/regexp.html">here</A>.</P>
|
||||
<P>The main book on regular expressions is <A href="http://www.oreilly.com/catalog/regex/">
|
||||
Mastering Regular Expressions, published by O'Reilly</A>.</P>
|
||||
<P>Information on the <A href="http://anubis.dkuug.dk/jtc1/sc22/wg21/docs/papers/2003/n1429.htm">
|
||||
Boost.regex standardization proposal</A>, along with other <A href="http://anubis.dkuug.dk/jtc1/sc22/wg21/docs/papers/2003/n1475.html">
|
||||
standard library extension proposals</A> can be found on the <A href="http://anubis.dkuug.dk/jtc1/sc22/wg21/">
|
||||
C++ Committee's web pages</A>.</P>
|
||||
<P>The <a href="http://www.opengroup.org/onlinepubs/7908799/toc.htm">Open Unix
|
||||
Specification</a> contains a wealth of useful material, including the
|
||||
regular expression syntax, and specifications for <a href="http://www.opengroup.org/onlinepubs/7908799/xsh/regex.h.html">
|
||||
&lt;regex.h&gt;</a> and <a href="http://www.opengroup.org/onlinepubs/7908799/xsh/nl_types.h.html">
|
||||
&lt;nl_types.h&gt;</a>.</P>
|
||||
<p>The <a href="http://www.cs.ucr.edu/~stelo/pattern.html">Pattern Matching Pointers</a>
|
||||
site is a "must visit" resource for anyone interested in pattern matching.</p>
|
||||
<p><a href="http://glimpse.cs.arizona.edu/">Glimpse and Agrep</a>, use a
|
||||
simplified regular expression syntax to achieve faster search times.</p>
|
||||
<p><a href="http://glimpse.cs.arizona.edu/udi.html">Udi Manber</a> and <a href="http://www.dcc.uchile.cl/~rbaeza/">
|
||||
Ricardo Baeza-Yates</a> both have a selection of useful pattern matching
|
||||
papers available from their respective web sites.</p>
|
||||
<p></p>
|
||||
<hr>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,139 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: error_type</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td vAlign="top" width="300">
|
||||
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../boost.png" width="277" border="0"></A></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">error_type</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<H3>Contents</H3>
|
||||
<dl class="index">
|
||||
<dt><A href="#synopsis">Synopsis</A></dt> <dt><a href="#description">Description</a></dt></dl>
|
||||
<H3><A name="synopsis"></A>Synopsis</H3>
|
||||
<P>Type error_type represents the different types of errors that can be raised by
|
||||
the library when parsing a regular expression.</P>
|
||||
<pre>
|
||||
namespace boost{ namespace regex_constants{
|
||||
|
||||
typedef implementation-specific-type error_type;
|
||||
|
||||
static const error_type error_collate;
|
||||
static const error_type error_ctype;
|
||||
static const error_type error_escape;
|
||||
static const error_type error_backref;
|
||||
static const error_type error_brack;
|
||||
static const error_type error_paren;
|
||||
static const error_type error_brace;
|
||||
static const error_type error_badbrace;
|
||||
static const error_type error_range;
|
||||
static const error_type error_space;
|
||||
static const error_type error_badrepeat;
|
||||
static const error_type error_complexity;
|
||||
static const error_type error_stack;
|
||||
static const error_type error_bad_pattern;
|
||||
|
||||
} // namespace regex_constants
|
||||
} // namespace boost
|
||||
</pre>
|
||||
<P> </P>
|
||||
<H3><A name="description"></A>Description</H3>
|
||||
<P>The type error_type is an implementation-specific enumeration type that may
|
||||
take one of the following values:</P>
|
||||
<P>
|
||||
<TABLE id="Table2" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Constant</STRONG></TD>
|
||||
<TD><STRONG>Meaning</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>error_collate</TD>
|
||||
<TD>An invalid collating element was specified in a [[.name.]] block.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>error_ctype</TD>
|
||||
<TD>An invalid character class name was specified in a [[:name:]] block.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>error_escape</TD>
|
||||
<TD>An invalid or trailing escape was encountered.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>error_backref</TD>
|
||||
<TD>A back-reference to a non-existent marked sub-expression was encountered.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>error_brack</TD>
|
||||
<TD>An invalid character set [...] was encountered.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>error_paren</TD>
|
||||
<TD>
|
||||
<P>Mismatched '(' and ')'.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>error_brace</TD>
|
||||
<TD>Mismatched '{' and '}'.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>error_badbrace</TD>
|
||||
<TD>Invalid contents of a {...} block.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>error_range</TD>
|
||||
<TD>A character range was invalid, for example [d-a].</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>error_space</TD>
|
||||
<TD>Out of memory.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>error_badrepeat</TD>
|
||||
<TD>An attempt to repeat something that cannot be repeated - for example a*+</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>error_complexity</TD>
|
||||
<TD>The expression became too complex to handle.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>error_stack</TD>
|
||||
<TD>Out of program stack space.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>error_bad_pattern</TD>
|
||||
<TD>Other unspecified errors.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H3><A name="examples"></A>
|
||||
<HR>
|
||||
</H3>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 June 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" -->
|
||||
</p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -1,117 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Examples</title>
|
||||
<meta name="generator" content="HTML Tidy, see www.w3.org">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<p></p>
|
||||
<table id="Table1" cellspacing="1" cellpadding="1" width="100%" border="0">
|
||||
<tr>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../boost.png" width="277" border="0"></a></h3>
|
||||
</td>
|
||||
<td width="353">
|
||||
<h1 align="center">Boost.Regex</h1>
|
||||
<h2 align="center">Examples</h2>
|
||||
</td>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></a></h3>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<BR>
|
||||
<BR>
|
||||
<HR>
|
||||
<H3>Test Programs</H3>
|
||||
<H4>regress:</H4>
|
||||
<P>A regression test application that gives the matching/searching algorithms a
|
||||
full workout. The presence of this program is your guarantee that the library
|
||||
will behave as claimed - at least as far as those items tested are concerned -
|
||||
if anyone spots anything that isn't being tested I'd be glad to hear about it.</P>
|
||||
<P>Directory: <A href="../test/regress">libs/regex/test/regress</A>.</P>
|
||||
<P>Files: <A href="../test/regress/basic_tests.cpp">basic_tests.cpp</A> <A href="../test/regress/test_deprecated.cpp">
|
||||
test_deprecated.cpp</A> <A href="../test/regress/main.cpp">main.cpp</A>.</P>
|
||||
<H4>bad_expression_test:</H4>
|
||||
<P>Verifies that "bad" regular expressions don't cause the matcher to go into
|
||||
infinite loops, but to throw an exception instead.</P>
|
||||
<P>Directory: <A href="../test/pathology">libs/regex/test/pathology</A>.</P>
|
||||
<P>Files: <A href="../test/pathology/bad_expression_test.cpp">bad_expression_test.cpp</A>.</P>
|
||||
<H4>recursion_test:</H4>
|
||||
<P>Verifies that the matcher can't overrun the stack (no matter what the
|
||||
expression).</P>
|
||||
<P>Directory: <A href="../test/pathology">libs/regex/test/pathology</A>.</P>
|
||||
<P>Files: <A href="../test/pathology/recursion_test.cpp">recursion_test.cpp</A>.</P>
|
||||
<H4>concepts:</H4>
|
||||
<P>Verifies that the library meets all documented concepts (a compile only test).</P>
|
||||
<P>Directory: <A href="../test/concepts">libs/regex/test/concepts</A>.</P>
|
||||
<P>Files: <A href="../test/concepts/concept_check.cpp">concept_check.cpp</A>.</P>
|
||||
<H4>captures_test:</H4>
|
||||
<P>Test code for captures.</P>
|
||||
<P>Directory: <A href="../test/captures">libs/regex/test/captures</A>.</P>
|
||||
<P>Files: <A href="../test/captures/captures_test.cpp">captures_test.cpp</A>.</P>
|
||||
<H3>Example programs</H3>
|
||||
<H4>grep</H4>
|
||||
<P>A simple grep implementation, run with the -h command line option to find out
|
||||
its usage.</P>
|
||||
<P>Files: <A href="../example/grep/grep.cpp">grep.cpp</A></P>
|
||||
<H4>timer.exe</H4>
|
||||
<P>A simple interactive expression matching application, the results of all
|
||||
matches are timed, allowing the programmer to optimize their regular
|
||||
expressions where performance is critical.</P>
|
||||
<P>Files: <A href="../example/timer/regex_timer.cpp">regex_timer.cpp</A>.</P>
|
||||
<H4>Code snippets</H4>
|
||||
<P>The snippets examples contain the code examples used in the documentation:</P>
|
||||
<P><A href="../example/snippets/captures_example.cpp">captures_example.cpp</A>:
|
||||
Demonstrates the use of captures.</P>
|
||||
<P><A href="../example/snippets/credit_card_example.cpp">credit_card_example.cpp</A>:
|
||||
Credit card number formatting code.</P>
|
||||
<P><A href="../example/snippets/partial_regex_grep.cpp">partial_regex_grep.cpp</A>:
|
||||
Search example using partial matches.</P>
|
||||
<P><A href="../example/snippets/partial_regex_match.cpp">partial_regex_match.cpp</A>:
|
||||
regex_match example using partial matches.</P>
|
||||
<P><A href="../example/snippets/regex_iterator_example.cpp">regex_iterator_example.cpp</A>:
|
||||
Iterating through a series of matches.</P>
|
||||
<P><A href="../example/snippets/regex_match_example.cpp">regex_match_example.cpp</A>:
|
||||
ftp based regex_match example.</P>
|
||||
<P><A href="../example/snippets/regex_merge_example.cpp">regex_merge_example.cpp</A>:
|
||||
regex_merge example: converts a C++ file to syntax highlighted HTML.</P>
|
||||
<P><A href="../example/snippets/regex_replace_example.cpp">regex_replace_example.cpp</A>:
|
||||
regex_replace example: converts a C++ file to syntax highlighted HTML</P>
|
||||
<P><A href="../example/snippets/regex_search_example.cpp">regex_search_example.cpp</A>:
|
||||
regex_search example: searches a cpp file for class definitions.</P>
|
||||
<P><A href="../example/snippets/regex_token_iterator_eg_1.cpp">regex_token_iterator_eg_1.cpp</A>:
|
||||
split a string into a series of tokens.</P>
|
||||
<P><A href="../example/snippets/regex_token_iterator_eg_2.cpp">regex_token_iterator_eg_2.cpp</A>:
|
||||
enumerate the linked URL's in an HTML file.</P>
|
||||
<P>The following are deprecated:</P>
|
||||
<P><A href="../example/snippets/regex_grep_example_1.cpp">regex_grep_example_1.cpp</A>:
|
||||
regex_grep example 1: searches a cpp file for class definitions.</P>
|
||||
<P><A href="../example/snippets/regex_grep_example_2.cpp">regex_grep_example_2.cpp</A>:
|
||||
regex_grep example 2: searches a cpp file for class definitions, using a global
|
||||
callback function.</P>
|
||||
<P><A href="../example/snippets/regex_grep_example_3.cpp">regex_grep_example_3.cpp</A>:
|
||||
regex_grep example 3: searches a cpp file for class definitions, using a bound
|
||||
member function callback.</P>
|
||||
<P><A href="../example/snippets/regex_grep_example_4.cpp">regex_grep_example_4.cpp</A>:
|
||||
regex_grep example 4: searches a cpp file for class definitions, using a C++
|
||||
Builder closure as a callback.</P>
|
||||
<P><A href="../example/snippets/regex_split_example_1.cpp">regex_split_example_1.cpp</A>:
|
||||
regex_split example: split a string into tokens.</P>
|
||||
<P><A href="../example/snippets/regex_split_example_2.cpp">regex_split_example_2.cpp</A>
|
||||
: regex_split example: spit out linked URL's.</P>
|
||||
<P></P>
|
||||
<HR>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
28 June 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
114
doc/faq.html
114
doc/faq.html
@ -1,114 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: FAQ</title>
|
||||
<meta name="generator" content="HTML Tidy, see www.w3.org">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<p></p>
|
||||
<table id="Table1" cellspacing="1" cellpadding="1" width="100%" border="0">
|
||||
<tr>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<td width="353">
|
||||
<h1 align="center">Boost.Regex</h1>
|
||||
<h2 align="center">FAQ</h2>
|
||||
</td>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<hr>
|
||||
<font color="#ff0000"><font color="#ff0000"></font></font>
|
||||
<p><font color="#ff0000"><font color="#ff0000"><font color="#ff0000"> Q. Why can't I
|
||||
use the "convenience" versions of regex_match / regex_search / regex_grep /
|
||||
regex_format / regex_merge?</font></font></font></p>
|
||||
<p>A. These versions may or may not be available depending upon the capabilities
|
||||
of your compiler, the rules determining the format of these functions are quite
|
||||
complex - and only the versions visible to a standard compliant compiler are
|
||||
given in the help. To find out what your compiler supports, run
|
||||
&lt;boost/regex.hpp&gt; through your C++ pre-processor, and search the output
|
||||
file for the function that you are interested in.<font color="#ff0000"><font color="#ff0000"></font></font></p>
|
||||
<p><font color="#ff0000"><font color="#ff0000">Q. I can't get regex++ to work with
|
||||
escape characters, what's going on?</font></font></p>
|
||||
<p>A. If you embed regular expressions in C++ code, then remember that escape
|
||||
characters are processed twice: once by the C++ compiler, and once by the
|
||||
regex++ expression compiler, so to pass the regular expression \d+ to regex++,
|
||||
you need to embed "\\d+" in your code. Likewise to match a literal backslash
|
||||
you will need to embed "\\\\" in your code. <font color="#ff0000"></font>
|
||||
</p>
|
||||
<p><font color="#ff0000">Q. Why does using parentheses in a POSIX regular expression
|
||||
change the result of a match?</font></p>
|
||||
<p>A. For POSIX (extended and basic) regular expressions, but not for perl regexes,
|
||||
parentheses don't only mark; they determine what the best match is as well.
|
||||
When the expression is compiled as a POSIX basic or extended regex then
|
||||
Boost.regex follows the POSIX standard leftmost longest rule for determining
|
||||
what matched. So if there is more than one possible match after considering the
|
||||
whole expression, it looks next at the first sub-expression and then the second
|
||||
sub-expression and so on. So...</p>
|
||||
<pre>
|
||||
"(0*)([0-9]*)" against "00123" would produce
|
||||
$1 = "00"
|
||||
$2 = "123"
|
||||
</pre>
|
||||
<p>whereas</p>
|
||||
<pre>
|
||||
"0*([0-9])*" against "00123" would produce
|
||||
$1 = "00123"
|
||||
</pre>
|
||||
<p>If you think about it, had $1 only matched the "123", this would be "less good"
|
||||
than the match "00123" which is both further to the left and longer. If you
|
||||
want $1 to match only the "123" part, then you need to use something like:</p>
|
||||
<pre>
|
||||
"0*([1-9][0-9]*)"
|
||||
</pre>
|
||||
<p>as the expression.</p>
|
||||
<p><font color="#ff0000">Q. Why don't character ranges work properly (POSIX mode
|
||||
only)?</font><br>
|
||||
A. The POSIX standard specifies that character range expressions are locale
|
||||
sensitive - so for example the expression [A-Z] will match any collating
|
||||
element that collates between 'A' and 'Z'. That means that for most locales
|
||||
other than "C" or "POSIX", [A-Z] would match the single character 't' for
|
||||
example, which is not what most people expect - or at least not what most
|
||||
people have come to expect from regular expression engines. For this reason,
|
||||
the default behaviour of boost.regex (perl mode) is to turn locale sensitive
|
||||
collation off by not setting the regex_constants::collate compile time flag.
|
||||
However if you set a non-default compile time flag - for example
|
||||
regex_constants::extended or regex_constants::basic, then locale dependent
|
||||
collation will be enabled, this also applies to the POSIX API functions which
|
||||
use either regex_constants::extended or regex_constants::basic internally. <i>[Note
|
||||
- when regex_constants::nocollate is in effect, the library behaves "as if" the
|
||||
LC_COLLATE locale category were always "C", regardless of what it's actually set
|
||||
to - end note</i>].</p>
|
||||
<p><font color="#ff0000">Q. Why are there no throw specifications on any of the
|
||||
functions? What exceptions can the library throw?</font></p>
|
||||
<p>A. Not all compilers support (or honor) throw specifications, others support
|
||||
them but with reduced efficiency. Throw specifications may be added at a later
|
||||
date as compilers begin to handle this better. The library should throw only
|
||||
three types of exception: boost::bad_expression can be thrown by basic_regex
|
||||
when compiling a regular expression, std::runtime_error can be thrown when a
|
||||
call to basic_regex::imbue tries to open a message catalogue that doesn't
|
||||
exist, or when a call to regex_search or regex_match results in an
|
||||
"everlasting" search, or when a call to RegEx::GrepFiles or
|
||||
RegEx::FindFiles tries to open a file that cannot be opened, finally
|
||||
std::bad_alloc can be thrown by just about any of the functions in this
|
||||
library.</p>
|
||||
<p></p>
|
||||
<hr>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,163 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Boost-Extended Format String Syntax</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Boost-Extended Format String Syntax</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>Boost-Extended format strings treat all characters as literals except for
|
||||
'$', '\', '(', ')', '?' and ':'.</P>
|
||||
<H4>Grouping</H4>
|
||||
<P>The characters '(' and ')' perform lexical grouping, use \( and \) if you want
|
||||
to output literal parentheses.</P>
|
||||
<H4>Conditionals</H4>
|
||||
<P>The character '?' begins a conditional expression, the general form is:</P>
|
||||
<PRE>?Ntrue-expression:false-expression</PRE>
|
||||
<P>where N is a decimal digit.</P>
|
||||
<P>If sub-expression <EM>N</EM> was matched, then true-expression is evaluated and
|
||||
sent to output, otherwise false-expression is evaluated and sent to output.</P>
|
||||
<P>You will normally need to surround a conditional-expression with parentheses in
|
||||
order to prevent ambiguities.</P>
|
||||
<H4>Placeholder Sequences</H4>
|
||||
<P>Placeholder sequences specify that some part of what matched the regular
|
||||
expression should be sent to output as follows:</P>
|
||||
<P>
|
||||
<TABLE id="Table2" cellSpacing="4" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<TD><STRONG>Placeholder</STRONG></TD>
|
||||
<TD><STRONG>Meaning</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>$&</TD>
|
||||
<TD>Outputs what matched the whole expression.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>$`</TD>
|
||||
<TD>Outputs the text between the end of the last match found (or the start of the
|
||||
text if no previous match was found), and the start of the current match.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>$'</TD>
|
||||
<TD>Outputs all the text following the end of the current match.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>$$</TD>
|
||||
<TD>Outputs a literal '$'</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>$n</TD>
|
||||
<TD>Outputs what matched the n'th sub-expression.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>Any $-placeholder sequence not listed above, results in '$' being treated as a
|
||||
literal.</P>
|
||||
<H4>Escape Sequences</H4>
|
||||
<P>An escape character followed by any character <EM>x</EM>, outputs that
|
||||
character unless <EM>x</EM> is one of the escape sequences shown below.</P>
|
||||
<P>
|
||||
<TABLE id="Table3" cellSpacing="4" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<TD><STRONG>Escape</STRONG></TD>
|
||||
<TD><STRONG>Meaning</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\a</TD>
|
||||
<TD>Outputs the bell character: '\a'.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\e</TD>
|
||||
<TD>Outputs the ANSI escape character (code point 27).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\f</TD>
|
||||
<TD>Outputs a form feed character: '\f'</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\n</TD>
|
||||
<TD>Outputs a newline character: '\n'.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\r</TD>
|
||||
<TD>Outputs a carriage return character: '\r'.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\t</TD>
|
||||
<TD>Outputs a tab character: '\t'.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\v</TD>
|
||||
<TD>Outputs a vertical tab character: '\v'.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\xDD</TD>
|
||||
<TD>Outputs the character whose hexadecimal code point is 0xDD</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\x{DDDD}</TD>
|
||||
<TD>Outputs the character whose hexadecimal code point is 0xDDDD</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\cX</TD>
|
||||
<TD>Outputs the ANSI escape sequence "escape-X".</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\D</TD>
|
||||
<TD>If <EM>D</EM> is a decimal digit in the range 1-9, then outputs the text that
|
||||
matched sub-expression <EM>D</EM>.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\l</TD>
|
||||
<TD>Causes the next character to be output in lower case.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\u</TD>
|
||||
<TD>Causes the next character to be output in upper case.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\L</TD>
|
||||
<TD>Causes all subsequent characters to be output in lower case, until a \E is
|
||||
found.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\U</TD>
|
||||
<TD>Causes all subsequent characters to be output in upper case, until a \E is
|
||||
found.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\E</TD>
|
||||
<TD>Terminates a \L or \U sequence.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Nov 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 2004</i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,150 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Perl-Style Format String Syntax</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Perl-Style Format String Syntax</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>Perl-style format strings treat all characters as literals except '$' and '\'
|
||||
which start placeholder and escape sequences respectively.</P>
|
||||
<P>Placeholder sequences specify that some part of what matched the regular
|
||||
expression should be sent to output as follows:</P>
|
||||
<P>
|
||||
<TABLE id="Table2" cellSpacing="4" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<TD><STRONG>Placeholder</STRONG></TD>
|
||||
<TD><STRONG>Meaning</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>$&</TD>
|
||||
<TD>Outputs what matched the whole expression.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>$`</TD>
|
||||
<TD>Outputs the text between the end of the last match found (or the start of the
|
||||
text if no previous match was found), and the start of the current match.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>$'</TD>
|
||||
<TD>Outputs all the text following the end of the current match.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>$$</TD>
|
||||
<TD>Outputs a literal '$'</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>$n</TD>
|
||||
<TD>Outputs what matched the n'th sub-expression.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>Any $-placeholder sequence not listed above, results in '$' being treated as a
|
||||
literal.</P>
|
||||
<P>An escape character followed by any character <EM>x</EM>, outputs that
|
||||
character unless <EM>x</EM> is one of the escape sequences shown below.</P>
|
||||
<P>
|
||||
<TABLE id="Table3" cellSpacing="4" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<TD><STRONG>Escape</STRONG></TD>
|
||||
<TD><STRONG>Meaning</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\a</TD>
|
||||
<TD>Outputs the bell character: '\a'.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\e</TD>
|
||||
<TD>Outputs the ANSI escape character (code point 27).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\f</TD>
|
||||
<TD>Outputs a form feed character: '\f'</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\n</TD>
|
||||
<TD>Outputs a newline character: '\n'.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\r</TD>
|
||||
<TD>Outputs a carriage return character: '\r'.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\t</TD>
|
||||
<TD>Outputs a tab character: '\t'.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\v</TD>
|
||||
<TD>Outputs a vertical tab character: '\v'.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\xDD</TD>
|
||||
<TD>Outputs the character whose hexadecimal code point is 0xDD</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\x{DDDD}</TD>
|
||||
<TD>Outputs the character whose hexadecimal code point is 0xDDDD</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\cX</TD>
|
||||
<TD>Outputs the ANSI escape sequence "escape-X".</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\D</TD>
|
||||
<TD>If <EM>D</EM> is a decimal digit in the range 1-9, then outputs the text that
|
||||
matched sub-expression <EM>D</EM>.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\l</TD>
|
||||
<TD>Causes the next character to be output in lower case.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\u</TD>
|
||||
<TD>Causes the next character to be output in upper case.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\L</TD>
|
||||
<TD>Causes all subsequent characters to be output in lower case, until a \E is
|
||||
found.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\U</TD>
|
||||
<TD>Causes all subsequent characters to be output in upper case, until a \E is
|
||||
found.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\E</TD>
|
||||
<TD>Terminates a \L or \U sequence.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Nov 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 2004</i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,109 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Sed-Style Format String Syntax</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Sed-Style Format String Syntax</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>Sed-style format strings treat all characters as literals except:</P>
|
||||
<P>
|
||||
<TABLE id="Table2" cellSpacing="4" cellPadding="3" width="100%" border="0">
|
||||
<TR>
|
||||
<TD>&</TD>
|
||||
<TD>The ampersand character is replaced in the output stream by the whole of
|
||||
what matched the regular expression. Use \& to output a literal
|
||||
'&' character.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\</TD>
|
||||
<TD>Specifies an escape sequence.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>
|
||||
<P>An escape character followed by any character <EM>x</EM>, outputs that
|
||||
character unless <EM>x</EM> is one of the escape sequences shown below.</P>
|
||||
<P>
|
||||
<TABLE id="Table3" cellSpacing="4" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<TD><STRONG>Escape</STRONG></TD>
|
||||
<TD><STRONG>Meaning</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\a</TD>
|
||||
<TD>Outputs the bell character: '\a'.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\e</TD>
|
||||
<TD>Outputs the ANSI escape character (code point 27).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\f</TD>
|
||||
<TD>Outputs a form feed character: '\f'</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\n</TD>
|
||||
<TD>Outputs a newline character: '\n'.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\r</TD>
|
||||
<TD>Outputs a carriage return character: '\r'.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\t</TD>
|
||||
<TD>Outputs a tab character: '\t'.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\v</TD>
|
||||
<TD>Outputs a vertical tab character: '\v'.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\xDD</TD>
|
||||
<TD>Outputs the character whose hexadecimal code point is 0xDD</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\x{DDDD}</TD>
|
||||
<TD>Outputs the character whose hexadecimal code point is 0xDDDD</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\cX</TD>
|
||||
<TD>Outputs the ANSI escape sequence "escape-X".</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\D</TD>
|
||||
<TD>If <EM>D</EM> is a decimal digit in the range 1-9, then outputs the text that
|
||||
matched sub-expression <EM>D</EM>.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>
|
||||
<HR>
|
||||
</P>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Nov 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 2004</i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,52 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Format String Syntax</title>
|
||||
<meta name="generator" content="HTML Tidy, see www.w3.org">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<p></p>
|
||||
<table id="Table1" cellspacing="1" cellpadding="1" width="100%" border="0">
|
||||
<tr>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<td width="353">
|
||||
<h1 align="center">Boost.Regex</h1>
|
||||
<h2 align="center">Format String Syntax</h2>
|
||||
</td>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<hr>
|
||||
<p>Format strings are used by the algorithm <a href="regex_replace.html">regex_replace</a> and
|
||||
by <a href="match_results.html#m12">match_results::format</a>, and are used to
|
||||
transform one string into another.</p>
|
||||
<p>
|
||||
There are three kinds of format string: Sed, Perl and Boost-extended.</p>
|
||||
<P>Alternatively, when the flag <code>format_literal</code> is passed to one of these
|
||||
functions, then the format string is treated as a string literal, and is copied
|
||||
unchanged to the output.</P>
|
||||
<P><A href="format_sed_syntax.html">Sed Style Format Strings</A><BR>
|
||||
<A href="format_perl_syntax.html">Perl Style Format Strings</A><BR>
|
||||
<A href="format_boost_syntax.html">Boost-Extended Format Strings</A></P>
|
||||
<p></p>
|
||||
<hr>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Nov 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2004</i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,48 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Headers</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Headers</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>There are two main headers used by this library: &lt;boost/regex.hpp&gt;
|
||||
provides full access to the main template library, while
|
||||
&lt;boost/cregex.hpp&gt; provides access to the (deprecated) high level class
|
||||
RegEx, and the POSIX API functions.
|
||||
</P>
|
||||
<P>There is also a header containing only forward declarations
|
||||
&lt;boost/regex_fwd.hpp&gt; for use when an interface is dependent upon
|
||||
boost::basic_regex, but otherwise does not need the full definitions.</P>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
28 June 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
177
doc/history.html
177
doc/history.html
@ -1,177 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: History</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">History</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>Boost 1.34</P>
|
||||
<UL>
|
||||
<LI>
|
||||
Fix for non-greedy repeats and partial matches not working correctly in some
|
||||
cases.
|
||||
<LI>
|
||||
Fix for non-greedy repeats on VC++ not working in some cases (bug report
|
||||
1515830).
|
||||
<LI>
|
||||
Changed match_results::position() to return a valid result when *this
|
||||
represents a partial match.</LI>
|
||||
</UL>
|
||||
<P>Boost 1.33.1</P>
|
||||
<UL>
|
||||
<LI>
|
||||
Fixed broken makefiles.
|
||||
<LI>
|
||||
Fixed configuration setup to allow building with VC7.1 - STLport-4.6.2 when
|
||||
using /Zc:wchar_t.
|
||||
<LI>
|
||||
Moved declarations class-inline in static_mutex.hpp so that SGI Irix compiler
|
||||
can cope.
|
||||
<LI>
|
||||
Added needed standard library #includes to fileiter.hpp, regex_workaround.hpp
|
||||
and cpp_regex_traits.hpp.
|
||||
<LI>
|
||||
Fixed a bug where non-greedy repeats could in certain strange circumstances
|
||||
repeat more times than their maximum value.
|
||||
<LI>
|
||||
Fixed the value returned by basic_regex<>::empty() from a default
|
||||
constructed object.
|
||||
<LI>
|
||||
Changed the definition of regex_error to make it backwards compatible with
|
||||
Boost-1.32.0.
|
||||
<LI>
|
||||
Disabled external templates for Intel C++ 8.0 and earlier - otherwise
|
||||
unresolved references can occur.
|
||||
<LI>
|
||||
Rewritten extern template code for gcc so that only specific member functions
|
||||
are exported: otherwise strange unresolved references can occur when linking
|
||||
and mixing debug and non-debug code.
|
||||
<LI>
|
||||
Initialise all the data members of the unicode_iterators: this keeps gcc from
|
||||
issuing needless warnings.
|
||||
<LI>
|
||||
Ported the ICU integration code to VC6 and VC7.
|
||||
<LI>
|
||||
Ensured code is STLport debug mode clean.
|
||||
<LI>
|
||||
Fixed lookbehind assertions so that fixed length repeats are permitted, and so
|
||||
that regex iteration allows lookbehind to look back before the current search
|
||||
range (into the last match).
|
||||
<LI>
|
||||
Fixed strange bug with non-greedy repeats inside forward lookahead assertions.
|
||||
<LI>
|
||||
Enabled negated character classes inside character sets.
|
||||
<LI>
|
||||
Fixed regression so that [a-z-] is a valid expression again.
|
||||
<LI>
|
||||
Fixed bug that allowed some invalid expressions to be accepted.</LI></UL>
|
||||
<P>Boost 1.33.0.</P>
|
||||
<UL>
|
||||
<LI>
|
||||
Completely rewritten expression parsing code, and traits class support; now
|
||||
conforms to the standardization proposal.
|
||||
<LI>
|
||||
<STRONG>Breaking Change:</STRONG> The <A href="syntax_option_type.html">syntax
|
||||
options</A> that can be passed to <A href="basic_regex.html">basic_regex
|
||||
constructors</A> have been rationalized. The default option (perl) now
|
||||
has a value of zero, and it is now clearly documented which options apply to
|
||||
which <A href="syntax.html">regular expression syntax styles (perl,
|
||||
POSIX-extended, POSIX-basic etc)</A>. Some of the more esoteric
|
||||
options have now been removed, so there is the possibility that existing code
|
||||
may fail to compile: however equivalent functionality should still be
|
||||
available.
|
||||
<LI>
|
||||
<STRONG>Breaking Change: </STRONG>
|
||||
POSIX-extended and POSIX-basic regular expressions now enforce the letter of
|
||||
the POSIX standard much more closely than before.
|
||||
<LI>
|
||||
Added <A href="syntax_perl.html#Perl">support for (?imsx-imsx) constructs</A>.
|
||||
<LI>
|
||||
Added <A href="syntax_perl.html#Perl">support for lookbehind expressions
|
||||
(?<=positive-lookbehind) and (?<!negative-lookbehind)</A>.
|
||||
<LI>
|
||||
Added <A href="syntax_perl.html#Perl">support for conditional expressions
|
||||
(?(assertion)true-expression|false-expression)</A>.
|
||||
<LI>
|
||||
Added <A href="mfc_strings.html">MFC/ATL string wrappers</A>.
|
||||
<LI>
|
||||
Added <A href="unicode.html">Unicode support; based on ICU</A>.
|
||||
<LI>
|
||||
Changed newline support to recognise \f as a line separator (all character
|
||||
types), and \x85 as a line separator for wide characters / Unicode only.
|
||||
<LI>
|
||||
Added a new format flag <A href="match_flag_type.html"><code>format_literal</code></A>
|
||||
that treats the replace string as a literal, rather than a Perl or Sed style <A href="format_syntax.html">
|
||||
format string</A>.
|
||||
<LI>
|
||||
Errors are now reported by throwing exceptions of type <A href="bad_expression.html">
|
||||
<code>regex_error</code></A>. The types used previously - <code>bad_expression</code>
|
||||
and <code>bad_pattern</code> - are now just typedefs for <code>regex_error</code>.
|
||||
Type <code>regex_error</code> has a couple of new members: <code>code()</code> to
|
||||
report an error code rather than a string, and <code>position()</code> to
|
||||
report where in the expression the error occurred.</LI></UL>
|
||||
<P>Boost 1.32.1.</P>
|
||||
<UL>
|
||||
<LI>
|
||||
Fixed bug in partial matches of bounded repeats of '.'.</LI></UL>
|
||||
<P>Boost 1.31.0.</P>
|
||||
<UL>
|
||||
<LI>
|
||||
Completely rewritten pattern matching code - it is now up to 10 times faster
|
||||
than before.
|
||||
<LI>
|
||||
Reorganized documentation.
|
||||
<LI>
|
||||
Deprecated all interfaces that are not part of the <A href="http://anubis.dkuug.dk/jtc1/sc22/wg21/docs/papers/2003/n1429.htm">
|
||||
regular expression standardization proposal</A>.
|
||||
<LI>
|
||||
Added <A href="regex_iterator.html">regex_iterator</A> and <A href="regex_token_iterator.html">
|
||||
regex_token_iterator</A>
|
||||
.
|
||||
<LI>
|
||||
Added support for Perl style independent sub-expressions.
|
||||
<LI>
|
||||
Added non-member operators to the <A href="sub_match.html">sub_match class</A>,
|
||||
so that you can compare sub_match's with strings, or add them to a string to
|
||||
produce a new string.
|
||||
<LI>
|
||||
Added experimental support for <A href="captures.html">extended capture
|
||||
information</A>.
|
||||
<LI>
|
||||
Changed the match flags so that they are a distinct type (not an integer), if
|
||||
you try to pass the match flags as an integer rather than<A href="match_flag_type.html">
|
||||
match_flag_type</A> to the regex algorithms then you will now get a compiler
|
||||
error.</LI></UL>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
28 June 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,468 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Working With Unicode and ICU String Types</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td vAlign="top" width="300">
|
||||
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../boost.png" width="277" border="0"></A></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Working With Unicode and ICU String Types.</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<H3>Contents</H3>
|
||||
<dl class="index">
|
||||
<dt><a href="#introduction">Introduction</a></dt>
|
||||
<dt><a href="#types">Unicode regular expression types</a></dt>
|
||||
<dt><a href="#algo">Regular Expression Algorithms</a>
|
||||
<dd>
|
||||
<dl class="index">
|
||||
<dt><a href="#u32regex_match">u32regex_match</a></dt>
|
||||
<dt><a href="#u32regex_search">u32regex_search</a></dt>
|
||||
<dt><a href="#u32regex_replace">u32regex_replace</a></dt>
|
||||
</dl>
|
||||
</dd>
|
||||
</dt>
|
||||
<dt><a href="#iterators">Iterators</a>
|
||||
<dd>
|
||||
<dl class="index">
|
||||
<dt><a href="#u32regex_iterator">u32regex_iterator</a></dt>
|
||||
<dt><a href="#u32regex_token_iterator">u32regex_token_iterator</a></dt>
|
||||
</dl>
|
||||
</dd>
|
||||
</dt>
|
||||
</dl>
|
||||
<H3><A name="introduction"></A>Introduction</H3>
|
||||
<P>The header:</P>
|
||||
<PRE><boost/regex/icu.hpp></PRE>
|
||||
<P>contains the data types and algorithms necessary for working with regular
|
||||
expressions in a Unicode aware environment.
|
||||
</P>
|
||||
<P>In order to use this header you will need <A href="http://www.ibm.com/software/globalization/icu/">
|
||||
the ICU library</A>, and you will need to have built the Boost.Regex library
|
||||
with <A href="install.html#unicode">ICU support enabled</A>.</P>
|
||||
<P>The header will enable you to:</P>
|
||||
<UL>
|
||||
<LI>
|
||||
Create regular expressions that treat Unicode strings as sequences of UTF-32
|
||||
code points.
|
||||
<LI>
|
||||
Create regular expressions that support various Unicode data properties,
|
||||
including character classification.
|
||||
<LI>
|
||||
Transparently search Unicode strings that are encoded as either UTF-8, UTF-16
|
||||
or UTF-32.</LI></UL>
|
||||
<H3><A name="types"></A>Unicode regular expression types</H3>
|
||||
<P>Header <boost/regex/icu.hpp> provides a regular expression traits
|
||||
class that handles UTF-32 characters:</P>
|
||||
<PRE>class icu_regex_traits;</PRE>
|
||||
<P>and a regular expression type based upon that:</P>
|
||||
<PRE>typedef basic_regex<UChar32,icu_regex_traits> u32regex;</PRE>
|
||||
<P>The type <EM>u32regex</EM> is the regular expression type to use for all Unicode
|
||||
regular expressions; internally it uses UTF-32 code points, but can be created
|
||||
from, and used to search, either UTF-8, or UTF-16 encoded strings as well as
|
||||
UTF-32 ones.</P>
|
||||
<P>The <A href="basic_regex.html#c2">constructors</A>, and <A href="basic_regex.html#a1">
|
||||
assign</A> member functions of u32regex, require UTF-32 encoded strings, but
|
||||
there are a series of overloaded algorithms called make_u32regex which allow
|
||||
regular expressions to be created from UTF-8, UTF-16, or UTF-32 encoded
|
||||
strings:</P>
|
||||
<PRE>template <class InputIterator>
|
||||
u32regex make_u32regex(InputIterator i, InputIterator j, boost::regex_constants::syntax_option_type opt);
|
||||
</PRE>
|
||||
<P><STRONG>Effects:</STRONG> Creates a regular expression object from the iterator
|
||||
sequence [i,j). The character encoding of the sequence is determined based upon <code>
|
||||
sizeof(*i)</code>: 1 implies UTF-8, 2 implies UTF-16, and 4 implies UTF-32.</P>
|
||||
<PRE>u32regex make_u32regex(const char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl);
|
||||
</PRE>
|
||||
<P><STRONG>Effects:</STRONG> Creates a regular expression object from the
|
||||
Null-terminated UTF-8 character sequence <EM>p</EM>.</P>
|
||||
<PRE>u32regex make_u32regex(const unsigned char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl);</PRE>
|
||||
<P><STRONG>Effects:</STRONG> Creates a regular expression object from the
|
||||
Null-terminated UTF-8 character sequence <EM>p</EM>.</P>
|
||||
<PRE>u32regex make_u32regex(const wchar_t* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl);</PRE>
|
||||
<P><STRONG>Effects:</STRONG> Creates a regular expression object from the
|
||||
Null-terminated character sequence <EM>p</EM>. The character encoding of
|
||||
the sequence is determined based upon <CODE>sizeof(wchar_t)</CODE>: 1 implies
|
||||
UTF-8, 2 implies UTF-16, and 4 implies UTF-32.</P>
|
||||
<PRE>u32regex make_u32regex(const UChar* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl);</PRE>
|
||||
<P><STRONG>Effects:</STRONG> Creates a regular expression object from the
|
||||
Null-terminated UTF-16 character sequence <EM>p</EM>.</P>
|
||||
<PRE>template<class C, class T, class A>
|
||||
u32regex make_u32regex(const std::basic_string<C, T, A>& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl);</PRE>
|
||||
<P><STRONG>Effects:</STRONG> Creates a regular expression object from the string <EM>s</EM>.
|
||||
The character encoding of the string is determined based upon <CODE>sizeof(C)</CODE>:
|
||||
1 implies UTF-8, 2 implies UTF-16, and 4 implies UTF-32.</P>
|
||||
<PRE>u32regex make_u32regex(const UnicodeString& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl);</PRE>
|
||||
<P><STRONG>Effects:</STRONG> Creates a regular expression object from the UTF-16
|
||||
encoded string <EM>s</EM>.</P>
|
||||
<H3><A name="algo"></A>Regular Expression Algorithms</H3>
|
||||
<P>The regular expression algorithms <A href="regex_match.html">regex_match</A>, <A href="regex_search.html">
|
||||
regex_search</A> and <A href="regex_replace.html">regex_replace</A> all
|
||||
expect that the character sequence upon which they operate, is encoded in the
|
||||
same character encoding as the regular expression object with which they are
|
||||
used. For Unicode regular expressions that behavior is undesirable: while
|
||||
we may want to process the data in UTF-32 "chunks", the actual data is much
|
||||
more likely to be encoded as either UTF-8 or UTF-16. Therefore the header
|
||||
<boost/regex/icu.hpp> provides a series of thin wrappers around these
|
||||
algorithms, called u32regex_match, u32regex_search, and u32regex_replace.
|
||||
These wrappers use iterator-adapters internally to make external UTF-8 or
|
||||
UTF-16 data look as though it's really a UTF-32 sequence, that can then be
|
||||
passed on to the "real" algorithm.</P>
|
||||
<H4><A name="u32regex_match"></A>u32regex_match</H4>
|
||||
<P>For each <A href="regex_match.html">regex_match</A> algorithm defined by
|
||||
<boost/regex.hpp>, then <boost/regex/icu.hpp> defines an overloaded
|
||||
algorithm that takes the same arguments, but which is called <EM>u32regex_match</EM>,
|
||||
and which will accept UTF-8, UTF-16 or UTF-32 encoded data, as well as an
|
||||
ICU UnicodeString as input.</P>
|
||||
<P><STRONG>Example: </STRONG>match a password, encoded in a UTF-16 UnicodeString:</P>
|
||||
<PRE>//
|
||||
// Find out if *password* meets our password requirements,
|
||||
// as defined by the regular expression *requirements*.
|
||||
//
|
||||
bool is_valid_password(const UnicodeString& password, const UnicodeString& requirements)
|
||||
{
|
||||
return boost::u32regex_match(password, boost::make_u32regex(requirements));
|
||||
}
|
||||
</PRE>
|
||||
<P>
|
||||
<P><STRONG>Example: </STRONG>match a UTF-8 encoded filename:</P>
|
||||
<PRE>//
|
||||
// Extract filename part of a path from a UTF-8 encoded std::string and return the result
|
||||
// as another std::string:
|
||||
//
|
||||
std::string get_filename(const std::string& path)
|
||||
{
|
||||
boost::u32regex r = boost::make_u32regex("(?:\\A|.*\\\\)([^\\\\]+)");
|
||||
boost::smatch what;
|
||||
if(boost::u32regex_match(path, what, r))
|
||||
{
|
||||
// extract $1 as a std::string:
|
||||
return what.str(1);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::runtime_error("Invalid pathname");
|
||||
}
|
||||
}
|
||||
</PRE>
|
||||
<H4><A name="u32regex_search"></A>u32regex_search</H4>
|
||||
<P>For each <A href="regex_search.html">regex_search</A> algorithm defined by
|
||||
<boost/regex.hpp>, then <boost/regex/icu.hpp> defines an overloaded
|
||||
algorithm that takes the same arguments, but which is called <EM>u32regex_search</EM>,
|
||||
and which will accept UTF-8, UTF-16 or UTF-32 encoded data, as well as an
|
||||
ICU UnicodeString as input.</P>
|
||||
<P><STRONG>Example: </STRONG>search for a character sequence in a specific
|
||||
language block:
|
||||
</P>
|
||||
<PRE>UnicodeString extract_greek(const UnicodeString& text)
|
||||
{
|
||||
// searches through some UTF-16 encoded text for a block encoded in Greek,
|
||||
// this expression is imperfect, but the best we can do for now - searching
|
||||
// for specific scripts is actually pretty hard to do right.
|
||||
//
|
||||
// Here we search for a character sequence that begins with a Greek letter,
|
||||
// and continues with characters that are either not-letters ( [^[:L*:]] )
|
||||
// or are characters in the Greek character block ( [\\x{370}-\\x{3FF}] ).
|
||||
//
|
||||
boost::u32regex r = boost::make_u32regex(L"[\\x{370}-\\x{3FF}](?:[^[:L*:]]|[\\x{370}-\\x{3FF}])*");
|
||||
boost::u16match what;
|
||||
if(boost::u32regex_search(text, what, r))
|
||||
{
|
||||
// extract $0 as a UnicodeString:
|
||||
return UnicodeString(what[0].first, what.length(0));
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::runtime_error("No Greek found!");
|
||||
}
|
||||
}</PRE>
|
||||
<H4><A name="u32regex_replace"></A>u32regex_replace</H4>
|
||||
<P>For each <A href="regex_replace.html">regex_replace</A> algorithm defined by
|
||||
<boost/regex.hpp>, then <boost/regex/icu.hpp> defines an overloaded
|
||||
algorithm that takes the same arguments, but which is called <EM>u32regex_replace</EM>,
|
||||
and which will accept UTF-8, UTF-16 or UTF-32 encoded data, as well as an
|
||||
ICU UnicodeString as input. The input sequence and the format string
|
||||
specifier passed to the algorithm, can be encoded independently (for example
|
||||
one can be UTF-8, the other in UTF-16), but the result string / output iterator
|
||||
argument must use the same character encoding as the text being searched.</P>
|
||||
<P><STRONG>Example: </STRONG>Credit card number reformatting:</P>
|
||||
<PRE>//
|
||||
// Take a credit card number as a string of digits,
|
||||
// and reformat it as a human readable string with "-"
|
||||
// separating each group of four digits;
|
||||
// note that we're mixing a UTF-32 regex, with a UTF-16
|
||||
// string and a UTF-8 format specifier, and it still all
|
||||
// just works:
|
||||
//
|
||||
const boost::u32regex e = boost::make_u32regex("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z");
|
||||
const char* human_format = "$1-$2-$3-$4";
|
||||
|
||||
UnicodeString human_readable_card_number(const UnicodeString& s)
|
||||
{
|
||||
return boost::u32regex_replace(s, e, human_format);
|
||||
}</PRE>
|
||||
<P>
|
||||
<H2><A name="iterators"></A>Iterators</H2>
|
||||
<H3><A name="u32regex_iterator"></A>u32regex_iterator</H3>
|
||||
<P>Type u32regex_iterator is in all respects the same as <A href="regex_iterator.html">
|
||||
regex_iterator</A> except that since the regular expression type is always
|
||||
u32regex it only takes one template parameter (the iterator type). It also
|
||||
calls u32regex_search internally, allowing it to interface correctly with
|
||||
UTF-8, UTF-16, and UTF-32 data:</P>
|
||||
<PRE>
|
||||
template <class BidirectionalIterator>
|
||||
class u32regex_iterator
|
||||
{
|
||||
// for members see <A href="regex_iterator.html">regex_iterator</A>
|
||||
};
|
||||
|
||||
typedef u32regex_iterator<const char*> utf8regex_iterator;
|
||||
typedef u32regex_iterator<const UChar*> utf16regex_iterator;
|
||||
typedef u32regex_iterator<const UChar32*> utf32regex_iterator;
|
||||
</PRE>
|
||||
<P>In order to simplify the construction of a u32regex_iterator from a string,
|
||||
there are a series of non-member helper functions called
|
||||
make_u32regex_iterator:</P>
|
||||
<PRE>
|
||||
u32regex_iterator<const char*>
|
||||
make_u32regex_iterator(const char* s,
|
||||
const u32regex& e,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
u32regex_iterator<const wchar_t*>
|
||||
make_u32regex_iterator(const wchar_t* s,
|
||||
const u32regex& e,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
u32regex_iterator<const UChar*>
|
||||
make_u32regex_iterator(const UChar* s,
|
||||
const u32regex& e,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
template <class charT, class Traits, class Alloc>
|
||||
u32regex_iterator<typename std::basic_string<charT, Traits, Alloc>::const_iterator>
|
||||
make_u32regex_iterator(const std::basic_string<charT, Traits, Alloc>& s,
|
||||
const u32regex& e,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
u32regex_iterator<const UChar*>
|
||||
make_u32regex_iterator(const UnicodeString& s,
|
||||
const u32regex& e,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);</PRE>
|
||||
<P>
|
||||
<P>Each of these overloads returns an iterator that enumerates all occurrences of
|
||||
expression <EM>e</EM>, in text <EM>s</EM>, using match_flags <EM>m.</EM></P>
|
||||
<P><STRONG>Example</STRONG>: search for international currency symbols, along with
|
||||
their associated numeric value:</P>
|
||||
<PRE>
|
||||
void enumerate_currencies(const std::string& text)
|
||||
{
|
||||
// enumerate and print all the currency symbols, along
|
||||
// with any associated numeric values:
|
||||
const char* re =
|
||||
"([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?"
|
||||
"([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?"
|
||||
"(?(1)"
|
||||
"|(?(2)"
|
||||
"[[:Cf:][:Cc:][:Z*:]]*"
|
||||
")"
|
||||
"[[:Sc:]]"
|
||||
")";
|
||||
boost::u32regex r = boost::make_u32regex(re);
|
||||
boost::u32regex_iterator<std::string::const_iterator> i(boost::make_u32regex_iterator(text, r)), j;
|
||||
while(i != j)
|
||||
{
|
||||
std::cout << (*i)[0] << std::endl;
|
||||
++i;
|
||||
}
|
||||
}</PRE>
|
||||
<P>
|
||||
<P>Calling
|
||||
</P>
|
||||
<PRE>enumerate_currencies(" $100.23 or &pound;198.12 ");</PRE>
|
||||
<P>Yields the output:</P>
|
||||
<PRE>$100.23<BR>&pound;198.12</PRE>
|
||||
<P>Provided of course that the input is encoded as UTF-8.</P>
|
||||
<H3><A name="u32regex_token_iterator"></A>u32regex_token_iterator</H3>
|
||||
<P>Type u32regex_token_iterator is in all respects the same as <A href="regex_token_iterator.html">
|
||||
regex_token_iterator</A> except that since the regular expression type is
|
||||
always u32regex it only takes one template parameter (the iterator type).
|
||||
It also calls u32regex_search internally, allowing it to interface correctly
|
||||
with UTF-8, UTF-16, and UTF-32 data:</P>
|
||||
<PRE>template <class BidirectionalIterator>
|
||||
class u32regex_token_iterator
|
||||
{
|
||||
// for members see <A href="regex_token_iterator.html">regex_token_iterator</A>
|
||||
};
|
||||
|
||||
typedef u32regex_token_iterator<const char*> utf8regex_token_iterator;
|
||||
typedef u32regex_token_iterator<const UChar*> utf16regex_token_iterator;
|
||||
typedef u32regex_token_iterator<const UChar32*> utf32regex_token_iterator;
|
||||
</PRE>
|
||||
<P>In order to simplify the construction of a u32regex_token_iterator from a
|
||||
string, there are a series of non-member helper functions called
|
||||
make_u32regex_token_iterator:</P>
|
||||
<PRE>
|
||||
u32regex_token_iterator<const char*>
|
||||
make_u32regex_token_iterator(const char* s,
|
||||
const u32regex& e,
|
||||
int sub,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
u32regex_token_iterator<const wchar_t*>
|
||||
make_u32regex_token_iterator(const wchar_t* s,
|
||||
const u32regex& e,
|
||||
int sub,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
u32regex_token_iterator<const UChar*>
|
||||
make_u32regex_token_iterator(const UChar* s,
|
||||
const u32regex& e,
|
||||
int sub,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
template <class charT, class Traits, class Alloc>
|
||||
u32regex_token_iterator<typename std::basic_string<charT, Traits, Alloc>::const_iterator>
|
||||
make_u32regex_token_iterator(const std::basic_string<charT, Traits, Alloc>& s,
|
||||
const u32regex& e,
|
||||
int sub,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
u32regex_token_iterator<const UChar*>
|
||||
make_u32regex_token_iterator(const UnicodeString& s,
|
||||
const u32regex& e,
|
||||
int sub,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);</PRE>
|
||||
<P>
|
||||
<P>Each of these overloads returns an iterator that enumerates all occurrences of
|
||||
marked sub-expression <EM>sub</EM> in regular expression <EM>e</EM>, found
|
||||
in text <EM>s</EM>, using match_flags <EM>m.</EM></P>
|
||||
<PRE>
|
||||
template <std::size_t N>
|
||||
u32regex_token_iterator<const char*>
|
||||
make_u32regex_token_iterator(const char* p,
|
||||
const u32regex& e,
|
||||
const int (&submatch)[N],
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
template <std::size_t N>
|
||||
u32regex_token_iterator<const wchar_t*>
|
||||
make_u32regex_token_iterator(const wchar_t* p,
|
||||
const u32regex& e,
|
||||
const int (&submatch)[N],
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
template <std::size_t N>
|
||||
u32regex_token_iterator<const UChar*>
|
||||
make_u32regex_token_iterator(const UChar* p,
|
||||
const u32regex& e,
|
||||
const int (&submatch)[N],
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
template <class charT, class Traits, class Alloc, std::size_t N>
|
||||
u32regex_token_iterator<typename std::basic_string<charT, Traits, Alloc>::const_iterator>
|
||||
make_u32regex_token_iterator(const std::basic_string<charT, Traits, Alloc>& p,
|
||||
const u32regex& e,
|
||||
const int (&submatch)[N],
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
template <std::size_t N>
|
||||
u32regex_token_iterator<const UChar*>
|
||||
make_u32regex_token_iterator(const UnicodeString& s,
|
||||
const u32regex& e,
|
||||
const int (&submatch)[N],
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
</PRE>
|
||||
<P>Each of these overloads returns an iterator that enumerates one sub-expression
|
||||
for each <EM>submatch</EM> in regular expression <EM>e</EM>, found in
|
||||
text <EM>s</EM>, using match_flags <EM>m.</EM></P>
|
||||
<PRE>
|
||||
u32regex_token_iterator<const char*>
|
||||
make_u32regex_token_iterator(const char* p,
|
||||
const u32regex& e,
|
||||
const std::vector<int>& submatch,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
u32regex_token_iterator<const wchar_t*>
|
||||
make_u32regex_token_iterator(const wchar_t* p,
|
||||
const u32regex& e,
|
||||
const std::vector<int>& submatch,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
u32regex_token_iterator<const UChar*>
|
||||
make_u32regex_token_iterator(const UChar* p,
|
||||
const u32regex& e,
|
||||
const std::vector<int>& submatch,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
template <class charT, class Traits, class Alloc>
|
||||
u32regex_token_iterator<typename std::basic_string<charT, Traits, Alloc>::const_iterator>
|
||||
make_u32regex_token_iterator(const std::basic_string<charT, Traits, Alloc>& p,
|
||||
const u32regex& e,
|
||||
const std::vector<int>& submatch,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
u32regex_token_iterator<const UChar*>
|
||||
make_u32regex_token_iterator(const UnicodeString& s,
|
||||
const u32regex& e,
|
||||
const std::vector<int>& submatch,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
</PRE>
|
||||
<P>Each of these overloads returns an iterator that enumerates one sub-expression
|
||||
for each <EM>submatch</EM> in regular expression <EM>e</EM>, found in
|
||||
text <EM>s</EM>, using match_flags <EM>m.</EM></P>
|
||||
<P><STRONG>Example</STRONG>: search for international currency symbols, along with
|
||||
their associated numeric value:</P>
|
||||
<PRE>
|
||||
void enumerate_currencies2(const std::string& text)
|
||||
{
|
||||
// enumerate and print all the currency symbols, along
|
||||
// with any associated numeric values:
|
||||
const char* re =
|
||||
"([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?"
|
||||
"([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?"
|
||||
"(?(1)"
|
||||
"|(?(2)"
|
||||
"[[:Cf:][:Cc:][:Z*:]]*"
|
||||
")"
|
||||
"[[:Sc:]]"
|
||||
")";
|
||||
boost::u32regex r = boost::make_u32regex(re);
|
||||
boost::u32regex_token_iterator<std::string::const_iterator>
|
||||
i(boost::make_u32regex_token_iterator(text, r, 1)), j;
|
||||
while(i != j)
|
||||
{
|
||||
std::cout << *i << std::endl;
|
||||
++i;
|
||||
}
|
||||
}
|
||||
</PRE>
|
||||
<P>
|
||||
<HR>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
05 Jan 2005
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>© Copyright John Maddock 2005</i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
@ -1,43 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Implementation</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Implementation</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>Todo.</P>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>© Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
260
doc/install.html
260
doc/install.html
@ -1,260 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Installation</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td vAlign="top" width="300">
|
||||
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../boost.png" width="277" border="0"></A></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Installation</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<P>When you extract the library from its zip file, you must preserve its internal
|
||||
directory structure (for example by using the -d option when extracting). If
|
||||
you didn't do that when extracting, then you'd better stop reading this, delete
|
||||
the files you just extracted, and try again!
|
||||
</P>
|
||||
<P>This library should not need configuring before use; most popular
|
||||
compilers/standard libraries/platforms are already supported "as is". If you do
|
||||
experience configuration problems, or just want to test the configuration with
|
||||
your compiler, then the process is the same as for all of boost; see the <A href="../../config/config.htm">
|
||||
configuration library documentation</A>.</P>
|
||||
<P>The library will encase all code inside namespace boost.
|
||||
</P>
|
||||
<P>Unlike some other template libraries, this library consists of a mixture of
|
||||
template code (in the headers) and static code and data (in cpp files).
|
||||
Consequently it is necessary to build the library's support code into a library
|
||||
or archive file before you can use it, instructions for specific platforms are
|
||||
as follows:
|
||||
</P>
|
||||
<H3><A name="bjam"></A>Building with bjam</H3>
|
||||
<P>This is now the preferred method for building and installing this library,
|
||||
please refer to the <A href="../../../more/getting_started.html">getting started
|
||||
guide</A> for more information.</P>
|
||||
<H3><A name="unicode"></A>Building With Unicode and ICU Support</H3>
|
||||
<P>A default build of this library does not enable <A href="unicode.html">Unicode
|
||||
support</A> via ICU. There is no need to enable this support if you
|
||||
don't need it, but if you use ICU for your Unicode support already, and want to
|
||||
work with Unicode-aware regular expressions then read on.</P>
|
||||
<P>Most of the information you will need is in the <A href="../../../more/getting_started.html">
|
||||
getting started guide</A>, the only additional step you need to take is to
|
||||
tell bjam that you want Boost.Regex to use ICU and optionally to tell bjam
|
||||
where ICU is located.</P>
|
||||
<P>If you're building on a Unix-like platform, and ICU is already installed in
|
||||
your compiler's search path (with an install prefix of /usr or /usr/local
|
||||
for example), then set the environment variable HAVE_ICU to enable ICU
|
||||
support. For example you might build with the command line:</P>
|
||||
<PRE>bjam -sHAVE_ICU=1 --toolset=<A href="../../../more/getting_started/windows.html#toolset-name">toolset-name</A> install</PRE>
|
||||
<P>If ICU is not already in your compiler's path then you need to set the
|
||||
environment variable ICU_PATH to point to the root directory of your ICU
|
||||
installation, for example if ICU was installed to /usr/local/icu/3.3 you might
|
||||
use:</P>
|
||||
<PRE>bjam -sICU_PATH=/usr/local/icu/3.3 --toolset=<A href="../../../more/getting_started/windows.html#toolset-name">toolset-name</A> install</PRE>
|
||||
<P>Note that ICU is a C++ library just like Boost is, as such your copy of ICU
|
||||
must have been built with the same C++ compiler (and compiler version) that you
|
||||
are using to build Boost. <STRONG>Boost.Regex will not work correctly unless
|
||||
you ensure that this is the case:</STRONG> it is up to you to ensure that
|
||||
the version of ICU you are using is binary compatible with the toolset you use
|
||||
to build Boost.</P>
|
||||
<H2><A name="make"></A>Building via makefiles</H2>
|
||||
<H3><A name="bcb"></A>Borland C++ Builder:
|
||||
</H3>
|
||||
<UL>
|
||||
<LI>
|
||||
Open up a console window and change to the <boost>\libs\regex\build
|
||||
directory.
|
||||
<LI>
|
||||
Select the appropriate makefile (bcb4.mak for C++ Builder 4, bcb5.mak for C++
|
||||
Builder 5, and bcb6.mak for C++ Builder 6).
|
||||
<LI>
|
||||
Invoke the makefile (pass the full path to your version of make if you have
|
||||
more than one version installed, the makefile relies on the path to make to
|
||||
obtain your C++ Builder installation directory and tools) for example:
|
||||
</LI>
|
||||
</UL>
|
||||
<PRE>make -fbcb5.mak</PRE>
|
||||
<P>The build process will build a variety of .lib and .dll files (the exact number
|
||||
depends upon the version of Borland's tools you are using) the .lib and dll
|
||||
files will be in a sub-directory called bcb4 or bcb5 depending upon the
|
||||
makefile used. To install the libraries into your development system use:</P>
|
||||
<PRE>make -fbcb5.mak install</PRE>
|
||||
<P>library files will be copied to <BCROOT>/lib and the dll's to
|
||||
<BCROOT>/bin, where <BCROOT> corresponds to the install path of
|
||||
your Borland C++ tools.
|
||||
</P>
|
||||
<P>You may also remove temporary files created during the build process (excluding
|
||||
lib and dll files) by using:</P>
|
||||
<PRE>make -fbcb5.mak clean</PRE>
|
||||
<P>Finally when you use regex++ it is only necessary for you to add the
|
||||
<boost> root directory to your list of include directories for that
|
||||
project. It is not necessary for you to manually add a .lib file to the
|
||||
project; the headers will automatically select the correct .lib file for your
|
||||
build mode and tell the linker to include it. There is one caveat however: the
|
||||
library can not tell the difference between VCL and non-VCL enabled builds when
|
||||
building a GUI application from the command line, if you build from the command
|
||||
line with the 5.5 command line tools then you must define the pre-processor
|
||||
symbol _NO_VCL in order to ensure that the correct link libraries are selected:
|
||||
the C++ Builder IDE normally sets this automatically. Hint, users of the 5.5
|
||||
command line tools may want to add a -D_NO_VCL to bcc32.cfg in order to set
|
||||
this option permanently.
|
||||
</P>
|
||||
<P>If you would prefer to do a dynamic link to the regex libraries when using the
|
||||
dll runtime then define BOOST_REGEX_DYN_LINK (you must do this if you want to
|
||||
use boost.regex in multiple dll's), otherwise Boost.regex will be statically
|
||||
linked by default. </P>
|
||||
<P>If you want to suppress automatic linking altogether (and supply your own
|
||||
custom build of the lib) then define BOOST_REGEX_NO_LIB.</P>
|
||||
<P>If you are building with C++ Builder 6, you will find that
|
||||
<boost/regex.hpp> can not be used in a pre-compiled header (the actual
|
||||
problem is in <locale> which gets included by <boost/regex.hpp>),
|
||||
if this causes problems for you, then try defining BOOST_NO_STD_LOCALE when
|
||||
building, this will disable some features throughout boost, but may save you a
|
||||
lot in compile times!</P>
|
||||
<H3><A name="vc"></A>Microsoft Visual C++ 6, 7, 7.1 and 8</H3>
|
||||
<P>You need version 6 or later of MSVC to build this library. If you are using VC5 then you
|
||||
may want to look at one of the previous releases of this <A href="http://ourworld.compuserve.com/homepages/john_maddock/regexpp.htm">
|
||||
library</A>
|
||||
</P>
|
||||
<P>Open up a command prompt, which has the necessary MSVC environment variables
|
||||
defined (for example by using the batch file Vcvars32.bat installed by the
|
||||
Visual Studio installation), and change to the <boost>\libs\regex\build
|
||||
directory.
|
||||
</P>
|
||||
<P>Select the correct makefile - vc6.mak for "vanilla" Visual C++ 6 or
|
||||
vc6-stlport.mak if you are using STLPort.</P>
|
||||
<P>Invoke the makefile like this:</P>
|
||||
<PRE>nmake -fvc6.mak</PRE>
|
||||
<P>You will now have a collection of lib and dll files in a "vc6" subdirectory, to
|
||||
install these into your development system use:</P>
|
||||
<PRE>nmake -fvc6.mak install</PRE>
|
||||
<P>The lib files will be copied to your <VC6>\lib directory and the dll
|
||||
files to <VC6>\bin, where <VC6> is the root of your Visual C++ 6
|
||||
installation.</P>
|
||||
<P>You can delete all the temporary files created during the build (excluding lib
|
||||
and dll files) using:</P>
|
||||
<PRE>nmake -fvc6.mak clean </PRE>
|
||||
<P>If you want to build with ICU support, then you need to pass the path to your
|
||||
ICU directory to the makefile, for example with:
|
||||
</P>
|
||||
<PRE>nmake ICU_PATH=c:\open-source\icu -fvc71.mak install</PRE>
|
||||
<P>Finally when you use regex++ it is only necessary for you to add the
|
||||
<boost> root directory to your list of include directories for that
|
||||
project. It is not necessary for you to manually add a .lib file to the
|
||||
project; the headers will automatically select the correct .lib file for your
|
||||
build mode and tell the linker to include it.
|
||||
</P>
|
||||
<P>Note that if you want to dynamically link to the regex library when using the
|
||||
dynamic C++ runtime, define BOOST_REGEX_DYN_LINK when building your project.</P>
|
||||
<P>If you want to add the source directly to your project then define
|
||||
BOOST_REGEX_NO_LIB to disable automatic library selection.</P>
|
||||
<P>There are several important caveats to remember when using boost.regex with
|
||||
Microsoft's Compiler:</P>
|
||||
<UL>
|
||||
<LI>
|
||||
There have been some reports of compiler-optimization bugs affecting this
|
||||
library, (particularly with VC6 versions prior to service pack 5) the
|
||||
workaround is to build the library using /Oityb1 rather than /O2. That is to
|
||||
use all optimization settings except /Oa. This problem is reported to affect
|
||||
some standard library code as well (in fact I'm not sure if the problem is with
|
||||
the regex code or the underlying standard library), so it's probably worthwhile
|
||||
applying this workaround in normal practice in any case.
|
||||
<LI>
|
||||
If you have replaced the C++ standard library that comes with VC6, then when
|
||||
you build the library you must ensure that the environment variables "INCLUDE"
|
||||
and "LIB" have been updated to reflect the include and library paths for the
|
||||
new library - see vcvars32.bat (part of your Visual Studio installation) for
|
||||
more details.
|
||||
<LI>
|
||||
If you are building with the full STLPort v4.x, then use the vc6-stlport.mak
|
||||
file provided and set the environment variable STLPORT_PATH to point to the
|
||||
location of your STLPort installation (Note that the full STLPort libraries
|
||||
appear not to support single-thread static builds).
|
||||
<LI>
|
||||
If you are building your application with /Zc:wchar_t then you will need to
|
||||
modify the makefile to add /Zc:wchar_t before building the library.
|
||||
</LI>
|
||||
</UL>
|
||||
<H3><A name="gcc"></A>GCC(2.95 and 3.x)
|
||||
</H3>
|
||||
<P>You can build with gcc using the normal boost Jamfile in
|
||||
<boost>/libs/regex/build, alternatively there is a conservative makefile
|
||||
for the g++ compiler. From the command prompt change to the
|
||||
<boost>/libs/regex/build directory and type:
|
||||
</P>
|
||||
<PRE>make -fgcc.mak </PRE>
|
||||
<P>At the end of the build process you should have a gcc sub-directory containing
|
||||
release and debug versions of the library (libboost_regex.a and
|
||||
libboost_regex_debug.a). When you build projects that use regex++, you will
|
||||
need to add the boost install directory to your list of include paths and add
|
||||
<boost>/libs/regex/build/gcc/libboost_regex.a to your list of library
|
||||
files.
|
||||
</P>
|
||||
<P>There is also a makefile to build the library as a shared library:</P>
|
||||
<PRE>make -fgcc-shared.mak</PRE>
|
||||
<P>which will build libboost_regex.so and libboost_regex_debug.so.</P>
|
||||
<P>Both of these makefiles support the following environment variables:</P>
|
||||
<P>ICU_PATH: tells the makefile to build with Unicode support, set to the path
|
||||
where your ICU installation is located, for example with: <code>make
|
||||
ICU_PATH=/usr/local install -fgcc.mak</code></P>
|
||||
<P>CXXFLAGS: extra compiler options - note that this applies to both the debug and
|
||||
release builds.</P>
|
||||
<P>INCLUDES: additional include directories.</P>
|
||||
<P>LDFLAGS: additional linker options.</P>
|
||||
<P>LIBS: additional library files.</P>
|
||||
<P>For the more adventurous there is a configure script in
|
||||
<boost>/libs/config; see the <A href="../../config/config.htm">config
|
||||
library documentation</A>.</P>
|
||||
<H3><A name="sun"></A>Sun Workshop 6.1</H3>
|
||||
<P>There is a makefile for the sun (6.1) compiler (C++ version 3.12). From the
|
||||
command prompt change to the <boost>/libs/regex/build directory and type:
|
||||
</P>
|
||||
<PRE>dmake -f sunpro.mak </PRE>
|
||||
<P>At the end of the build process you should have a sunpro sub-directory
|
||||
containing single and multithread versions of the library (libboost_regex.a,
|
||||
libboost_regex.so, libboost_regex_mt.a and libboost_regex_mt.so). When you
|
||||
build projects that use regex++, you will need to add the boost install
|
||||
directory to your list of include paths and add
|
||||
<boost>/libs/regex/build/sunpro/ to your library search path.
|
||||
</P>
|
||||
<P>This makefile supports the following environment variables:</P>
|
||||
<P>CXXFLAGS: extra compiler options - note that this applies to both the single
|
||||
and multithreaded builds.</P>
|
||||
<P>INCLUDES: additional include directories.</P>
|
||||
<P>LDFLAGS: additional linker options.</P>
|
||||
<P>LIBS: additional library files.</P>
|
||||
<P>LIBSUFFIX: a suffix to mangle the library name with (defaults to nothing).</P>
|
||||
<P>This makefile does not set any architecture specific options like -xarch=v9,
|
||||
you can set these by defining the appropriate macros, for example:</P>
|
||||
<PRE>dmake CXXFLAGS="-xarch=v9" LDFLAGS="-xarch=v9" LIBSUFFIX="_v9" -f sunpro.mak</PRE>
|
||||
<P>will build v9 variants of the regex library named libboost_regex_v9.a etc.</P>
|
||||
<H3><A name="other"></A>Makefiles for Other compilers:
|
||||
</H3>
|
||||
<P>There is a generic makefile (<A href="../build/generic.mak">generic.mak</A> )
|
||||
provided in <boost-root>/libs/regex/build - see that makefile for details
|
||||
of environment variables that need to be set before use.
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
09 Jan 2005
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>© Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2005<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,181 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Introduction</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Introduction</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>Regular expressions are a form of pattern-matching that are often used in text
|
||||
processing; many users will be familiar with the Unix utilities <I>grep</I>, <I>sed</I>
|
||||
and <I>awk</I>, and the programming language <I>Perl</I>, each of which makes
|
||||
extensive use of regular expressions. Traditionally C++ users have been limited
|
||||
to the POSIX C API's for manipulating regular expressions, and while regex++
|
||||
does provide these API's, they do not represent the best way to use the
|
||||
library. For example regex++ can cope with wide character strings, or search
|
||||
and replace operations (in a manner analogous to either sed or Perl), something
|
||||
that traditional C libraries can not do.</P>
|
||||
<P>The class <A href="basic_regex.html">boost::basic_regex</A> is the key class in
|
||||
this library; it represents a "machine readable" regular expression, and is
|
||||
very closely modeled on std::basic_string, think of it as a string plus the
|
||||
actual state-machine required by the regular expression algorithms. Like
|
||||
std::basic_string there are two typedefs that are almost always the means by
|
||||
which this class is referenced:</P>
|
||||
<pre><B>namespace </B>boost{
|
||||
|
||||
<B>template</B> <<B>class</B> charT,
|
||||
<B> class</B> traits = regex_traits<charT> >
|
||||
<B>class</B> basic_regex;
|
||||
|
||||
<B>typedef</B> basic_regex<<B>char</B>> regex;
|
||||
<B>typedef</B> basic_regex<<B>wchar_t></B> wregex;
|
||||
|
||||
}</pre>
|
||||
<P>To see how this library can be used, imagine that we are writing a credit card
|
||||
processing application. Credit card numbers generally come as a string of
|
||||
16-digits, separated into groups of 4-digits, and separated by either a space
|
||||
or a hyphen. Before storing a credit card number in a database (not necessarily
|
||||
something your customers will appreciate!), we may want to verify that the
|
||||
number is in the correct format. To match any digit we could use the regular
|
||||
expression [0-9], however ranges of characters like this are actually locale
|
||||
dependent. Instead we should use the POSIX standard form [[:digit:]], or the
|
||||
regex++ and Perl shorthand for this \d (note that many older libraries tended
|
||||
to be hard-coded to the C-locale, consequently this was not an issue for them).
|
||||
That leaves us with the following regular expression to validate credit card
|
||||
number formats:</P>
|
||||
<PRE>(\d{4}[- ]){3}\d{4}</PRE>
|
||||
<P>Here the parentheses act to group (and mark for future reference)
|
||||
sub-expressions, and the {4} means "repeat exactly 4 times". This is an example
|
||||
of the extended regular expression syntax used by Perl, awk and egrep. Regex++
|
||||
also supports the older "basic" syntax used by sed and grep, but this is
|
||||
generally less useful, unless you already have some basic regular expressions
|
||||
that you need to reuse.</P>
|
||||
<P>Now let's take that expression and place it in some C++ code to validate the
|
||||
format of a credit card number:</P>
|
||||
<PRE><B>bool</B> validate_card_format(<B>const</B> std::string& s)
|
||||
{
|
||||
<B>static</B> <B>const</B> <A href="basic_regex.html">boost::regex</A> e("(\\d{4}[- ]){3}\\d{4}");
|
||||
<B>return</B> <A href="regex_match.html">regex_match</A>(s, e);
|
||||
}</PRE>
|
||||
<P>Note how we had to add some extra escapes to the expression: remember that the
|
||||
escape is seen once by the C++ compiler, before it gets to be seen by the
|
||||
regular expression engine, consequently escapes in regular expressions have to
|
||||
be doubled up when embedding them in C/C++ code. Also note that all the
|
||||
examples assume that your compiler supports Koenig lookup, if yours doesn't
|
||||
(for example VC6), then you will have to add some boost:: prefixes to some of
|
||||
the function calls in the examples.</P>
|
||||
<P>Those of you who are familiar with credit card processing, will have realized
|
||||
that while the format used above is suitable for human readable card numbers,
|
||||
it does not represent the format required by online credit card systems; these
|
||||
require the number as a string of 16 (or possibly 15) digits, without any
|
||||
intervening spaces. What we need is a means to convert easily between the two
|
||||
formats, and this is where search and replace comes in. Those who are familiar
|
||||
with the utilities <I>sed</I> and <I>Perl</I> will already be ahead here; we
|
||||
need two strings - one a regular expression - the other a "<A href="format_syntax.html">format
|
||||
string</A>" that provides a description of the text to replace the match
|
||||
with. In regex++ this search and replace operation is performed with the
|
||||
algorithm<A href="regex_replace.html"> regex_replace</A>, for our credit card
|
||||
example we can write two algorithms like this to provide the format
|
||||
conversions:</P>
|
||||
<PRE><I>// match any format with the regular expression:
|
||||
</I><B>const</B> boost::regex e("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z");
|
||||
<B>const</B> std::string machine_format("\\1\\2\\3\\4");
|
||||
<B>const</B> std::string human_format("\\1-\\2-\\3-\\4");
|
||||
|
||||
std::string machine_readable_card_number(<B>const</B> std::string s)
|
||||
{
|
||||
<B>return</B> <A href="regex_replace.html">regex_replace</A>(s, e, machine_format, boost::match_default | boost::format_sed);
|
||||
}
|
||||
|
||||
std::string human_readable_card_number(<B>const</B> std::string s)
|
||||
{
|
||||
<B>return</B> <A href="regex_replace.html">regex_replace</A>(s, e, human_format, boost::match_default | boost::format_sed);
|
||||
}</PRE>
|
||||
<P>Here we've used marked sub-expressions in the regular expression to split out
|
||||
the four parts of the card number as separate fields, the format string then
|
||||
uses the sed-like syntax to replace the matched text with the reformatted
|
||||
version.</P>
|
||||
<P>In the examples above, we haven't directly manipulated the results of a regular
|
||||
expression match, however in general the result of a match contains a number of
|
||||
sub-expression matches in addition to the overall match. When the library needs
|
||||
to report a regular expression match it does so using an instance of the class <A href="match_results.html">
|
||||
match_results</A>, as before there are typedefs of this class for the most
|
||||
common cases:
|
||||
</P>
|
||||
<PRE><B>namespace </B>boost{
|
||||
<B>typedef</B> match_results<<B>const</B> <B>char</B>*> cmatch;
|
||||
<B>typedef</B> match_results<<B>const</B> <B>wchar_t</B>*> wcmatch;
|
||||
<STRONG>typedef</STRONG> match_results<std::string::const_iterator> smatch;
|
||||
<STRONG>typedef</STRONG> match_results<std::wstring::const_iterator> wsmatch;
|
||||
}</PRE>
|
||||
<P>The algorithms <A href="regex_search.html">regex_search</A> and <A href="regex_match.html">regex_match</A>
|
||||
make use of match_results to report what matched; the difference between these
|
||||
algorithms is that <A href="regex_match.html">regex_match</A> will only find
|
||||
matches that consume <EM>all</EM> of the input text, whereas <A href="regex_search.html">
|
||||
regex_search</A> will <EM>search</EM> for a match anywhere within the text
|
||||
being matched.</P>
|
||||
<P>Note that these algorithms are not restricted to searching regular C-strings,
|
||||
any bidirectional iterator type can be searched, allowing for the possibility
|
||||
of seamlessly searching almost any kind of data.
|
||||
</P>
|
||||
<P>For search and replace operations, in addition to the algorithm <A href="regex_replace.html">
|
||||
regex_replace</A> that we have already seen, the <A href="match_results.html">match_results</A>
|
||||
class has a format member that takes the result of a match and a format string,
|
||||
and produces a new string by merging the two.</P>
|
||||
<P>For iterating through all occurrences of an expression within a text, there are
|
||||
two iterator types: <A href="regex_iterator.html">regex_iterator</A> will
|
||||
enumerate over the <A href="match_results.html">match_results</A> objects
|
||||
found, while <A href="regex_token_iterator.html">regex_token_iterator</A> will
|
||||
enumerate a series of strings (similar to perl style split operations).</P>
|
||||
<P>For those that dislike templates, there is a high level wrapper class RegEx
|
||||
that is an encapsulation of the lower level template code - it provides a
|
||||
simplified interface for those that don't need the full power of the library,
|
||||
and supports only narrow characters, and the "extended" regular expression
|
||||
syntax. This class is now deprecated as it does not form part of the regular
|
||||
expressions C++ standard library proposal.
|
||||
</P>
|
||||
<P>The <A href="posix_api.html">POSIX API</A> functions: regcomp, regexec, regfree
|
||||
and regerror, are available in both narrow character and Unicode versions, and
|
||||
are provided for those who need compatibility with these APIs.
|
||||
</P>
|
||||
<P>Finally, note that the library now has run-time <A href="localisation.html">localization</A>
|
||||
support, and recognizes the full POSIX regular expression syntax - including
|
||||
advanced features like multi-character collating elements and equivalence
|
||||
classes - as well as providing compatibility with other regular expression
|
||||
libraries including GNU and BSD4 regex packages, and to a more limited extent
|
||||
Perl 5.
|
||||
</P>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -1,808 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Localisation</title>
|
||||
<meta name="generator" content="HTML Tidy, see www.w3.org">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<p></p>
|
||||
<table id="Table1" cellspacing="1" cellpadding="1" width="100%" border="0">
|
||||
<tr>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<td width="353">
|
||||
<h1 align="center">Boost.Regex</h1>
|
||||
<h2 align="center">Localisation</h2>
|
||||
</td>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<hr>
|
||||
<p>Boost.regex provides extensive support for run-time localization, the
|
||||
localization model used can be split into two parts: front-end and back-end.</p>
|
||||
<p>Front-end localization deals with everything which the user sees - error
|
||||
messages, and the regular expression syntax itself. For example a French
|
||||
application could change [[:word:]] to [[:mot:]] and \w to \m. Modifying the
|
||||
front end locale requires active support from the developer, by providing the
|
||||
library with a message catalogue to load, containing the localized strings.
|
||||
Front-end locale is affected by the LC_MESSAGES category only.</p>
|
||||
<p>Back-end localization deals with everything that occurs after the expression
|
||||
has been parsed - in other words everything that the user does not see or
|
||||
interact with directly. It deals with case conversion, collation, and character
|
||||
class membership. The back-end locale does not require any intervention from
|
||||
the developer - the library will acquire all the information it requires for
|
||||
the current locale from the underlying operating system / run time library.
|
||||
This means that if the program user does not interact with regular expressions
|
||||
directly - for example if the expressions are embedded in your C++ code - then
|
||||
no explicit localization is required, as the library will take care of
|
||||
everything for you. For example embedding the expression [[:word:]]+ in your
|
||||
code will always match a whole word, if the program is run on a machine with,
|
||||
for example, a Greek locale, then it will still match a whole word, but in
|
||||
Greek characters rather than Latin ones. The back-end locale is affected by the
|
||||
LC_CTYPE and LC_COLLATE categories.</p>
|
||||
<p>There are three separate localization mechanisms supported by boost.regex:</p>
|
||||
<h3>Win32 localization model.</h3>
|
||||
<p>This is the default model when the library is compiled under Win32, and is
|
||||
encapsulated by the traits class w32_regex_traits. When this model is in effect
|
||||
each basic_regex object gets its own LCID, by default this is the user's
|
||||
default setting as returned by GetUserDefaultLCID, but you can call <EM>imbue</EM>
|
||||
on the basic_regex object to set its locale to some other LCID if you wish.
|
||||
All the settings used by boost.regex are acquired directly from the operating
|
||||
system bypassing the C run time library. Front-end localization requires a
|
||||
resource dll, containing a string table with the user-defined strings. The
|
||||
traits class exports the function:</p>
|
||||
<p>static std::string set_message_catalogue(const std::string& s);</p>
|
||||
<p>which needs to be called with a string identifying the name of the resource
|
||||
dll, <i>before</i> your code compiles any regular expressions (but not
|
||||
necessarily before you construct any <i>basic_regex</i> instances):</p>
|
||||
<p>
|
||||
boost::w32_regex_traits<char>::set_message_catalogue("mydll.dll");</p>
|
||||
<p>
|
||||
The library provides full Unicode support under NT, under Windows 9x the
|
||||
library degrades gracefully - characters 0 to 255 are supported, the remainder
|
||||
are treated as "unknown" graphic characters.</p>
|
||||
<h3>C localization model.</h3>
|
||||
<p>This model has been deprecated in favor of the C++ locale for all non-Windows
|
||||
compilers that support it. This locale is encapsulated by the traits
|
||||
class <i>c_regex_traits</i>, Win32 users can force this model to take effect by
|
||||
defining the pre-processor symbol BOOST_REGEX_USE_C_LOCALE. When this model is
|
||||
in effect there is a single global locale, as set by <i>setlocale</i>. All
|
||||
settings are acquired from your run time library, consequently Unicode support
|
||||
is dependent upon your run time library implementation.</p>
|
||||
<P>Front end localization is not supported.</P>
|
||||
<P>Note that calling <i>setlocale</i> invalidates all compiled regular
|
||||
expressions, calling <tt>setlocale(LC_ALL, "C")</tt> will make this library
|
||||
behave equivalently to most traditional regular expression libraries including
|
||||
version 1 of this library.</P>
|
||||
<h3>C++ localization model.</h3>
|
||||
<p>This model is the default for non-Windows compilers.</p>
|
||||
<P>
|
||||
When this model is in effect each instance of basic_regex<> has its own
|
||||
instance of std::locale, class basic_regex<> also has a member function <i>imbue</i>
|
||||
which allows the locale for the expression to be set on a per-instance basis.
|
||||
Front end localization requires a POSIX message catalogue, which will be loaded
|
||||
via the std::messages facet of the expression's locale, the traits class
|
||||
exports the symbol:</P>
|
||||
<p>static std::string set_message_catalogue(const std::string& s);</p>
|
||||
<p>which needs to be called with a string identifying the name of the message
|
||||
catalogue, <i>before</i> your code compiles any regular expressions (but not
|
||||
necessarily before you construct any <i>basic_regex</i> instances):</p>
|
||||
<p>
|
||||
boost::cpp_regex_traits<char>::set_message_catalogue("mycatalogue");</p>
|
||||
<p>Note that calling basic_regex<>::imbue will invalidate any expression
|
||||
currently compiled in that instance of basic_regex<>.</p>
|
||||
<P>Finally note that if you build the library with a non-default localization
|
||||
model, then the appropriate pre-processor symbol (BOOST_REGEX_USE_C_LOCALE or
|
||||
BOOST_REGEX_USE_CPP_LOCALE) must be defined both when you build the support
|
||||
library, and when you include <boost/regex.hpp> or
|
||||
<boost/cregex.hpp> in your code. The best way to ensure this is to add
|
||||
the #define to <boost/regex/user.hpp>.</P>
|
||||
<h3>Providing a message catalogue:</h3>
|
||||
<p>
|
||||
In order to localize the front end of the library, you need to provide the
|
||||
library with the appropriate message strings contained either in a resource
|
||||
dll's string table (Win32 model), or a POSIX message catalogue (C++ models). In
|
||||
the latter case the messages must appear in message set zero of the catalogue.
|
||||
The messages and their id's are as follows:<br>
|
||||
</p>
|
||||
<p></p>
|
||||
<table id="Table2" cellspacing="0" cellpadding="6" width="624" border="0">
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">Message id</td>
|
||||
<td valign="top" width="32%">Meaning</td>
|
||||
<td valign="top" width="29%">Default value</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">101</td>
|
||||
<td valign="top" width="32%">The character used to start a sub-expression.</td>
|
||||
<td valign="top" width="29%">"("</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">102</td>
|
||||
<td valign="top" width="32%">The character used to end a sub-expression
|
||||
declaration.</td>
|
||||
<td valign="top" width="29%">")"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">103</td>
|
||||
<td valign="top" width="32%">The character used to denote an end of line
|
||||
assertion.</td>
|
||||
<td valign="top" width="29%">"$"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">104</td>
|
||||
<td valign="top" width="32%">The character used to denote the start of line
|
||||
assertion.</td>
|
||||
<td valign="top" width="29%">"^"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">105</td>
|
||||
<td valign="top" width="32%">The character used to denote the "match any character
|
||||
expression".</td>
|
||||
<td valign="top" width="29%">"."</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">106</td>
|
||||
<td valign="top" width="32%">The match zero or more times repetition operator.</td>
|
||||
<td valign="top" width="29%">"*"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">107</td>
|
||||
<td valign="top" width="32%">The match one or more repetition operator.</td>
|
||||
<td valign="top" width="29%">"+"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">108</td>
|
||||
<td valign="top" width="32%">The match zero or one repetition operator.</td>
|
||||
<td valign="top" width="29%">"?"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">109</td>
|
||||
<td valign="top" width="32%">The character set opening character.</td>
|
||||
<td valign="top" width="29%">"["</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">110</td>
|
||||
<td valign="top" width="32%">The character set closing character.</td>
|
||||
<td valign="top" width="29%">"]"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">111</td>
|
||||
<td valign="top" width="32%">The alternation operator.</td>
|
||||
<td valign="top" width="29%">"|"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">112</td>
|
||||
<td valign="top" width="32%">The escape character.</td>
|
||||
<td valign="top" width="29%">"\\"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">113</td>
|
||||
<td valign="top" width="32%">The hash character (not currently used).</td>
|
||||
<td valign="top" width="29%">"#"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">114</td>
|
||||
<td valign="top" width="32%">The range operator.</td>
|
||||
<td valign="top" width="29%">"-"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">115</td>
|
||||
<td valign="top" width="32%">The repetition operator opening character.</td>
|
||||
<td valign="top" width="29%">"{"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">116</td>
|
||||
<td valign="top" width="32%">The repetition operator closing character.</td>
|
||||
<td valign="top" width="29%">"}"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">117</td>
|
||||
<td valign="top" width="32%">The digit characters.</td>
|
||||
<td valign="top" width="29%">"0123456789"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">118</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents the word boundary assertion.</td>
|
||||
<td valign="top" width="29%">"b"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">119</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents the non-word boundary assertion.</td>
|
||||
<td valign="top" width="29%">"B"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">120</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents the word-start boundary assertion.</td>
|
||||
<td valign="top" width="29%">"<"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">121</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents the word-end boundary assertion.</td>
|
||||
<td valign="top" width="29%">">"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">122</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents any word character.</td>
|
||||
<td valign="top" width="29%">"w"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">123</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents a non-word character.</td>
|
||||
<td valign="top" width="29%">"W"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">124</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents a start of buffer assertion.</td>
|
||||
<td valign="top" width="29%">"`A"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">125</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents an end of buffer assertion.</td>
|
||||
<td valign="top" width="29%">"'z"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">126</td>
|
||||
<td valign="top" width="32%">The newline character.</td>
|
||||
<td valign="top" width="29%">"\n"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">127</td>
|
||||
<td valign="top" width="32%">The comma separator.</td>
|
||||
<td valign="top" width="29%">","</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">128</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents the bell character.</td>
|
||||
<td valign="top" width="29%">"a"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">129</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents the form feed character.</td>
|
||||
<td valign="top" width="29%">"f"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">130</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents the newline character.</td>
|
||||
<td valign="top" width="29%">"n"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">131</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents the carriage return character.</td>
|
||||
<td valign="top" width="29%">"r"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">132</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents the tab character.</td>
|
||||
<td valign="top" width="29%">"t"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">133</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents the vertical tab character.</td>
|
||||
<td valign="top" width="29%">"v"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">134</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents the start of a hexadecimal character constant.</td>
|
||||
<td valign="top" width="29%">"x"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">135</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents the start of an ASCII escape character.</td>
|
||||
<td valign="top" width="29%">"c"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">136</td>
|
||||
<td valign="top" width="32%">The colon character.</td>
|
||||
<td valign="top" width="29%">":"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">137</td>
|
||||
<td valign="top" width="32%">The equals character.</td>
|
||||
<td valign="top" width="29%">"="</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">138</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents the ASCII escape character.</td>
|
||||
<td valign="top" width="29%">"e"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">139</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents any lower case character.</td>
|
||||
<td valign="top" width="29%">"l"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">140</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents any non-lower case character.</td>
|
||||
<td valign="top" width="29%">"L"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">141</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents any upper case character.</td>
|
||||
<td valign="top" width="29%">"u"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">142</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents any non-upper case character.</td>
|
||||
<td valign="top" width="29%">"U"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">143</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents any space character.</td>
|
||||
<td valign="top" width="29%">"s"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">144</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents any non-space character.</td>
|
||||
<td valign="top" width="29%">"S"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">145</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents any digit character.</td>
|
||||
<td valign="top" width="29%">"d"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">146</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents any non-digit character.</td>
|
||||
<td valign="top" width="29%">"D"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">147</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents the end quote operator.</td>
|
||||
<td valign="top" width="29%">"E"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">148</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents the start quote operator.</td>
|
||||
<td valign="top" width="29%">"Q"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">149</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents a Unicode combining character sequence.</td>
|
||||
<td valign="top" width="29%">"X"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">150</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents any single character.</td>
|
||||
<td valign="top" width="29%">"C"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">151</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents end of buffer operator.</td>
|
||||
<td valign="top" width="29%">"Z"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="21%">152</td>
|
||||
<td valign="top" width="32%">The character which when preceded by an escape
|
||||
character represents the continuation assertion.</td>
|
||||
<td valign="top" width="29%">"G"</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> </td>
|
||||
<td>153</td>
|
||||
<td>The character which when preceded by (? indicates a zero width negated
|
||||
forward lookahead assert.</td>
|
||||
<td>!</td>
|
||||
<td> </td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<p>Custom error messages are loaded as follows: </p>
|
||||
<p></p>
|
||||
<table id="Table3" cellspacing="0" cellpadding="7" width="624" border="0">
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">Message ID</td>
|
||||
<td valign="top" width="32%">Error message ID</td>
|
||||
<td valign="top" width="31%">Default string</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">201</td>
|
||||
<td valign="top" width="32%">REG_NOMATCH</td>
|
||||
<td valign="top" width="31%">"No match"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">202</td>
|
||||
<td valign="top" width="32%">REG_BADPAT</td>
|
||||
<td valign="top" width="31%">"Invalid regular expression"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">203</td>
|
||||
<td valign="top" width="32%">REG_ECOLLATE</td>
|
||||
<td valign="top" width="31%">"Invalid collation character"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">204</td>
|
||||
<td valign="top" width="32%">REG_ECTYPE</td>
|
||||
<td valign="top" width="31%">"Invalid character class name"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">205</td>
|
||||
<td valign="top" width="32%">REG_EESCAPE</td>
|
||||
<td valign="top" width="31%">"Trailing backslash"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">206</td>
|
||||
<td valign="top" width="32%">REG_ESUBREG</td>
|
||||
<td valign="top" width="31%">"Invalid back reference"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">207</td>
|
||||
<td valign="top" width="32%">REG_EBRACK</td>
|
||||
<td valign="top" width="31%">"Unmatched [ or [^"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">208</td>
|
||||
<td valign="top" width="32%">REG_EPAREN</td>
|
||||
<td valign="top" width="31%">"Unmatched ( or \\("</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">209</td>
|
||||
<td valign="top" width="32%">REG_EBRACE</td>
|
||||
<td valign="top" width="31%">"Unmatched \\{"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">210</td>
|
||||
<td valign="top" width="32%">REG_BADBR</td>
|
||||
<td valign="top" width="31%">"Invalid content of \\{\\}"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">211</td>
|
||||
<td valign="top" width="32%">REG_ERANGE</td>
|
||||
<td valign="top" width="31%">"Invalid range end"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">212</td>
|
||||
<td valign="top" width="32%">REG_ESPACE</td>
|
||||
<td valign="top" width="31%">"Memory exhausted"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">213</td>
|
||||
<td valign="top" width="32%">REG_BADRPT</td>
|
||||
<td valign="top" width="31%">"Invalid preceding regular expression"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">214</td>
|
||||
<td valign="top" width="32%">REG_EEND</td>
|
||||
<td valign="top" width="31%">"Premature end of regular expression"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">215</td>
|
||||
<td valign="top" width="32%">REG_ESIZE</td>
|
||||
<td valign="top" width="31%">"Regular expression too big"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">216</td>
|
||||
<td valign="top" width="32%">REG_ERPAREN</td>
|
||||
<td valign="top" width="31%">"Unmatched ) or \\)"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">217</td>
|
||||
<td valign="top" width="32%">REG_EMPTY</td>
|
||||
<td valign="top" width="31%">"Empty expression"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">218</td>
|
||||
<td valign="top" width="32%">REG_E_UNKNOWN</td>
|
||||
<td valign="top" width="31%">"Unknown error"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<p>Custom character class names are loaded as follows: </p>
|
||||
<p></p>
|
||||
<table id="Table4" cellspacing="0" cellpadding="7" width="624" border="0">
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">Message ID</td>
|
||||
<td valign="top" width="32%">Description</td>
|
||||
<td valign="top" width="31%">Equivalent default class name</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">300</td>
|
||||
<td valign="top" width="32%">The character class name for alphanumeric characters.</td>
|
||||
<td valign="top" width="31%">"alnum"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">301</td>
|
||||
<td valign="top" width="32%">The character class name for alphabetic characters.</td>
|
||||
<td valign="top" width="31%">"alpha"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">302</td>
|
||||
<td valign="top" width="32%">The character class name for control characters.</td>
|
||||
<td valign="top" width="31%">"cntrl"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">303</td>
|
||||
<td valign="top" width="32%">The character class name for digit characters.</td>
|
||||
<td valign="top" width="31%">"digit"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">304</td>
|
||||
<td valign="top" width="32%">The character class name for graphics characters.</td>
|
||||
<td valign="top" width="31%">"graph"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">305</td>
|
||||
<td valign="top" width="32%">The character class name for lower case characters.</td>
|
||||
<td valign="top" width="31%">"lower"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">306</td>
|
||||
<td valign="top" width="32%">The character class name for printable characters.</td>
|
||||
<td valign="top" width="31%">"print"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">307</td>
|
||||
<td valign="top" width="32%">The character class name for punctuation characters.</td>
|
||||
<td valign="top" width="31%">"punct"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">308</td>
|
||||
<td valign="top" width="32%">The character class name for space characters.</td>
|
||||
<td valign="top" width="31%">"space"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">309</td>
|
||||
<td valign="top" width="32%">The character class name for upper case characters.</td>
|
||||
<td valign="top" width="31%">"upper"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">310</td>
|
||||
<td valign="top" width="32%">The character class name for hexadecimal characters.</td>
|
||||
<td valign="top" width="31%">"xdigit"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">311</td>
|
||||
<td valign="top" width="32%">The character class name for blank characters.</td>
|
||||
<td valign="top" width="31%">"blank"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">312</td>
|
||||
<td valign="top" width="32%">The character class name for word characters.</td>
|
||||
<td valign="top" width="31%">"word"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="8%"> </td>
|
||||
<td valign="top" width="22%">313</td>
|
||||
<td valign="top" width="32%">The character class name for Unicode characters.</td>
|
||||
<td valign="top" width="31%">"unicode"</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<p>Finally, custom collating element names are loaded starting from message id
|
||||
400, and terminating when the first load thereafter fails. Each message looks
|
||||
something like: "tagname string" where <i>tagname</i> is the name used inside
|
||||
[[.tagname.]] and <i>string</i> is the actual text of the collating element.
|
||||
Note that the value of collating element [[.zero.]] is used for the conversion
|
||||
of strings to numbers - if you replace this with another value then that will
|
||||
be used for string parsing - for example use the Unicode character 0x0660 for
|
||||
[[.zero.]] if you want to use Unicode Arabic-Indic digits in your regular
|
||||
expressions in place of Latin digits.</p>
|
||||
<p>Note that the POSIX defined names for character classes and collating elements
|
||||
are always available - even if custom names are defined; in contrast, custom
|
||||
error messages, and custom syntax messages replace the default ones.</p>
|
||||
<p></p>
|
||||
<hr>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
26 June 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,295 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: match_flag_type</title>
|
||||
<meta name="generator" content="HTML Tidy, see www.w3.org">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<p></p>
|
||||
<table id="Table1" cellspacing="1" cellpadding="1" width="100%" border="0">
|
||||
<tr>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<td width="353">
|
||||
<h1 align="center">Boost.Regex</h1>
|
||||
<h2 align="center">match_flag_type</h2>
|
||||
</td>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<hr>
|
||||
<h3>Synopsis</h3>
|
||||
<p>The type <code>match_flag_type</code> is an implementation specific bitmask
|
||||
type (17.3.2.1.2) that controls how a regular expression is matched against a
|
||||
character sequence. The behavior of the format flags is described in more
|
||||
detail in the <A href="format_syntax.html">format syntax guide</A>.</p>
|
||||
<pre>
|
||||
namespace boost{ namespace regex_constants{
|
||||
|
||||
typedef <EM>implementation-specific-bitmask-type</EM> match_flag_type;
|
||||
|
||||
static const match_flag_type match_default = 0;
|
||||
static const match_flag_type match_not_bob;
|
||||
static const match_flag_type match_not_eob;
|
||||
static const match_flag_type match_not_bol;
|
||||
static const match_flag_type match_not_eol;
|
||||
static const match_flag_type match_not_bow;
|
||||
static const match_flag_type match_not_eow;
|
||||
static const match_flag_type match_any;
|
||||
static const match_flag_type match_not_null;
|
||||
static const match_flag_type match_continuous;
|
||||
static const match_flag_type match_partial;
|
||||
static const match_flag_type match_single_line;
|
||||
static const match_flag_type match_prev_avail;
|
||||
static const match_flag_type match_not_dot_newline;
|
||||
static const match_flag_type match_not_dot_null;
|
||||
|
||||
static const match_flag_type format_default = 0;
|
||||
static const match_flag_type format_sed;
|
||||
static const match_flag_type format_perl;
|
||||
static const match_flag_type format_literal;
|
||||
static const match_flag_type format_no_copy;
|
||||
static const match_flag_type format_first_only;
|
||||
static const match_flag_type format_all;
|
||||
|
||||
} // namespace regex_constants
|
||||
} // namespace boost
|
||||
</pre>
|
||||
<h3>Description</h3>
|
||||
<p>The type <code>match_flag_type</code> is an implementation specific bitmask
|
||||
type (17.3.2.1.2). When matching a regular expression against a sequence of
|
||||
characters [first, last) then setting its elements has the effects listed in
|
||||
the table below:</p>
|
||||
<p></p>
|
||||
<table id="Table2" cellspacing="1" cellpadding="7" width="100%" border="0">
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p><STRONG>Element</STRONG></p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p><STRONG>Effect if set</STRONG></p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>match_default</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>Specifies that matching of regular expressions proceeds without any
|
||||
modification of the normal rules used in ECMA-262, ECMAScript Language
|
||||
Specification, Chapter 15 part 10, RegExp (Regular Expression) Objects (FWD.1)</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">match_not_bob</td>
|
||||
<td valign="top" width="50%">Specifies that the expressions "\A" and
|
||||
"\`" should not match against the sub-sequence [first,first).</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">match_not_eob</td>
|
||||
<td valign="top" width="50%">Specifies that the expressions "\'", "\z" and
|
||||
"\Z" should not match against the sub-sequence [last,last).</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>match_not_bol</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>Specifies that the expression "^" should not be matched against the
|
||||
sub-sequence [first,first).</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>match_not_eol</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>Specifies that the expression "$" should not be matched against the
|
||||
sub-sequence [last,last).</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>match_not_bow</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>Specifies that the expressions "\<" and "\b" should not be matched
|
||||
against the sub-sequence [first,first).</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>match_not_eow</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>Specifies that the expressions "\>" and "\b" should not be matched
|
||||
against the sub-sequence [last,last).</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>match_any</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>Specifies that if more than one match is possible then any match is an
|
||||
acceptable result: this will still find the leftmost match, but may not find
|
||||
the "best" match at that position. Use this flag if you care about the
|
||||
speed of matching, but don't care what was matched (only whether there is one
|
||||
or not).</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>match_not_null</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>Specifies that the expression can not be matched against an empty sequence.</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>match_continuous</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>Specifies that the expression must match a sub-sequence that begins at <i>first</i>.</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>match_partial</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>Specifies that if no match can be found, then it is acceptable to return a
|
||||
match [from, last) such that from != last, if there could exist some longer
|
||||
sequence of characters [from,to) of which [from,last) is a prefix, and which
|
||||
would result in a full match.</p>
|
||||
<P>This flag is used when matching incomplete or very long texts, see the <A href="partial_matches.html">
|
||||
partial matches documentation</A> for more information.</P>
|
||||
</td>
|
||||
</tr>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">match_extra</TD>
|
||||
<TD vAlign="top" width="50%">Instructs the matching engine to retain all available <A href="captures.html">
|
||||
capture</A> information; if a capturing group is repeated then information
|
||||
about every repeat is available via <A href="match_results.html#m17">match_results::captures()</A>
|
||||
or <A href="sub_match.html#m8">sub_match::captures().</A></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">match_single_line</TD>
|
||||
<TD vAlign="top" width="50%">Equivalent to the inverse of Perl's /m modifier;
|
||||
prevents ^ from matching after an embedded newline character (so that it only
|
||||
matches at the start of the text being matched), and $ from matching before an
|
||||
embedded newline (so that it only matches at the end of the text being
|
||||
matched).</TD>
|
||||
</TR>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>match_prev_avail</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>Specifies that <code>--first</code> is a valid iterator position, when this
|
||||
flag is set then the flags <code>match_not_bol</code> and <code>match_not_bow</code>
|
||||
are ignored by the regular expression algorithms (RE.7) and iterators (RE.8).</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">match_not_dot_newline</td>
|
||||
<td valign="top" width="50%">Specifies that the expression "." does not match a
|
||||
newline character. This is the inverse of Perl's /s modifier.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">match_not_dot_null</td>
|
||||
<td valign="top" width="50%">Specifies that the expression "." does not match a
|
||||
null character '\0'.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>format_default</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>Specifies that when a regular expression match is to be replaced by a new
|
||||
string, that the new string is constructed using the rules used by the
|
||||
ECMAScript replace function in ECMA-262, ECMAScript Language Specification,
|
||||
Chapter 15 part 5.4.11 String.prototype.replace. (FWD.1). In addition during
|
||||
search and replace operations then all non-overlapping occurrences of the
|
||||
regular expression are located and replaced, and sections of the input that did
|
||||
not match the expression, are copied unchanged to the output string.</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>format_sed</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>Specifies that when a regular expression match is to be replaced by a new
|
||||
string, that the new string is constructed using the rules used by the Unix sed
|
||||
utility in IEEE Std 1003.1-2001, Portable Operating System Interface (POSIX),
|
||||
Shells and Utilities.</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>format_perl</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>
|
||||
Specifies that when a regular expression match is to be replaced by a new
|
||||
string, that the new string is constructed using the same rules as Perl 5.</p>
|
||||
</td>
|
||||
</tr>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">format_literal</TD>
|
||||
<TD vAlign="top" width="50%">Specifies that when a regular expression match is to
|
||||
be replaced by a new string, that the new string is a literal copy of the
|
||||
replacement text.</TD>
|
||||
</TR>
|
||||
<tr>
|
||||
<td valign="top" width="50%" height="32">format_all</td>
|
||||
<td valign="top" width="50%" height="32">Specifies that all syntax extensions are
|
||||
enabled, including conditional (?ddexpression1:expression2) replacements: see
|
||||
the <A href="format_syntax.html">format string guide</A> for more details.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>format_no_copy</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>When specified during a search and replace operation, then sections of the
|
||||
character container sequence being searched that do not match the regular
|
||||
expression, are not copied to the output string.</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>format_first_only</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>When specified during a search and replace operation, then only the first
|
||||
occurrence of the regular expression is replaced.</p>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<p></p>
|
||||
<hr>
|
||||
<br>
|
||||
<br>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
04 Feb 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,459 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: class match_results</title>
|
||||
<meta content="HTML Tidy, see www.w3.org" name="generator">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
|
||||
<body>
|
||||
<p></p>
|
||||
<table id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<tr>
|
||||
<td vAlign="top" width="300">
|
||||
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../boost.png" width="277" border="0"></A></h3>
|
||||
</td>
|
||||
<td width="353">
|
||||
<h1 align="center">Boost.Regex</h1>
|
||||
<h2 align="center">class match_results</h2>
|
||||
</td>
|
||||
<td width="50">
|
||||
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<hr>
|
||||
<h3>Contents</h3>
|
||||
<dl class="index">
|
||||
<dt><A href="#synopsis">Synopsis</A> </dt><dt><A href="#description">Description</A> </dt>
|
||||
</dl>
|
||||
<h3><a name="synopsis"></a>Synopsis</h3>
|
||||
<p>#include <<A href="../../../boost/regex.hpp">boost/regex.hpp</A>></p>
|
||||
<p>Regular expressions are different from many simple pattern-matching algorithms
|
||||
in that as well as finding an overall match they can also produce
|
||||
sub-expression matches: each sub-expression being delimited in the pattern by a
|
||||
pair of parentheses (...). There has to be some method for reporting
|
||||
sub-expression matches back to the user: this is achieved by defining a
|
||||
class <i>match_results</i> that acts as an indexed collection of sub-expression
|
||||
matches, each sub-expression match being contained in an object of type <i><A href="sub_match.html">
|
||||
sub_match</A></i> .</p>
|
||||
<p>Template class match_results denotes a collection of character sequences
|
||||
representing the result of a regular expression match. Objects of type
|
||||
match_results are passed to the algorithms <A href="regex_match.html">regex_match</A>
|
||||
and <A href="regex_search.html">regex_search</A>, and are returned by the
|
||||
iterator <A href="regex_iterator.html">regex_iterator</A> . Storage for
|
||||
the collection is allocated and freed as necessary by the member functions of
|
||||
class match_results.</p>
|
||||
<p>The template class match_results conforms to the requirements of a Sequence, as
|
||||
specified in (lib.sequence.reqmts), except that only operations defined for
|
||||
const-qualified Sequences are supported.</p>
|
||||
<p>Class template match_results is most commonly used as one of the typedefs
|
||||
cmatch, wcmatch, smatch, or wsmatch:</p>
|
||||
<pre>template <class BidirectionalIterator,
|
||||
class Allocator = std::allocator<sub_match<BidirectionalIterator> >
|
||||
class match_results;
|
||||
|
||||
typedef match_results<const char*> cmatch;
|
||||
typedef match_results<const wchar_t*> wcmatch;
|
||||
typedef match_results<string::const_iterator> smatch;
|
||||
typedef match_results<wstring::const_iterator> wsmatch;
|
||||
|
||||
template <class BidirectionalIterator,
|
||||
class Allocator = std::allocator<sub_match<BidirectionalIterator> >
|
||||
class match_results
|
||||
{
|
||||
public:
|
||||
typedef sub_match<BidirectionalIterator> value_type;
|
||||
typedef const value_type& const_reference;
|
||||
typedef const_reference reference;
|
||||
typedef implementation defined const_iterator;
|
||||
typedef const_iterator iterator;
|
||||
typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
|
||||
typedef typename Allocator::size_type size_type;
|
||||
typedef Allocator allocator_type;
|
||||
typedef typename iterator_traits<BidirectionalIterator>::value_type char_type;
|
||||
typedef basic_string<char_type> string_type;
|
||||
|
||||
// construct/copy/destroy:
|
||||
explicit <A href="#c1" >match_results</A>(const Allocator& a = Allocator());
|
||||
<A href="#c2" >match_results</A>(const match_results& m);
|
||||
<A href="#c3" >match_results</A>& <A href="#c3" >operator</A>=(const match_results& m);
|
||||
~match_results();
|
||||
|
||||
// size:
|
||||
size_type <A href="#m1" >size</A>() const;
|
||||
size_type <A href="#m2" >max_size</A>() const;
|
||||
bool <A href="#m3" >empty</A>() const;
|
||||
// element access:
|
||||
difference_type <A href="#m4" >length</A>(int sub = 0) const;
|
||||
difference_type <A href="#m5" >position</A>(unsigned int sub = 0) const;
|
||||
string_type <A href="#m6" >str</A>(int sub = 0) const;
|
||||
const_reference <A href="#m7" >operator</A>[](int n) const;
|
||||
|
||||
const_reference <A href="#m8" >prefix</A>() const;
|
||||
|
||||
const_reference <A href="#m9" >suffix</A>() const;
|
||||
const_iterator <A href="#m10" >begin</A>() const;
|
||||
const_iterator <A href="#m11" >end</A>() const;
|
||||
// format:
|
||||
template <class OutputIterator>
|
||||
OutputIterator <A href="#m12" >format</A>(OutputIterator out,
|
||||
const string_type& fmt,
|
||||
match_flag_type flags = format_default) const;
|
||||
string_type <A href="#m13" >format</A>(const string_type& fmt,
|
||||
match_flag_type flags = format_default) const;
|
||||
|
||||
allocator_type <A href="#m14" >get_allocator</A>() const;
|
||||
void <A href="#m15" >swap</A>(match_results& that);
|
||||
|
||||
#ifdef BOOST_REGEX_MATCH_EXTRA
|
||||
typedef typename value_type::capture_sequence_type <A href="#m16" >capture_sequence_type</A>;
|
||||
const capture_sequence_type& <A href="#m17" >captures</A>(std::size_t i)const;
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
template <class BidirectionalIterator, class Allocator>
|
||||
bool <A href="#n1" >operator</A> == (const match_results<BidirectionalIterator, Allocator>& m1,
|
||||
const match_results<BidirectionalIterator, Allocator>& m2);
|
||||
template <class BidirectionalIterator, class Allocator>
|
||||
bool <A href="#n2" >operator</A> != (const match_results<BidirectionalIterator, Allocator>& m1,
|
||||
const match_results<BidirectionalIterator, Allocator>& m2);
|
||||
|
||||
template <class charT, class traits, class BidirectionalIterator, class Allocator>
|
||||
basic_ostream<charT, traits>&
|
||||
<A href="#n3" >operator</A> << (basic_ostream<charT, traits>& os,
|
||||
const match_results<BidirectionalIterator, Allocator>& m);
|
||||
|
||||
template <class BidirectionalIterator, class Allocator>
|
||||
void <A href="#n4" >swap</A>(match_results<BidirectionalIterator, Allocator>& m1,
|
||||
match_results<BidirectionalIterator, Allocator>& m2);
|
||||
</pre>
|
||||
<h3><a name="description"></a>Description</h3>
|
||||
<h4>match_results constructors</h4>
|
||||
<p>In all <code>match_results</code> constructors, a copy of the Allocator
|
||||
argument is used for any memory allocation performed by the constructor or
|
||||
member functions during the lifetime of the object.</p>
|
||||
<pre><A name=c1></A>
|
||||
match_results(const Allocator& a = Allocator());
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> Constructs an object of class match_results. The postconditions
|
||||
of this function are indicated in the table:</p>
|
||||
<p align="center"></p>
|
||||
<center>
|
||||
<table id="Table2" cellSpacing="1" cellPadding="7" width="624" border="1">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td vAlign="top" width="50%"><b></b>
|
||||
<p><b>Element</b></p>
|
||||
</td>
|
||||
<td vAlign="top" width="50%"><b></b>
|
||||
<p><b>Value</b></p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td vAlign="top" width="50%">
|
||||
<p>empty()</p>
|
||||
</td>
|
||||
<td vAlign="top" width="50%">
|
||||
<p>true</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td vAlign="top" width="50%">
|
||||
<p>size()</p>
|
||||
</td>
|
||||
<td vAlign="top" width="50%">
|
||||
<p>0</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td vAlign="top" width="50%">
|
||||
<p>str()</p>
|
||||
</td>
|
||||
<td vAlign="top" width="50%">
|
||||
<p>basic_string<charT>()</p>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody></table>
|
||||
</center>
|
||||
<p> </p>
|
||||
<pre><A name=c2></A>
|
||||
match_results(const match_results& m);
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> Constructs an object of class match_results, as a copy of m.</p>
|
||||
<pre><A name=c3></A>
|
||||
match_results& operator=(const match_results& m);
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> Assigns m to *this. The postconditions of this function are
|
||||
indicated in the table:</p>
|
||||
<p align="center"></p>
|
||||
<center>
|
||||
<table id="Table3" cellSpacing="1" cellPadding="7" width="624" border="1">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td vAlign="top" width="50%"><b></b>
|
||||
<p><b>Element</b></p>
|
||||
</td>
|
||||
<td vAlign="top" width="50%"><b></b>
|
||||
<p><b>Value</b></p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td vAlign="top" width="50%">
|
||||
<p>empty()</p>
|
||||
</td>
|
||||
<td vAlign="top" width="50%">
|
||||
<p>m.empty().</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td vAlign="top" width="50%">
|
||||
<p>size()</p>
|
||||
</td>
|
||||
<td vAlign="top" width="50%">
|
||||
<p>m.size().</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td vAlign="top" width="50%">
|
||||
<p>str(n)</p>
|
||||
</td>
|
||||
<td vAlign="top" width="50%">
|
||||
<p>m.str(n) for all integers n < m.size().</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td vAlign="top" width="50%">
|
||||
<p>prefix()</p>
|
||||
</td>
|
||||
<td vAlign="top" width="50%">
|
||||
<p>m.prefix().</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td vAlign="top" width="50%">
|
||||
<p>suffix()</p>
|
||||
</td>
|
||||
<td vAlign="top" width="50%">
|
||||
<p>m.suffix().</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td vAlign="top" width="50%">
|
||||
<p>(*this)[n]</p>
|
||||
</td>
|
||||
<td vAlign="top" width="50%">
|
||||
<p>m[n] for all integers n < m.size().</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td vAlign="top" width="50%">
|
||||
<p>length(n)</p>
|
||||
</td>
|
||||
<td vAlign="top" width="50%">
|
||||
<p>m.length(n) for all integers n < m.size().</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td vAlign="top" width="50%">
|
||||
<p>position(n)</p>
|
||||
</td>
|
||||
<td vAlign="top" width="50%">
|
||||
<p>m.position(n) for all integers n < m.size().</p>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody></table>
|
||||
</center>
|
||||
<h4>match_results size</h4>
|
||||
<pre><A name=m1></A>
|
||||
size_type size()const;
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> Returns the number of sub_match elements stored in *this; that
|
||||
is the number of marked sub-expressions in the regular expression that was
|
||||
matched plus one.</p>
|
||||
<pre><A name=m2></A>
|
||||
size_type max_size()const;
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> Returns the maximum number of sub_match elements that can be
|
||||
stored in *this.</p>
|
||||
<pre><A name=m3></A>
|
||||
bool empty()const;
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> Returns <code>size() == 0</code>.</p>
|
||||
<h4>match_results element access</h4>
|
||||
<pre><A name=m4></A>
|
||||
difference_type length(int sub = 0)const;
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> Returns the length of sub-expression <EM>sub</EM>, that is to
|
||||
say: <code>(*this)[sub].length()</code>.</p>
|
||||
<pre><A name=m5></A>
|
||||
difference_type position(unsigned int sub = 0)const;
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> Returns the starting location of sub-expression <EM>sub</EM>,
|
||||
or -1 if <EM>sub</EM> was not matched. Note that if this represents a <A href="partial_matches.html">
|
||||
partial match</A> , then <code>position()</code> will return the location of
|
||||
the partial match even though <code>(*this)[0].matched</code> is <EM>false</EM>.</p>
|
||||
<pre><A name=m6></A>
|
||||
string_type str(int sub = 0)const;
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> Returns sub-expression <EM>sub</EM> as a string: <code>string_type((*this)[sub]).</code></p>
|
||||
<pre><A name=m7></A>
|
||||
const_reference operator[](int n) const;
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> Returns a reference to the <code>sub_match</code> object
|
||||
representing the character sequence that matched marked sub-expression <i>n</i>.
|
||||
If <code>n == 0</code> then returns a reference to a <code>sub_match</code> object
|
||||
representing the character sequence that matched the whole regular
|
||||
expression. If <EM>n</EM> is out of range, or if <EM>n</EM> is an
|
||||
unmatched sub-expression, then returns a sub_match object whose <EM>matched</EM>
|
||||
member is <EM>false</EM>.</p>
|
||||
<pre><A name=m8></A>
|
||||
const_reference prefix()const;
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> Returns a reference to the <code>sub_match</code> object
|
||||
representing the character sequence from the start of the string being
|
||||
matched/searched, to the start of the match found.</p>
|
||||
<pre><A name=m9></A>
|
||||
const_reference suffix()const;
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> Returns a reference to the <code>sub_match</code> object
|
||||
representing the character sequence from the end of the match found to the end
|
||||
of the string being matched/searched.</p>
|
||||
<pre><A name=m10></A>
|
||||
const_iterator begin()const;
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> Returns a starting iterator that enumerates over all the marked
|
||||
sub-expression matches stored in *this.</p>
|
||||
<pre><A name=m11></A>
|
||||
const_iterator end()const;
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> Returns a terminating iterator that enumerates over all the
|
||||
marked sub-expression matches stored in *this.</p>
|
||||
<h4><A name="format"></A>match_results reformatting</h4>
|
||||
<pre><A name=m12></A>template <class OutputIterator>
|
||||
OutputIterator format(OutputIterator out,
|
||||
const string_type& fmt,
|
||||
<A href="match_flag_type.html" >match_flag_type</A> flags = format_default);
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Requires:</b> The type OutputIterator conforms to the Output Iterator
|
||||
requirements (24.1.2).</p>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> Copies the character sequence <i>[fmt.begin(), fmt.end())</i> to
|
||||
OutputIterator <i>out</i>. For each format specifier or escape sequence in <i>fmt</i>,
|
||||
replace that sequence with either the character(s) it represents, or the
|
||||
sequence of characters within *this to which it refers. The bitmasks specified
|
||||
in <i><A href="match_flag_type.html">flags</A></i> determine what <A href="format_syntax.html">
|
||||
format specifiers or escape sequences are recognized</A>, by default this is
|
||||
the format used by ECMA-262, ECMAScript Language Specification, Chapter 15 part
|
||||
5.4.11 String.prototype.replace.</p>
|
||||
<b></b>
|
||||
<p><b>Returns:</b> <i>out</i>.</p>
|
||||
<pre><A name=m13></A>
|
||||
string_type format(const string_type& fmt,
|
||||
<A href="match_flag_type.html" >match_flag_type</A> flags = format_default);
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> Returns a copy of the string <i>fmt</i>. For each format
|
||||
specifier or escape sequence in <i>fmt</i>, replace that sequence with either
|
||||
the character(s) it represents, or the sequence of characters within *this to
|
||||
which it refers. The bitmasks specified in <i><A href="match_flag_type.html">flags</A></i>
|
||||
determine what <A href="format_syntax.html">format specifiers or escape sequences
|
||||
are recognized</A>, by default this is the format used by ECMA-262,
|
||||
ECMAScript Language Specification, Chapter 15 part 5.4.11
|
||||
String.prototype.replace.</p>
|
||||
<H4>Allocator access</H4>
|
||||
<pre><A name=m14></A>allocator_type get_allocator()const;
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> Returns a copy of the Allocator that was passed to the object's
|
||||
constructor.</p>
|
||||
<H4><A name="m15"></A>Swap</H4>
|
||||
<PRE>void swap(match_results& that);
|
||||
</PRE>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> Swaps the contents of the two sequences.</p>
|
||||
<b></b>
|
||||
<p><b>Postcondition:</b> <code>*this</code> contains the sequence of matched
|
||||
sub-expressions that were in <code>that</code>, <code>that</code> contains the
|
||||
sequence of matched sub-expressions that were in <code>*this</code>.</p>
|
||||
<b></b>
|
||||
<p><b>Complexity:</b> constant time.</p>
|
||||
<H4>Captures</H4>
|
||||
<PRE><A name=m16></A>typedef typename value_type::capture_sequence_type capture_sequence_type;</PRE>
|
||||
<P>Defines an implementation-specific type that satisfies the requirements of
|
||||
a standard library Sequence (23.1.1 including the optional Table 68
|
||||
operations), whose value_type is a <EM>sub_match<BidirectionalIterator></EM>. This
|
||||
type happens to be <EM>std::vector<sub_match<BidirectionalIterator> ></EM>,
|
||||
but you shouldn't actually rely on that.</P>
|
||||
<PRE><A name=m17></A>const capture_sequence_type& <A href="#m8" >captures</A>(std::size_t i)const; </PRE>
|
||||
<P><STRONG>Effects:</STRONG> returns a sequence containing all the captures
|
||||
obtained for sub-expression <EM>i</EM>.</P>
|
||||
<P><STRONG>Returns:</STRONG> <code>(*this)[i].captures();</code></P>
|
||||
<P><STRONG>Preconditions:</STRONG> the library must be built and used with
|
||||
BOOST_REGEX_MATCH_EXTRA defined, and you must pass the flag <A href="match_flag_type.html">
|
||||
match_extra</A> to the regex matching functions (<A href="regex_match.html">regex_match</A>,
|
||||
<A href="regex_search.html">regex_search</A>, <A href="regex_iterator.html">regex_iterator</A>
|
||||
or <A href="regex_token_iterator.html">regex_token_iterator</A>) in order for
|
||||
this member function to be defined and return useful information.</P>
|
||||
<P><STRONG>Rationale:</STRONG> Enabling this feature has several consequences:
|
||||
</P>
|
||||
<UL>
|
||||
<LI>
|
||||
sub_match occupies more memory resulting in complex expressions running out of
|
||||
memory or stack space more quickly during matching.
|
||||
<LI>
|
||||
The matching algorithms are less efficient at handling some features
|
||||
(independent sub-expressions for example), even when match_extra is not used.
|
||||
<LI>
|
||||
The matching algorithms are much less efficient (i.e. slower), when match_extra
|
||||
is used. Mostly this is down to the extra memory allocations that have to
|
||||
take place.</LI></UL>
|
||||
<h4>match_results non-members</h4>
|
||||
<PRE><A name=n1></A>template <class BidirectionalIterator, class Allocator>
|
||||
bool operator == (const match_results<BidirectionalIterator, Allocator>& m1,
|
||||
const match_results<BidirectionalIterator, Allocator>& m2);</PRE>
|
||||
<P><B>Effects:</B> Compares the two sequences for equality.</P>
|
||||
<PRE><A name=n2></A>template <class BidirectionalIterator, class Allocator>
|
||||
bool operator != (const match_results<BidirectionalIterator, Allocator>& m1,
|
||||
const match_results<BidirectionalIterator, Allocator>& m2);</PRE>
|
||||
<P><B>Effects:</B> Compares the two sequences for inequality.</P>
|
||||
<PRE><A name=n3></A>template <class charT, class traits, class BidirectionalIterator, class Allocator>
|
||||
basic_ostream<charT, traits>&
|
||||
operator << (basic_ostream<charT, traits>& os,
|
||||
const match_results<BidirectionalIterator, Allocator>& m);</PRE>
|
||||
<P><B>Effects:</B> Writes the contents of <EM>m</EM> to the stream <EM>os</EM> as
|
||||
if by calling <code>os << m.str();</code> Returns <EM>os</EM>.</P>
|
||||
<PRE><A name=n4></A>template <class BidirectionalIterator, class Allocator>
|
||||
void swap(match_results<BidirectionalIterator, Allocator>& m1,
|
||||
match_results<BidirectionalIterator, Allocator>& m2);</PRE>
|
||||
<P><B>Effects:</B> Swaps the contents of the two sequences.</P>
|
||||
<p></p>
|
||||
<hr>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,294 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Working With MFC/ATL String Types</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td vAlign="top" width="300">
|
||||
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../boost.png" width="277" border="0"></A></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Working With MFC/ATL String Types.</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<H3>Contents</H3>
|
||||
<dl class="index">
|
||||
<dt><A href="#intro">Introduction</A> <dt><A href="#types">Types</A> <dt><A href="#create">Regular
|
||||
Expression Creation</A> <dt><A href="#algo">Overloaded Algorithms</A>
|
||||
<dd>
|
||||
<dl>
|
||||
<dt><A href="#regex_match">regex_match</A> <dt><A href="#regex_search">regex_search</A>
|
||||
<dt><A href="#regex_replace">regex_replace</A> </dt>
|
||||
</dl>
|
||||
<dt><A href="#iterators">Iterators</A>
|
||||
<dd>
|
||||
<dl>
|
||||
<dt><A href="#regex_iterator">regex_iterator creation helper</A> <dt><A href="#regex_token_iterator">
|
||||
regex_token_iterator creation helpers</A></dt>
|
||||
</dl>
|
||||
</dd>
|
||||
</dl>
|
||||
<H3><a name="intro"></a>Introduction</H3>
|
||||
<P>The header <boost/regex/mfc.hpp> provides Boost.Regex support for MFC
|
||||
string types: note that this support requires Visual Studio .NET (Visual C++ 7)
|
||||
or later, where all of the MFC and ATL string types are based around
|
||||
the CSimpleStringT class template. </P>
|
||||
<P>In the following documentation, whenever you see CSimpleStringT<charT>,
|
||||
then you can substitute any of the following MFC/ATL types (all of which
|
||||
inherit from CSimpleStringT):</P>
|
||||
<P>CString<BR>
|
||||
CStringA<BR>
|
||||
CStringW<BR>
|
||||
CAtlString<BR>
|
||||
CAtlStringA<BR>
|
||||
CAtlStringW<BR>
|
||||
CStringT<charT,traits><BR>
|
||||
CFixedStringT<charT,N><BR>
|
||||
CSimpleStringT<charT></P>
|
||||
<H3><A name="types"></A>Types</H3>
|
||||
<P>The following typedefs are provided for the convenience of those working with
|
||||
TCHAR's:</P>
|
||||
<PRE>typedef <A href="basic_regex.html" >basic_regex</A><TCHAR> tregex;
|
||||
typedef <A href="match_results.html" >match_results</A><TCHAR const*> tmatch;
|
||||
typedef <A href="regex_iterator.html" >regex_iterator</A><TCHAR const*> tregex_iterator;
|
||||
typedef <A href="regex_token_iterator.html" >regex_token_iterator</A><TCHAR const*> tregex_token_iterator;
|
||||
</PRE>
|
||||
<P>If you are working with explicitly narrow or wide characters rather than TCHAR,
|
||||
then use the regular Boost.Regex types instead.</P>
|
||||
<H3><A name="create"></A>Regular Expression Creation</H3>
|
||||
<P>The following helper function is available to assist in the creation of a
|
||||
regular expression from an MFC/ATL string type:</P>
|
||||
<pre>template <class charT>
|
||||
basic_regex<charT>
|
||||
make_regex(const ATL::CSimpleStringT<charT>& s,
|
||||
::boost::regex_constants::syntax_option_type f = boost::regex_constants::normal);</pre>
|
||||
<P><STRONG>Effects</STRONG>: returns basic_regex<charT>(s.GetString(),
|
||||
s.GetString() + s.GetLength(), f);</P>
|
||||
<H3><A name="algo"></A>Overloaded Algorithms</H3>
|
||||
<P>For each regular expression algorithm that's overloaded for a std::basic_string
|
||||
argument, there is also one overloaded for the MFC/ATL string types.
|
||||
These algorithm signatures all look a lot more complex than they actually
|
||||
are, but for completeness here they are anyway:</P>
|
||||
<H4><A name="regex_match"></A>regex_match</H4>
|
||||
<P>There are two overloads, the first reports what matched in a match_results
|
||||
structure, the second does not.
|
||||
</P>
|
||||
<P>All the usual caveats for <A href="regex_match.html">regex_match</A> apply, in
|
||||
particular the algorithm will only report a successful match if <STRONG>all of the
|
||||
input text matches the expression</STRONG>, if this isn't what you want then
|
||||
use <A href="regex_search.html">regex_search</A> instead.</P>
|
||||
<PRE>template <class charT, class T, class A>
|
||||
bool regex_match(
|
||||
const ATL::CSimpleStringT<charT>& s,
|
||||
match_results<const charT*, A>& what,
|
||||
const basic_regex<charT, T>& e,
|
||||
boost::regex_constants::match_flag_type f = boost::regex_constants::match_default); </PRE>
|
||||
<P>
|
||||
<P><STRONG>Effects</STRONG>: returns ::boost::<A href="regex_match.html">regex_match</A>(s.GetString(),
|
||||
s.GetString() + s.GetLength(), what, e, f);</P>
|
||||
<p><strong>Example:</strong></p>
|
||||
<pre>//
|
||||
// Extract filename part of a path from a CString and return the result
|
||||
// as another CString:
|
||||
//
|
||||
CString get_filename(const CString& path)
|
||||
{
|
||||
boost::tregex r(__T("(?:\\A|.*\\\\)([^\\\\]+)"));
|
||||
boost::tmatch what;
|
||||
if(boost::regex_match(path, what, r))
|
||||
{
|
||||
// extract $1 as a CString:
|
||||
return CString(what[1].first, what.length(1));
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::runtime_error("Invalid pathname");
|
||||
}
|
||||
}
|
||||
</pre>
|
||||
<hr>
|
||||
<PRE>template <class charT, class T>
|
||||
bool regex_match(
|
||||
const ATL::CSimpleStringT<charT>& s,
|
||||
const basic_regex<charT, T>& e,
|
||||
boost::regex_constants::match_flag_type f = boost::regex_constants::match_default)</PRE>
|
||||
<P>
|
||||
<P><STRONG>Effects</STRONG>: returns ::boost::<A href="regex_match.html">regex_match</A>(s.GetString(),
|
||||
s.GetString() + s.GetLength(), e, f);</P>
|
||||
<p><strong>Example:</strong></p>
|
||||
<pre>//
|
||||
// Find out if *password* meets our password requirements,
|
||||
// as defined by the regular expression *requirements*.
|
||||
//
|
||||
bool is_valid_password(const CString& password, const CString& requirements)
|
||||
{
|
||||
return boost::regex_match(password, boost::make_regex(requirements));
|
||||
} </pre>
|
||||
<hr>
|
||||
<H4><A name="regex_search"></A>regex_search</H4>
|
||||
<P>There are two additional overloads for <A href="regex_search.html">regex_search</A>,
|
||||
the first reports what matched, the second does not:</P>
|
||||
<PRE>template <class charT, class A, class T>
|
||||
bool regex_search(const ATL::CSimpleStringT<charT>& s,
|
||||
match_results<const charT*, A>& what,
|
||||
const basic_regex<charT, T>& e,
|
||||
boost::regex_constants::match_flag_type f = boost::regex_constants::match_default)</PRE>
|
||||
<P><STRONG>Effects</STRONG>: returns ::boost::<A href="regex_search.html">regex_search</A>(s.GetString(),
|
||||
s.GetString() + s.GetLength(), what, e, f);</P>
|
||||
<P><STRONG>Example:</STRONG> Postcode extraction from an address string.</P>
|
||||
<pre>CString extract_postcode(const CString& address)
|
||||
{
|
||||
// searches through address for a UK postcode and returns the result,
|
||||
// the expression used is by Phil A. on www.regexlib.com:
|
||||
boost::tregex r(__T("^(([A-Z]{1,2}[0-9]{1,2})|([A-Z]{1,2}[0-9][A-Z]))\\s?([0-9][A-Z]{2})$"));
|
||||
boost::tmatch what;
|
||||
if(boost::regex_search(address, what, r))
|
||||
{
|
||||
// extract $0 as a CString:
|
||||
return CString(what[0].first, what.length());
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::runtime_error("No postcode found");
|
||||
}
|
||||
} </pre>
|
||||
<hr>
|
||||
<pre>template <class charT, class T>
|
||||
inline bool regex_search(const ATL::CSimpleStringT<charT>& s,
|
||||
const basic_regex<charT, T>& e,
|
||||
boost::regex_constants::match_flag_type f = boost::regex_constants::match_default)
|
||||
</pre>
|
||||
<P><STRONG>Effects</STRONG>: returns ::boost::<A href="regex_search.html">regex_search</A>(s.GetString(),
|
||||
s.GetString() + s.GetLength(), e, f);</P>
|
||||
<hr>
|
||||
<H4><A name="regex_replace"></A>regex_replace</H4>
|
||||
<P>There are two additional overloads for <A href="regex_replace.html">regex_replace</A>,
|
||||
the first sends output to an output iterator, while the second creates a new
|
||||
string.</P>
|
||||
<PRE>template <class OutputIterator, class BidirectionalIterator, class traits, class
|
||||
charT>
|
||||
OutputIterator regex_replace(OutputIterator out,
|
||||
BidirectionalIterator first,
|
||||
BidirectionalIterator last,
|
||||
const basic_regex<charT, traits>& e,
|
||||
const ATL::CSimpleStringT<charT>& fmt,
|
||||
match_flag_type flags = match_default)
|
||||
</PRE>
|
||||
<P><STRONG>Effects</STRONG>: returns ::boost::<A href="regex_replace.html">regex_replace</A>(out,
|
||||
first, last, e, fmt.GetString(), flags);</P>
|
||||
<pre>template <class traits, class charT>
|
||||
ATL::CSimpleStringT<charT> regex_replace(const ATL::CSimpleStringT<charT>& s,
|
||||
const basic_regex<charT, traits>& e,
|
||||
const ATL::CSimpleStringT<charT>& fmt,
|
||||
match_flag_type flags = match_default)</pre>
|
||||
<P><STRONG>Effects</STRONG>: returns a new string created using <A href="regex_replace.html">
|
||||
regex_replace</A>, and the same memory manager as string <EM>s</EM>.</P>
|
||||
<P><STRONG>Example:</STRONG></P>
|
||||
<PRE>//
|
||||
// Take a credit card number as a string of digits,
|
||||
// and reformat it as a human readable string with "-"
|
||||
// separating each group of four digits:
|
||||
//
|
||||
const boost::tregex e(__T("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z"));
|
||||
const CString human_format = __T("$1-$2-$3-$4");
|
||||
|
||||
CString human_readable_card_number(const CString& s)
|
||||
{
|
||||
return boost::regex_replace(s, e, human_format);
|
||||
}
|
||||
</PRE>
|
||||
<H3><a name="iterators"></a>Iterators</H3>
|
||||
<P>The following helper functions are provided to ease the conversion from an
|
||||
MFC/ATL string to a <A href="regex_iterator.html">regex_iterator</A> or <A href="regex_token_iterator.html">
|
||||
regex_token_iterator</A>:</P>
|
||||
<H4><A name="regex_iterator"></A>regex_iterator creation helper</H4>
|
||||
<PRE>template <class charT>
|
||||
regex_iterator<charT const*>
|
||||
make_regex_iterator(
|
||||
const ATL::CSimpleStringT<charT>& s,
|
||||
const basic_regex<charT>& e,
|
||||
::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
|
||||
</PRE>
|
||||
<p><STRONG>Effects:</STRONG> returns <A href="regex_iterator.html">regex_iterator</A>(s.GetString(),
|
||||
s.GetString() + s.GetLength(), e, f);</p>
|
||||
<p><strong>Example:</strong></p>
|
||||
<pre>void enumerate_links(const CString& html)
|
||||
{
|
||||
// enumerate and print all the <a> links in some HTML text,
|
||||
// the expression used is by Andrew Lee on <a href="http://www.regexlib.com">www.regexlib.com</a>:
|
||||
boost::tregex r(__T("href=[\"\']((http:\\/\\/|\\.\\/|\\/)?\\w+(\\.\\w+)*(\\/\\w+(\\.\\w+)?)*(\\/|\\?\\w*=\\w*(&\\w*=\\w*)*)?)[\"\']"));
|
||||
boost::tregex_iterator i(boost::make_regex_iterator(html, r)), j;
|
||||
while(i != j)
|
||||
{
|
||||
std::cout << (*i)[1] << std::endl;
|
||||
++i;
|
||||
}
|
||||
}
|
||||
</pre>
|
||||
<hr>
|
||||
<H4><A name="regex_token_iterator"></A>regex_token_iterator creation helpers</H4>
|
||||
<PRE>template <class charT>
|
||||
regex_token_iterator<charT const*>
|
||||
make_regex_token_iterator(
|
||||
const ATL::CSimpleStringT<charT>& s,
|
||||
const basic_regex<charT>& e,
|
||||
int sub = 0,
|
||||
::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
|
||||
</PRE>
|
||||
<p><STRONG>Effects:</STRONG> returns <A href="regex_token_iterator.html">regex_token_iterator</A>(s.GetString(),
|
||||
s.GetString() + s.GetLength(), e, sub, f);</p>
|
||||
<pre>template <class charT>
|
||||
regex_token_iterator<charT const*>
|
||||
make_regex_token_iterator(
|
||||
const ATL::CSimpleStringT<charT>& s,
|
||||
const basic_regex<charT>& e,
|
||||
const std::vector<int>& subs,
|
||||
::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
|
||||
</pre>
|
||||
<p><STRONG>Effects:</STRONG> returns <A href="regex_token_iterator.html">regex_token_iterator</A>(s.GetString(),
|
||||
s.GetString() + s.GetLength(), e, subs, f);</p>
|
||||
<pre>template <class charT, std::size_t N>
|
||||
regex_token_iterator<charT const*>
|
||||
make_regex_token_iterator(
|
||||
const ATL::CSimpleStringT<charT>& s,
|
||||
const basic_regex<charT>& e,
|
||||
const int (& subs)[N],
|
||||
::boost::regex_constants::match_flag_type f = boost::regex_constants::match_default);
|
||||
</pre>
|
||||
<p><STRONG>Effects: </STRONG>returns <A href="regex_token_iterator.html">regex_token_iterator</A>(s.GetString(),
|
||||
s.GetString() + s.GetLength(), e, subs, f);</p>
|
||||
<P><STRONG>Example:</STRONG></P>
|
||||
<PRE>void enumerate_links2(const CString& html)
|
||||
{
|
||||
// enumerate and print all the <a> links in some HTML text,
|
||||
// the expression used is by Andrew Lee on <a href="http://www.regexlib.com">www.regexlib.com</a>:
|
||||
boost::tregex r(__T("href=[\"\']((http:\\/\\/|\\.\\/|\\/)?\\w+(\\.\\w+)*(\\/\\w+(\\.\\w+)?)*(\\/|\\?\\w*=\\w*(&\\w*=\\w*)*)?)[\"\']"));
|
||||
boost::tregex_token_iterator i(boost::make_regex_token_iterator(html, r, 1)), j;
|
||||
while(i != j)
|
||||
{
|
||||
std::cout << *i << std::endl;
|
||||
++i;
|
||||
}
|
||||
} </PRE>
|
||||
<HR>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
21 Dec 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 2004</i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,53 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Working With Non-Standard String Types</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Working With Non-Standard String Types.</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>The Boost.Regex algorithms and iterators are all iterator-based, with
|
||||
convenience overloads of the algorithms provided that convert standard library
|
||||
string types to iterator pairs internally. If you want to search a
|
||||
non-standard string type then the trick is to convert that string into an
|
||||
iterator pair: so far I haven't come across any string types that can't be
|
||||
handled this way, even if they're not officially iterator based.
|
||||
Certainly any string type that provides access to its internal buffer, along
|
||||
with its length, can be converted into a pair of pointers (which can be used
|
||||
as iterators).</P>
|
||||
<P>Some non-standard string types are sufficiently common that wrappers have been
|
||||
provided for them:</P>
|
||||
<P><A href="mfc_strings.html">MFC/ATL Strings.</A><BR>
|
||||
<A href="icu_strings.html">ICU Strings.</A></P>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Nov 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,195 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Partial Matches</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Partial Matches</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>The <A href="match_flag_type.html">match-flag</A> <CODE>match_partial</CODE> can
|
||||
be passed to the following algorithms: <A href="regex_match.html">regex_match</A>,
|
||||
<A href="regex_search.html">regex_search</A>, and <A href="regex_grep.html">regex_grep</A>,
|
||||
and used with the iterator <A href="regex_iterator.html">regex_iterator</A>.
|
||||
When used it indicates that partial as well as full matches should be found. A
|
||||
partial match is one that matched one or more characters at the end of the text
|
||||
input, but did not match all of the regular expression (although it may have
|
||||
done so had more input been available). Partial matches are typically used when
|
||||
either validating data input (checking each character as it is entered on the
|
||||
keyboard), or when searching texts that are either too long to load into memory
|
||||
(or even into a memory mapped file), or are of indeterminate length (for
|
||||
example the source may be a socket or similar). Partial and full matches can be
|
||||
differentiated as shown in the following table (the variable M represents an
|
||||
instance of <A href="match_results.html">match_results<></A> as filled in
|
||||
by regex_match, regex_search or regex_grep):<BR>
|
||||
</P>
|
||||
<P>
|
||||
<TABLE id="Table2" cellSpacing="0" cellPadding="7" width="100%" border="0">
|
||||
<TR>
|
||||
<TD vAlign="top" width="20%"> </TD>
|
||||
<TD vAlign="top" width="20%">Result</TD>
|
||||
<TD vAlign="top" width="20%">M[0].matched</TD>
|
||||
<TD vAlign="top" width="20%">M[0].first</TD>
|
||||
<TD vAlign="top" width="20%">M[0].second</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="20%">No match</TD>
|
||||
<TD vAlign="top" width="20%">False</TD>
|
||||
<TD vAlign="top" width="20%">Undefined</TD>
|
||||
<TD vAlign="top" width="20%">Undefined</TD>
|
||||
<TD vAlign="top" width="20%">Undefined</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="20%">Partial match</TD>
|
||||
<TD vAlign="top" width="20%">True</TD>
|
||||
<TD vAlign="top" width="20%">False</TD>
|
||||
<TD vAlign="top" width="20%">Start of partial match.</TD>
|
||||
<TD vAlign="top" width="20%">End of partial match (end of text).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="20%">Full match</TD>
|
||||
<TD vAlign="top" width="20%">True</TD>
|
||||
<TD vAlign="top" width="20%">True</TD>
|
||||
<TD vAlign="top" width="20%">Start of full match.</TD>
|
||||
<TD vAlign="top" width="20%">End of full match.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>Be aware that using partial matches can sometimes result in somewhat imperfect
|
||||
behavior:</P>
|
||||
<UL>
|
||||
<LI>
|
||||
There are some expressions, such as ".*abc" that will always produce a partial
|
||||
match. This problem can be reduced by careful construction of the regular
|
||||
expressions used, or by setting flags like match_not_dot_newline so that
|
||||
expressions like .* can't match past line boundaries.</LI>
|
||||
<LI>
|
||||
Boost.Regex currently prefers leftmost matches to full matches, so for example
|
||||
matching "abc|b" against "ab" produces a partial match against the "ab"
|
||||
rather than a full match against "b". It's more efficient to work this
|
||||
way, but may not be the behavior you want in all situations.</LI></UL>
|
||||
<P>The following <A href="../example/snippets/partial_regex_match.cpp">example</A>
|
||||
tests to see whether the text could be a valid credit card number: as the user
|
||||
presses a key, the character entered would be added to the string being built
|
||||
up, and passed to <CODE>is_possible_card_number</CODE>. If this returns true
|
||||
then the text could be a valid card number, so the user interface's OK button
|
||||
would be enabled. If it returns false, then this is not yet a valid card
|
||||
number, but could be with more input, so the user interface would disable the
|
||||
OK button. Finally, if the procedure throws an exception the input could never
|
||||
become a valid number, and the inputted character must be discarded, and a
|
||||
suitable error indication displayed to the user.</P>
|
||||
<PRE>#include <string>
|
||||
#include <iostream>
|
||||
#include <boost/regex.hpp>
|
||||
|
||||
boost::regex e("(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})");
|
||||
|
||||
bool is_possible_card_number(const std::string& input)
|
||||
{
|
||||
//
|
||||
// return false for partial match, true for full match, or throw for
|
||||
// impossible match based on what we have so far...
|
||||
boost::match_results<std::string::const_iterator> what;
|
||||
if(0 == boost::regex_match(input, what, e, boost::match_default | boost::match_partial))
|
||||
{
|
||||
// the input so far could not possibly be valid so reject it:
|
||||
throw std::runtime_error("Invalid data entered - this could not possibly be a valid card number");
|
||||
}
|
||||
// OK so far so good, but have we finished?
|
||||
if(what[0].matched)
|
||||
{
|
||||
// excellent, we have a result:
|
||||
return true;
|
||||
}
|
||||
// what we have so far is only a partial match...
|
||||
return false;
|
||||
}</PRE>
|
||||
<P>In the following <A href="../example/snippets/partial_regex_grep.cpp">example</A>,
|
||||
text input is taken from a stream containing an unknown amount of text; this
|
||||
example simply counts the number of html tags encountered in the stream. The
|
||||
text is loaded into a buffer and searched a part at a time, if a partial match
|
||||
was encountered, then the partial match gets searched a second time as the
|
||||
start of the next batch of text:</P>
|
||||
<PRE>#include <iostream>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <boost/regex.hpp>
|
||||
|
||||
// match some kind of html tag:
|
||||
boost::regex e("<[^>]*>");
|
||||
// count how many:
|
||||
unsigned int tags = 0;
|
||||
// saved position of partial match:
|
||||
char* next_pos = 0;
|
||||
|
||||
bool grep_callback(const boost::match_results<char*>& m)
|
||||
{
|
||||
if(m[0].matched == false)
|
||||
{
|
||||
// save position and return:
|
||||
next_pos = m[0].first;
|
||||
}
|
||||
else
|
||||
++tags;
|
||||
return true;
|
||||
}
|
||||
|
||||
void search(std::istream& is)
|
||||
{
|
||||
char buf[4096];
|
||||
next_pos = buf + sizeof(buf);
|
||||
bool have_more = true;
|
||||
while(have_more)
|
||||
{
|
||||
// how much do we copy forward from last try:
|
||||
unsigned leftover = (buf + sizeof(buf)) - next_pos;
|
||||
// and how much is left to fill:
|
||||
unsigned size = next_pos - buf;
|
||||
// copy forward whatever we have left:
|
||||
memcpy(buf, next_pos, leftover);
|
||||
// fill the rest from the stream:
|
||||
unsigned read = is.readsome(buf + leftover, size);
|
||||
// check to see if we've run out of text:
|
||||
have_more = read == size;
|
||||
// reset next_pos:
|
||||
next_pos = buf + sizeof(buf);
|
||||
// and then grep:
|
||||
boost::regex_grep(grep_callback,
|
||||
buf,
|
||||
buf + read + leftover,
|
||||
e,
|
||||
boost::match_default | boost::match_partial);
|
||||
}
|
||||
}</PRE>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,52 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Performance</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Performance</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>The performance of Boost.regex in both recursive and non-recursive modes should
|
||||
be broadly comparable to other regular expression libraries: recursive mode is
|
||||
slightly faster (especially where memory allocation requires thread
|
||||
synchronisation), but not by much. The following pages compare
|
||||
Boost.regex with various other regular expression libraries for the following
|
||||
compilers:</P>
|
||||
<P><A href="vc71-performance.html">Visual Studio.Net 2003 (recursive Boost.regex
|
||||
implementation)</A>.</P>
|
||||
<P><A href="gcc-performance.html">Gcc 3.2 (cygwin) (non-recursive Boost.regex
|
||||
implementation).</A></P>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -1,286 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: POSIX API Compatibility Functions</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">POSIX API Compatibility Functions</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<PRE>#include &lt;boost/cregex.hpp&gt;
|
||||
<I>or</I>:
|
||||
#include &lt;boost/regex.h&gt;</PRE>
|
||||
<P>The following functions are available for users who need a POSIX compatible C
|
||||
library; they are available in both Unicode and narrow character versions. The
|
||||
standard POSIX API names are macros that expand to one version or the other
|
||||
depending upon whether UNICODE is defined or not.
|
||||
</P>
|
||||
<P><B>Important</B>: Note that all the symbols defined here are enclosed inside
|
||||
namespace <I>boost</I> when used in C++ programs, unless you use #include
|
||||
&lt;boost/regex.h&gt; instead - in which case the symbols are still defined in
|
||||
namespace boost, but are made available in the global namespace as well.</P>
|
||||
<P>The functions are defined as:
|
||||
</P>
|
||||
<PRE>extern "C" {
|
||||
<B>int</B> regcompA(regex_tA*, <B>const</B> <B>char</B>*, <B>int</B>);
|
||||
<B>unsigned</B> <B>int</B> regerrorA(<B>int</B>, <B>const</B> regex_tA*, <B>char</B>*, <B>unsigned</B> <B>int</B>);
|
||||
<B>int</B> regexecA(<B>const</B> regex_tA*, <B>const</B> <B>char</B>*, <B>unsigned</B> <B>int</B>, regmatch_t*, <B>int</B>);
|
||||
<B>void</B> regfreeA(regex_tA*);
|
||||
|
||||
<B>int</B> regcompW(regex_tW*, <B>const</B> <B>wchar_t</B>*, <B>int</B>);
|
||||
<B>unsigned</B> <B>int</B> regerrorW(<B>int</B>, <B>const</B> regex_tW*, <B>wchar_t</B>*, <B>unsigned</B> <B>int</B>);
|
||||
<B>int</B> regexecW(<B>const</B> regex_tW*, <B>const</B> <B>wchar_t</B>*, <B>unsigned</B> <B>int</B>, regmatch_t*, <B>int</B>);
|
||||
<B>void</B> regfreeW(regex_tW*);
|
||||
|
||||
#ifdef UNICODE
|
||||
#define regcomp regcompW
|
||||
#define regerror regerrorW
|
||||
#define regexec regexecW
|
||||
#define regfree regfreeW
|
||||
#define regex_t regex_tW
|
||||
#else
|
||||
#define regcomp regcompA
|
||||
#define regerror regerrorA
|
||||
#define regexec regexecA
|
||||
#define regfree regfreeA
|
||||
#define regex_t regex_tA
|
||||
#endif
|
||||
}</PRE>
|
||||
<P>All the functions operate on structure <B>regex_t</B>, which exposes two public
|
||||
members:
|
||||
</P>
|
||||
<P><B>unsigned int re_nsub</B> this is filled in by <B>regcomp</B> and indicates
|
||||
the number of sub-expressions contained in the regular expression.
|
||||
</P>
|
||||
<P><B>const TCHAR* re_endp</B> points to the end of the expression to compile when
|
||||
the flag REG_PEND is set.
|
||||
</P>
|
||||
<P><I>Footnote: regex_t is actually a #define - it is either regex_tA or regex_tW
|
||||
depending upon whether UNICODE is defined or not, TCHAR is either char or
|
||||
wchar_t again depending upon the macro UNICODE.</I>
|
||||
</P>
|
||||
<H3>regcomp</H3>
|
||||
<P><B>regcomp</B> takes a pointer to a <B>regex_t</B>, a pointer to the expression
|
||||
to compile and a flags parameter which can be a combination of:
|
||||
<BR>
|
||||
|
||||
</P>
|
||||
<P>
|
||||
<TABLE id="Table2" cellSpacing="0" cellPadding="7" width="100%" border="0">
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">REG_EXTENDED</TD>
|
||||
<TD vAlign="top" width="45%">Compiles modern regular expressions. Equivalent to
|
||||
regbase::char_classes | regbase::intervals | regbase::bk_refs.</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">REG_BASIC</TD>
|
||||
<TD vAlign="top" width="45%">Compiles basic (obsolete) regular expression syntax.
|
||||
Equivalent to regbase::char_classes | regbase::intervals | regbase::limited_ops
|
||||
| regbase::bk_braces | regbase::bk_parens | regbase::bk_refs.</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">REG_NOSPEC</TD>
|
||||
<TD vAlign="top" width="45%">All characters are ordinary, the expression is a
|
||||
literal string.</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">REG_ICASE</TD>
|
||||
<TD vAlign="top" width="45%">Compiles for matching that ignores character case.</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">REG_NOSUB</TD>
|
||||
<TD vAlign="top" width="45%">Has no effect in this library.</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">REG_NEWLINE</TD>
|
||||
<TD vAlign="top" width="45%">When this flag is set a dot does not match the
|
||||
newline character.</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">REG_PEND</TD>
|
||||
<TD vAlign="top" width="45%">When this flag is set the re_endp parameter of the
|
||||
regex_t structure must point to the end of the regular expression to compile.</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">REG_NOCOLLATE</TD>
|
||||
<TD vAlign="top" width="45%">When this flag is set then locale dependent collation
|
||||
for character ranges is turned off.</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">REG_ESCAPE_IN_LISTS<BR>
|
||||
, , ,
|
||||
</TD>
|
||||
<TD vAlign="top" width="45%">When this flag is set, then escape sequences are
|
||||
permitted in bracket expressions (character sets).</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">REG_NEWLINE_ALT </TD>
|
||||
<TD vAlign="top" width="45%">When this flag is set then the newline character is
|
||||
equivalent to the alternation operator |.</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">REG_PERL </TD>
|
||||
<TD vAlign="top" width="45%">Compiles Perl like regular expressions.</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">REG_AWK</TD>
|
||||
<TD vAlign="top" width="45%">A shortcut for awk-like behavior: REG_EXTENDED |
|
||||
REG_ESCAPE_IN_LISTS</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">REG_GREP</TD>
|
||||
<TD vAlign="top" width="45%">A shortcut for grep like behavior: REG_BASIC |
|
||||
REG_NEWLINE_ALT</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="45%">REG_EGREP</TD>
|
||||
<TD vAlign="top" width="45%"> A shortcut for egrep like behavior:
|
||||
REG_EXTENDED | REG_NEWLINE_ALT</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H3>regerror</H3>
|
||||
<P>regerror takes the following parameters, it maps an error code to a human
|
||||
readable string:
|
||||
<BR>
|
||||
</P>
|
||||
<P>
|
||||
<TABLE id="Table3" cellSpacing="0" cellPadding="7" width="100%" border="0">
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="50%">int code</TD>
|
||||
<TD vAlign="top" width="50%">The error code.</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">const regex_t* e</TD>
|
||||
<TD vAlign="top" width="50%">The regular expression (can be null).</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">char* buf</TD>
|
||||
<TD vAlign="top" width="50%">The buffer to fill in with the error message.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">unsigned int buf_size</TD>
|
||||
<TD vAlign="top" width="50%">The length of buf.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>If the error code is OR'ed with REG_ITOA then the message that results is the
|
||||
printable name of the code rather than a message, for example "REG_BADPAT". If
|
||||
the code is REG_ATOI then <B>e</B> must not be null and <B>e->re_endp</B> must
|
||||
point to the printable name of an error code, the return value is then the
|
||||
value of the error code. For any other value of <B>code</B>, the return value
|
||||
is the number of characters in the error message, if the return value is
|
||||
greater than or equal to <B>buf_size</B> then <B>regerror</B> will have to be
|
||||
called again with a larger buffer.</P>
|
||||
<H3>regexec</H3>
|
||||
<P><B>regexec</B> finds the first occurrence of expression <B>e</B> within string <B>buf</B>.
|
||||
If <B>len</B> is non-zero then *<B>m</B> is filled in with what matched the
|
||||
regular expression, <B>m[0]</B> contains what matched the whole string, <B>m[1] </B>
|
||||
the first sub-expression etc, see <B>regmatch_t</B> in the header file
|
||||
declaration for more details. The <B>eflags</B> parameter can be a combination
|
||||
of:
|
||||
<BR>
|
||||
|
||||
</P>
|
||||
<P>
|
||||
<TABLE id="Table4" cellSpacing="0" cellPadding="7" width="100%" border="0">
|
||||
<TR>
|
||||
<TD width="5%"> </TD>
|
||||
<TD vAlign="top" width="50%">REG_NOTBOL</TD>
|
||||
<TD vAlign="top" width="50%">Parameter <B>buf </B>does not represent the start of
|
||||
a line.</TD>
|
||||
<TD width="5%"> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">REG_NOTEOL</TD>
|
||||
<TD vAlign="top" width="50%">Parameter <B>buf</B> does not terminate at the end of
|
||||
a line.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD> </TD>
|
||||
<TD vAlign="top" width="50%">REG_STARTEND</TD>
|
||||
<TD vAlign="top" width="50%">The string searched starts at buf + pmatch[0].rm_so
|
||||
and ends at buf + pmatch[0].rm_eo.</TD>
|
||||
<TD> </TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H3>regfree</H3>
|
||||
<P>Finally <B>regfree</B> frees all the memory that was allocated by regcomp.
|
||||
</P>
|
||||
<P><I>Footnote: this is an abridged reference to the POSIX API functions, it is
|
||||
provided for compatibility with other libraries, rather than an API to be used
|
||||
in new code (unless you need access from a language other than C++). This
|
||||
version of these functions should also happily coexist with other versions, as
|
||||
the names used are macros that expand to the actual function names.</I>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -1,55 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Redistributables and Library Names</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Redistributables and Library Names</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>If you are using Microsoft or Borland C++ and link to a dll version of the run
|
||||
time library, then you can choose to also link to a dll version of boost.regex
|
||||
by defining the symbol BOOST_REGEX_DYN_LINK when you compile your code. While
|
||||
these dll's are redistributable, there are no "standard" versions, so when
|
||||
installing on the user's PC, you should place these in a directory private to
|
||||
your application, and not in the PC's directory path. Note that if you link to
|
||||
a static version of your run time library, then you will also link to a static
|
||||
version of boost.regex and no dll's will need to be distributed. The possible
|
||||
boost.regex dll and library names are computed according to the <A href="../../../more/getting_started.html#step5">
|
||||
formula given in the getting started guide</A>.
|
||||
</P>
|
||||
<P>Note: you can disable automatic library selection by defining the symbol
|
||||
BOOST_REGEX_NO_LIB when compiling; this is useful if you want to build
|
||||
Boost.Regex yourself in your IDE, or if you need to debug boost.regex.
|
||||
</P>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
28 June 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,44 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Class reg_expression (deprecated)</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Class reg_expression (deprecated)</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>The use of class template reg_expression is deprecated: use <A href="basic_regex.html">
|
||||
basic_regex</A> instead.</P>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -1,82 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<meta name="generator" content="HTML Tidy, see www.w3.org">
|
||||
<title>Boost.Regex: regbase</title>
|
||||
<meta http-equiv="Content-Type" content=
|
||||
"text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<p></p>
|
||||
|
||||
<table id="Table1" cellspacing="1" cellpadding="1" width="100%"
|
||||
border="0">
|
||||
<tr>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt=
|
||||
"C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<td width="353">
|
||||
<h1 align="center">Boost.Regex</h1>
|
||||
|
||||
<h2 align="center">regbase</h2>
|
||||
</td>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt=
|
||||
"Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<br>
|
||||
<br>
|
||||
|
||||
|
||||
<hr>
|
||||
<p>Use of the type <code>boost::regbase</code> is now deprecated,
|
||||
and the type does not form a part of the <a href="http://anubis.dkuug.dk/jtc1/sc22/wg21/docs/papers/2003/n1429.htm">
|
||||
regular expression standardization proposal</a>. This type
|
||||
still exists as a base class of <code>boost::basic_regex</code>,
|
||||
and you can still refer to <code>
|
||||
boost::regbase::constant_name</code> in your code, however for
|
||||
maximum portability to other std regex implementations you should
|
||||
instead use either:</p>
|
||||
|
||||
<pre>
|
||||
boost::regex_constants::constant_name
|
||||
</pre>
|
||||
|
||||
<p>or</p>
|
||||
|
||||
<pre>
|
||||
boost::regex::constant_name
|
||||
</pre>
|
||||
|
||||
<p>or</p>
|
||||
|
||||
<pre>
|
||||
boost::wregex::constant_name
|
||||
</pre>
|
||||
|
||||
<p></p>
|
||||
|
||||
<hr>
|
||||
<br>
|
||||
<br>
|
||||
|
||||
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
481
doc/regex.html
481
doc/regex.html
@ -1,481 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: class RegEx (deprecated)</title>
|
||||
<meta name="generator" content="HTML Tidy, see www.w3.org">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<p></p>
|
||||
<table id="Table1" cellspacing="1" cellpadding="1" width="100%" border="0">
|
||||
<tr>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<td width="353">
|
||||
<h1 align="center">Boost.Regex</h1>
|
||||
<h2 align="center">class RegEx (deprecated)</h2>
|
||||
</td>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<hr>
|
||||
<p>The high level wrapper class RegEx is now deprecated and does not form a part
|
||||
of the <a href="http://anubis.dkuug.dk/jtc1/sc22/wg21/docs/papers/2003/n1429.htm">regular
|
||||
expression standardization proposal</a>. This type still exists, and
|
||||
existing code will continue to compile, however the following documentation is
|
||||
unlikely to be further updated.</p>
|
||||
<pre>
|
||||
#include <boost/cregex.hpp>
|
||||
</pre>
|
||||
<p>The class RegEx provides a high level simplified interface to the regular
|
||||
expression library, this class only handles narrow character strings, and
|
||||
regular expressions always follow the "normal" syntax - that is the same as the
|
||||
perl / ECMAScript syntax.</p>
|
||||
<pre>
|
||||
<b>typedef</b> <b>bool</b> (*GrepCallback)(<b>const</b> RegEx& expression);
|
||||
<b>typedef</b> <b>bool</b> (*GrepFileCallback)(<b>const</b> <b>char</b>* file, <b>const</b> RegEx& expression);
|
||||
<b>typedef</b> <b>bool</b> (*FindFilesCallback)(<b>const</b> <b>char</b>* file);
|
||||
|
||||
<b>class</b> RegEx
|
||||
{
|
||||
<b>public</b>:
|
||||
RegEx();
|
||||
RegEx(<b>const</b> RegEx& o);
|
||||
~RegEx();
|
||||
RegEx(<b>const</b> <b>char</b>* c, <b>bool</b> icase = <b>false</b>);
|
||||
<strong>explicit</strong> RegEx(<b>const</b> std::string& s, <b>bool</b> icase = <b>false</b>);
|
||||
RegEx& <b>operator</b>=(<b>const</b> RegEx& o);
|
||||
RegEx& <b>operator</b>=(<b>const</b> <b>char</b>* p);
|
||||
RegEx& <b>operator</b>=(<b>const</b> std::string& s);
|
||||
<b>unsigned</b> <b>int</b> SetExpression(<b>const</b> <b>char</b>* p, <b>bool</b> icase = <b>false</b>);
|
||||
<b>unsigned</b> <b>int</b> SetExpression(<b>const</b> std::string& s, <b>bool</b> icase = <b>false</b>);
|
||||
std::string Expression()<b>const</b>;
|
||||
<font color="#000080"><i>//
|
||||
</i> <i>// now matching operators:</i>
|
||||
<i>//</i></font>
|
||||
<b>bool</b> Match(<b>const</b> <b>char</b>* p, boost::match_flag_type flags = match_default);
|
||||
<b>bool</b> Match(<b>const</b> std::string& s, boost::match_flag_type flags = match_default);
|
||||
<b>bool</b> Search(<b>const</b> <b>char</b>* p, boost::match_flag_type flags = match_default);
|
||||
<b>bool</b> Search(<b>const</b> std::string& s, boost::match_flag_type flags = match_default);
|
||||
<b>unsigned</b> <b>int</b> Grep(GrepCallback cb, <b>const</b> <b>char</b>* p, boost::match_flag_type flags = match_default);
|
||||
<b>unsigned</b> <b>int</b> Grep(GrepCallback cb, <b>const</b> std::string& s, boost::match_flag_type flags = match_default);
|
||||
<b>unsigned</b> <b>int</b> Grep(std::vector<std::string>& v, <b>const</b> <b>char</b>* p, boost::match_flag_type flags = match_default);
|
||||
<b>unsigned</b> <b>int</b> Grep(std::vector<std::string>& v, <b>const</b> std::string& s, boost::match_flag_type flags = match_default);
|
||||
<b>unsigned</b> <b>int</b> Grep(std::vector<<b>unsigned</b> <b>int</b>>& v, <b>const</b> <b>char</b>* p, boost::match_flag_type flags = match_default);
|
||||
<b>unsigned</b> <b>int</b> Grep(std::vector<<b>unsigned</b> <b>int</b>>& v, <b>const</b> std::string& s, boost::match_flag_type flags = match_default);
|
||||
<b>unsigned</b> <b>int</b> GrepFiles(GrepFileCallback cb, <b>const</b> <b>char</b>* files, <b>bool</b> recurse = <b>false</b>, boost::match_flag_type flags = match_default);
|
||||
<b>unsigned</b> <b>int</b> GrepFiles(GrepFileCallback cb, <b>const</b> std::string& files, <b>bool</b> recurse = <b>false</b>, boost::match_flag_type flags = match_default);
|
||||
<b>unsigned</b> <b>int</b> FindFiles(FindFilesCallback cb, <b>const</b> <b>char</b>* files, <b>bool</b> recurse = <b>false</b>, boost::match_flag_type flags = match_default);
|
||||
<b>unsigned</b> <b>int</b> FindFiles(FindFilesCallback cb, <b>const</b> std::string& files, <b>bool</b> recurse = <b>false</b>, boost::match_flag_type flags = match_default);
|
||||
std::string Merge(<b>const</b> std::string& in, <b>const</b> std::string& fmt, <b>bool</b> copy = <b>true</b>, boost::match_flag_type flags = match_default);
|
||||
std::string Merge(<b>const</b> char* in, <b>const</b> char* fmt, <b>bool</b> copy = <b>true</b>, boost::match_flag_type flags = match_default);
|
||||
<b>unsigned</b> Split(std::vector<std::string>& v, std::string& s, boost::match_flag_type flags = match_default, <b>unsigned</b> max_count = ~0);
|
||||
<font color="#000080"><i>//
|
||||
</i> <i>// now operators for returning what matched in more detail:
|
||||
</i> <i>//
|
||||
</i></font> <b>unsigned</b> <b>int</b> Position(<b>int</b> i = 0)<b>const</b>;
|
||||
<b>unsigned</b> <b>int</b> Length(<b>int</b> i = 0)<b>const</b>;
|
||||
<strong>bool</strong> Matched(<strong>int</strong> i = 0)<strong>const</strong>;
|
||||
<b>unsigned</b> <b>int</b> Line()<b>const</b>;
|
||||
<b>unsigned int</b> Marks() const;
|
||||
std::string What(<b>int</b> i)<b>const</b>;
|
||||
std::string <b>operator</b>[](<b>int</b> i)<b>const</b> ;
|
||||
|
||||
<strong>static const unsigned int</strong> npos;
|
||||
};
|
||||
</pre>
|
||||
<p>Member functions for class RegEx are defined as follows:<br>
|
||||
</p>
|
||||
<p></p>
|
||||
<table id="Table2" cellspacing="0" cellpadding="7" width="100%" border="0">
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%">RegEx();</td>
|
||||
<td valign="top" width="42%">Default constructor, constructs an instance of RegEx
|
||||
without any valid expression.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%">RegEx(<b>const</b> RegEx& o);</td>
|
||||
<td valign="top" width="42%">Copy constructor, all the properties of parameter <i>o</i>
|
||||
are copied.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%">RegEx(<b>const</b> <b>char</b>* c, <b>bool</b> icase
|
||||
= <b>false</b>);</td>
|
||||
<td valign="top" width="42%">Constructs an instance of RegEx, setting the
|
||||
expression to <i>c</i>, if <i>icase</i> is <i>true</i> then matching is
|
||||
insensitive to case, otherwise it is sensitive to case. Throws <i>bad_expression</i>
|
||||
on failure.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%">RegEx(<b>const</b> std::string& s, <b>bool</b> icase
|
||||
= <b>false</b>);</td>
|
||||
<td valign="top" width="42%">Constructs an instance of RegEx, setting the
|
||||
expression to <i>s</i>, if <i>icase</i> is <i>true</i> then matching is
|
||||
insensitive to case, otherwise it is sensitive to case. Throws <i>bad_expression</i>
|
||||
on failure.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%">RegEx& <b>operator</b>=(<b>const</b> RegEx&
|
||||
o);</td>
|
||||
<td valign="top" width="42%">Default assignment operator.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%">RegEx& <b>operator</b>=(<b>const</b> <b>char</b>*
|
||||
p);</td>
|
||||
<td valign="top" width="42%">Assignment operator, equivalent to calling <i>SetExpression(p,
|
||||
false).</i> Throws <i>bad_expression</i> on failure.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%">RegEx& <b>operator</b>=(<b>const</b> std::string&
|
||||
s);</td>
|
||||
<td valign="top" width="42%">Assignment operator, equivalent to calling <i>SetExpression(s,
|
||||
false).</i> Throws <i>bad_expression</i> on failure.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%"><b>unsigned</b> <b>int</b> SetExpression(<b>const</b> <b>char</b>*
|
||||
p, <b>bool</b> icase = <b>false</b>);</td>
|
||||
<td valign="top" width="42%">Sets the current expression to <i>p</i>, if <i>icase</i>
|
||||
is <i>true</i> then matching is insensitive to case, otherwise it is sensitive
|
||||
to case. Throws <i>bad_expression</i> on failure.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%"><b>unsigned</b> <b>int</b> SetExpression(<b>const</b>
|
||||
std::string& s, <b>bool</b> icase = <b>false</b>);</td>
|
||||
<td valign="top" width="42%">Sets the current expression to <i>s</i>, if <i>icase</i>
|
||||
is <i>true</i> then matching is insensitive to case, otherwise it is sensitive
|
||||
to case. Throws <i>bad_expression</i> on failure.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%">std::string Expression()<b>const</b>;</td>
|
||||
<td valign="top" width="42%">Returns a copy of the current regular expression.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%"><b>bool</b> Match(<b>const</b> <b>char</b>* p,
|
||||
boost::match_flag_type flags = match_default);</td>
|
||||
<td valign="top" width="42%">Attempts to match the current expression against the
|
||||
text <i>p</i> using the match flags <i>flags</i> - see <a href="match_flag_type.html">
|
||||
match flags</a>. Returns <i>true</i> if the expression matches the whole of
|
||||
the input string.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%"><b>bool</b> Match(<b>const</b> std::string& s,
|
||||
boost::match_flag_type flags = match_default) ;</td>
|
||||
<td valign="top" width="42%">Attempts to match the current expression against the
|
||||
text <i>s</i> using the match flags <i>flags</i> - see <a href="match_flag_type.html">
|
||||
match flags</a>. Returns <i>true</i> if the expression matches the whole of
|
||||
the input string.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%"><b>bool</b> Search(<b>const</b> <b>char</b>* p,
|
||||
boost::match_flag_type flags = match_default);</td>
|
||||
<td valign="top" width="42%">Attempts to find a match for the current expression
|
||||
somewhere in the text <i>p</i> using the match flags <i>flags</i> - see <a href="match_flag_type.html">
|
||||
match flags</a>. Returns <i>true</i> if the match succeeds.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%"><b>bool</b> Search(<b>const</b> std::string& s,
|
||||
boost::match_flag_type flags = match_default) ;</td>
|
||||
<td valign="top" width="42%">Attempts to find a match for the current expression
|
||||
somewhere in the text <i>s</i> using the match flags <i>flags</i> - see <a href="match_flag_type.html">
|
||||
match flags</a>. Returns <i>true</i> if the match succeeds.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%"><b>unsigned</b> <b>int</b> Grep(GrepCallback cb, <b>const</b>
|
||||
<b>char</b>* p, boost::match_flag_type flags = match_default);</td>
|
||||
<td valign="top" width="42%">Finds all matches of the current expression in the
|
||||
text <i>p</i> using the match flags <i>flags</i> - see <a href="match_flag_type.html">
|
||||
match flags</a>. For each match found calls the call-back function <i>cb</i>
|
||||
as: cb(*this);
|
||||
<p>If at any stage the call-back function returns false then the grep operation
|
||||
terminates, otherwise continues until no further matches are found. Returns the
|
||||
number of matches found.</p>
|
||||
</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%"><b>unsigned</b> <b>int</b> Grep(GrepCallback cb, <b>const</b>
|
||||
std::string& s, boost::match_flag_type flags = match_default);</td>
|
||||
<td valign="top" width="42%">Finds all matches of the current expression in the
|
||||
text <i>s</i> using the match flags <i>flags</i> - see <a href="match_flag_type.html">
|
||||
match flags</a>. For each match found calls the call-back function <i>cb</i>
|
||||
as: cb(*this);
|
||||
<p>If at any stage the call-back function returns false then the grep operation
|
||||
terminates, otherwise continues until no further matches are found. Returns the
|
||||
number of matches found.</p>
|
||||
</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%"><b>unsigned</b> <b>int</b> Grep(std::vector<std::string>&
|
||||
v, <b>const</b> <b>char</b>* p, boost::match_flag_type flags = match_default);</td>
|
||||
<td valign="top" width="42%">Finds all matches of the current expression in the
|
||||
text <i>p</i> using the match flags <i>flags</i> - see <a href="match_flag_type.html">
|
||||
match flags</a>. For each match pushes a copy of what matched onto <i>v</i>.
|
||||
Returns the number of matches found.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%"><b>unsigned</b> <b>int</b> Grep(std::vector<std::string>&
|
||||
v, <b>const</b> std::string& s, boost::match_flag_type flags =
|
||||
match_default);</td>
|
||||
<td valign="top" width="42%">Finds all matches of the current expression in the
|
||||
text <i>s</i> using the match flags <i>flags</i> - see <a href="match_flag_type.html">
|
||||
match flags</a>. For each match pushes a copy of what matched onto <i>v</i>.
|
||||
Returns the number of matches found.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%"><b>unsigned</b> <b>int</b> Grep(std::vector<<b>unsigned
|
||||
int</b>>& v, <b>const</b> <b>char</b>* p, boost::match_flag_type
|
||||
flags = match_default);</td>
|
||||
<td valign="top" width="42%">Finds all matches of the current expression in the
|
||||
text <i>p</i> using the match flags <i>flags</i> - see <a href="match_flag_type.html">
|
||||
match flags</a>. For each match pushes the starting index of what matched
|
||||
onto <i>v</i>. Returns the number of matches found.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%"><b>unsigned</b> <b>int</b> Grep(std::vector<<b>unsigned
|
||||
int</b>>& v, <b>const</b> std::string& s, boost::match_flag_type
|
||||
flags = match_default);</td>
|
||||
<td valign="top" width="42%">Finds all matches of the current expression in the
|
||||
text <i>s</i> using the match flags <i>flags</i> - see <a href="match_flag_type.html">
|
||||
match flags</a>. For each match pushes the starting index of what matched
|
||||
onto <i>v</i>. Returns the number of matches found.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%"><b>unsigned</b> <b>int</b> GrepFiles(GrepFileCallback
|
||||
cb, <b>const</b> <b>char</b>* files, <b>bool</b> recurse = <b>false</b>,
|
||||
boost::match_flag_type flags = match_default);</td>
|
||||
<td valign="top" width="42%">Finds all matches of the current expression in the
|
||||
files <i>files</i> using the match flags <i>flags</i> - see <a href="match_flag_type.html">
|
||||
match flags</a>. For each match calls the call-back function cb.
|
||||
<p>If the call-back returns false then the algorithm returns without considering
|
||||
further matches in the current file, or any further files. </p>
|
||||
<p>The parameter <i>files</i> can include wild card characters '*' and '?', if the
|
||||
parameter <i>recurse</i> is true then searches sub-directories for matching
|
||||
file names. </p>
|
||||
<p>Returns the total number of matches found.</p>
|
||||
<p>May throw an exception derived from std::runtime_error if file io fails.</p>
|
||||
</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%"><b>unsigned</b> <b>int</b> GrepFiles(GrepFileCallback
|
||||
cb, <b>const</b> std::string& files, <b>bool</b> recurse = <b>false</b>,
|
||||
boost::match_flag_type flags = match_default);</td>
|
||||
<td valign="top" width="42%">Finds all matches of the current expression in the
|
||||
files <i>files</i> using the match flags <i>flags</i> - see <a href="match_flag_type.html">
|
||||
match flags</a>. For each match calls the call-back function cb.
|
||||
<p>If the call-back returns false then the algorithm returns without considering
|
||||
further matches in the current file, or any further files. </p>
|
||||
<p>The parameter <i>files</i> can include wild card characters '*' and '?', if the
|
||||
parameter <i>recurse</i> is true then searches sub-directories for matching
|
||||
file names. </p>
|
||||
<p>Returns the total number of matches found.</p>
|
||||
<p>May throw an exception derived from std::runtime_error if file io fails.</p>
|
||||
</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%"><b>unsigned</b> <b>int</b> FindFiles(FindFilesCallback
|
||||
cb, <b>const</b> <b>char</b>* files, <b>bool</b> recurse = <b>false</b>,
|
||||
boost::match_flag_type flags = match_default);</td>
|
||||
<td valign="top" width="42%">Searches <i>files</i> to find all those which contain
|
||||
at least one match of the current expression using the match flags <i>flags</i>
|
||||
- see <a href="match_flag_type.html">match flags</a>. For each matching file
|
||||
calls the call-back function cb.
|
||||
<p>If the call-back returns false then the algorithm returns without considering
|
||||
any further files. </p>
|
||||
<p>The parameter <i>files</i> can include wild card characters '*' and '?', if the
|
||||
parameter <i>recurse</i> is true then searches sub-directories for matching
|
||||
file names. </p>
|
||||
<p>Returns the total number of files found.</p>
|
||||
<p>May throw an exception derived from std::runtime_error if file io fails.</p>
|
||||
</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%"><b>unsigned</b> <b>int</b> FindFiles(FindFilesCallback
|
||||
cb, <b>const</b> std::string& files, <b>bool</b> recurse = <b>false</b>,
|
||||
boost::match_flag_type flags = match_default);</td>
|
||||
<td valign="top" width="42%">Searches <i>files</i> to find all those which contain
|
||||
at least one match of the current expression using the match flags <i>flags</i>
|
||||
- see <a href="match_flag_type.html">match flags</a>. For each matching file
|
||||
calls the call-back function cb.
|
||||
<p>If the call-back returns false then the algorithm returns without considering
|
||||
any further files. </p>
|
||||
<p>The parameter <i>files</i> can include wild card characters '*' and '?', if the
|
||||
parameter <i>recurse</i> is true then searches sub-directories for matching
|
||||
file names. </p>
|
||||
<p>Returns the total number of files found.</p>
|
||||
<p>May throw an exception derived from std::runtime_error if file io fails.</p>
|
||||
</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%">std::string Merge(<b>const</b> std::string& in, <b>const</b>
|
||||
std::string& fmt, <b>bool</b> copy = <b>true</b>, boost::match_flag_type
|
||||
flags = match_default);</td>
|
||||
<td valign="top" width="42%">Performs a search and replace operation: searches
|
||||
through the string <i>in</i> for all occurrences of the current expression, for
|
||||
each occurrence replaces the match with the format string <i>fmt</i>. Uses <i>flags</i>
|
||||
to determine what gets matched, and how the format string should be treated. If <i>
|
||||
copy</i> is true then all unmatched sections of input are copied unchanged
|
||||
to output, if the flag <em>format_first_only</em> is set then only the first
|
||||
occurrence of the pattern found is replaced. Returns the new string. See <a href="format_syntax.html">
|
||||
also format string syntax</a>, <a href="match_flag_type.html">match flags</a>
|
||||
and <a href="match_flag_type.html">format flags</a>.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%">std::string Merge(<b>const</b> char* in, <b>const</b>
|
||||
char* fmt, <b>bool</b> copy = <b>true</b>, boost::match_flag_type flags =
|
||||
match_default);</td>
|
||||
<td valign="top" width="42%">Performs a search and replace operation: searches
|
||||
through the string <i>in</i> for all occurrences of the current expression, for
|
||||
each occurrence replaces the match with the format string <i>fmt</i>. Uses <i>flags</i>
|
||||
to determine what gets matched, and how the format string should be treated. If <i>
|
||||
copy</i> is true then all unmatched sections of input are copied unchanged
|
||||
to output, if the flag <em>format_first_only</em> is set then only the first
|
||||
occurrence of the pattern found is replaced. Returns the new string. See <a href="format_syntax.html">
|
||||
also format string syntax</a>, <a href="match_flag_type.html">match flags</a>
|
||||
and <a href="match_flag_type.html">format flags</a>.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> </td>
|
||||
<td valign="top"><b>unsigned</b> Split(std::vector<std::string>& v,
|
||||
std::string& s, boost::match_flag_type flags = match_default, <b>unsigned</b>
|
||||
max_count = ~0);</td>
|
||||
<td valign="top">Splits the input string and pushes each one onto the vector. If
|
||||
the expression contains no marked sub-expressions, then one string is outputted
|
||||
for each section of the input that does not match the expression. If the
|
||||
expression does contain marked sub-expressions, then outputs one string for
|
||||
each marked sub-expression each time a match occurs. Outputs no more than <i>max_count</i>
|
||||
strings. Before returning, deletes from the input string <i>s</i> all of the
|
||||
input that has been processed (all of the string if <i>max_count</i> was not
|
||||
reached). Returns the number of strings pushed onto the vector.</td>
|
||||
<td> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%"><b>unsigned</b> <b>int</b> Position(<b>int</b> i = 0)<b>const</b>;</td>
|
||||
<td valign="top" width="42%">Returns the position of what matched sub-expression <i>i</i>.
|
||||
If <i>i = 0</i> then returns the position of the whole match. Returns
|
||||
RegEx::npos if the supplied index is invalid, or if the specified
|
||||
sub-expression did not participate in the match.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%"><b>unsigned</b> <b>int</b> Length(<b>int</b> i = 0)<b>const</b>;</td>
|
||||
<td valign="top" width="42%">Returns the length of what matched sub-expression <i>i</i>.
|
||||
If <i>i = 0</i> then returns the length of the whole match. Returns RegEx::npos
|
||||
if the supplied index is invalid, or if the specified sub-expression did not
|
||||
participate in the match.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> </td>
|
||||
<td><strong>bool</strong> Matched(<strong>int</strong> i = 0)<strong>const</strong>;</td>
|
||||
<td>Returns true if sub-expression <em>i</em> was matched, false otherwise.</td>
|
||||
<td> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%"><b>unsigned</b> <b>int</b> Line()<b>const</b>;</td>
|
||||
<td valign="top" width="42%">Returns the line on which the match occurred, indexes
|
||||
start from 1 not zero, if no match occurred then returns RegEx::npos.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%"><b>unsigned int</b> Marks() const;</td>
|
||||
<td valign="top" width="42%">Returns the number of marked sub-expressions
|
||||
contained in the expression. Note that this includes the whole match
|
||||
(sub-expression zero), so the value returned is always >= 1.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%">std::string What(<b>int</b> i)<b>const</b>;</td>
|
||||
<td valign="top" width="42%">Returns a copy of what matched sub-expression <i>i</i>.
|
||||
If <i>i = 0</i> then returns a copy of the whole match. Returns a null string
|
||||
if the index is invalid or if the specified sub-expression did not participate
|
||||
in a match.</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="7%"> </td>
|
||||
<td valign="top" width="43%">std::string <b>operator</b>[](<b>int</b> i)<b>const</b>
|
||||
;</td>
|
||||
<td valign="top" width="42%">Returns <i>What(i);</i>
|
||||
<p>Can be used to simplify access to sub-expression matches, and make usage more
|
||||
perl-like.</p>
|
||||
</td>
|
||||
<td valign="top" width="7%"> </td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<p></p>
|
||||
<hr>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
04 Feb 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,204 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<meta name="generator" content="HTML Tidy, see www.w3.org">
|
||||
<title>Boost.Regex: Algorithm regex_format (deprecated)</title>
|
||||
<meta http-equiv="Content-Type" content=
|
||||
"text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<p></p>
|
||||
|
||||
<table id="Table1" cellspacing="1" cellpadding="1" width="100%"
|
||||
border="0">
|
||||
<tr>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt=
|
||||
"C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<td width="353">
|
||||
<h1 align="center">Boost.Regex</h1>
|
||||
|
||||
<h2 align="center">Algorithm regex_format (deprecated)</h2>
|
||||
</td>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt=
|
||||
"Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<br>
|
||||
<br>
|
||||
|
||||
|
||||
<hr>
|
||||
<p>The algorithm regex_format is deprecated; new code should use
|
||||
match_results::format instead. Existing code will continue to
|
||||
compile; the following documentation is taken from the previous
|
||||
version of boost.regex and will not be further updated:</p>
|
||||
|
||||
<h3>Algorithm regex_format</h3>
|
||||
|
||||
<pre>
|
||||
#include <<a href="../../../boost/regex.hpp">boost/regex.hpp</a>>
|
||||
</pre>
|
||||
|
||||
<p>The algorithm regex_format takes the results of a match and
|
||||
creates a new string based upon a <a href="format_syntax.html">
|
||||
format string</a>, regex_format can be used for search and replace
|
||||
operations:</p>
|
||||
|
||||
<pre>
|
||||
<b>template</b> <<b>class</b> OutputIterator, <b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT>
|
||||
OutputIterator regex_format(OutputIterator out,
|
||||
<b>const</b> match_results<iterator, Allocator>& m,
|
||||
<b>const</b> charT* fmt,
|
||||
match_flag_type flags = 0);
|
||||
<b>template</b> <<b>class</b> OutputIterator, <b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT>
|
||||
OutputIterator regex_format(OutputIterator out,
|
||||
<b>const</b> match_results<iterator, Allocator>& m,
|
||||
<b>const</b> std::basic_string<charT>& fmt,
|
||||
match_flag_type flags = 0);
|
||||
</pre>
|
||||
|
||||
<p>The library also defines the following convenience variation of
|
||||
regex_format, which returns the result directly as a string, rather
|
||||
than outputting to an iterator [note - this version may not be
|
||||
available, or may be available in a more limited form, depending
|
||||
upon your compiler's capabilities]:</p>
|
||||
|
||||
<pre>
|
||||
<b>template</b> <<b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT>
|
||||
std::basic_string<charT> regex_format
|
||||
(<b>const</b> match_results<iterator, Allocator>& m,
|
||||
<b>const</b> charT* fmt,
|
||||
match_flag_type flags = 0);
|
||||
|
||||
<b>template</b> <<b>class</b> iterator, <b>class</b> Allocator, <b>class</b> charT>
|
||||
std::basic_string<charT> regex_format
|
||||
(<b>const</b> match_results<iterator, Allocator>& m,
|
||||
<b>const</b> std::basic_string<charT>& fmt,
|
||||
match_flag_type flags = 0);
|
||||
</pre>
|
||||
|
||||
<p>Parameters to the main version of the function are passed as
|
||||
follows:</p>
|
||||
|
||||
<p></p>
|
||||
|
||||
<table id="Table2" cellspacing="0" cellpadding="7" width="100%"
|
||||
border="0">
|
||||
<tr>
|
||||
<td valign="top" width="9%"> </td>
|
||||
<td valign="top" width="39%">OutputIterator out</td>
|
||||
<td valign="top" width="44%">An output iterator type, the output
|
||||
string is sent to this iterator. Typically this would be a
|
||||
std::ostream_iterator.</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td valign="top" width="9%"> </td>
|
||||
<td valign="top" width="39%"><b>const</b>
|
||||
match_results<iterator, Allocator>& m</td>
|
||||
<td valign="top" width="44%">An instance of match_results<>
|
||||
obtained from one of the matching algorithms above, and denoting
|
||||
what matched.</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td valign="top" width="9%"> </td>
|
||||
<td valign="top" width="39%"><b>const</b> charT* fmt</td>
|
||||
<td valign="top" width="44%">A format string that determines how
|
||||
the match is transformed into the new string.</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td valign="top" width="9%"> </td>
|
||||
<td valign="top" width="39%">match_flag_type flags</td>
|
||||
<td valign="top" width="44%">Optional flags which describe how the
|
||||
format string is to be interpreted.</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<br>
|
||||
<br>
|
||||
|
||||
|
||||
<p><a name="format_flags"></a>Format flags are defined as
|
||||
follows:</p>
|
||||
|
||||
<p></p>
|
||||
|
||||
<table id="Table3" cellspacing="0" cellpadding="7" width="100%"
|
||||
border="0">
|
||||
<tr>
|
||||
<td valign="top" width="9%"> </td>
|
||||
<td valign="top" width="39%">format_all</td>
|
||||
<td valign="top" width="43%">Enables all syntax options (perl-like
|
||||
plus extensions).</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td valign="top" width="9%"> </td>
|
||||
<td valign="top" width="39%">format_sed</td>
|
||||
<td valign="top" width="43%">Allows only a sed-like syntax.</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td valign="top" width="9%"> </td>
|
||||
<td valign="top" width="39%">format_perl</td>
|
||||
<td valign="top" width="43%">Allows only a perl-like syntax.</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td valign="top" width="9%"> </td>
|
||||
<td valign="top" width="39%">format_no_copy</td>
|
||||
<td valign="top" width="43%">Disables copying of unmatched sections
|
||||
to the output string during <a href="regex_merge.html">
|
||||
regex_merge</a> operations.</td>
|
||||
<td valign="top" width="9%"> </td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td> </td>
|
||||
<td>format_first_only</td>
|
||||
<td>When this flag is set only the first occurrence will be replaced
|
||||
(applies to regex_merge only).</td>
|
||||
<td> </td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<br>
|
||||
<br>
|
||||
|
||||
|
||||
<p>The format string syntax (and available options) is described
|
||||
more fully under <a href="format_syntax.html">format strings</a>
|
||||
.</p>
|
||||
|
||||
<p></p>
|
||||
|
||||
<hr>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
@ -1,377 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Algorithm regex_grep (deprecated)</title>
|
||||
<meta name="generator" content="HTML Tidy, see www.w3.org">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<p></p>
|
||||
<table id="Table1" cellspacing="1" cellpadding="1" width="100%" border="0">
|
||||
<tr>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<td width="353">
|
||||
<h1 align="center">Boost.Regex</h1>
|
||||
<h2 align="center">Algorithm regex_grep (deprecated)</h2>
|
||||
</td>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<hr>
|
||||
<p>The algorithm regex_grep is deprecated in favor of <a href="regex_iterator.html">regex_iterator</a>
|
||||
which provides a more convenient and standard library friendly interface.</p>
|
||||
<p>The following documentation is taken unchanged from the previous boost release,
|
||||
and will not be updated in future.</p>
|
||||
<hr>
|
||||
<pre>
|
||||
#include <<a href="../../../boost/regex.hpp">boost/regex.hpp</a>>
|
||||
</pre>
|
||||
<p>regex_grep allows you to search through a bidirectional-iterator range and
|
||||
locate all the (non-overlapping) matches with a given regular expression. The
|
||||
function is declared as:</p>
|
||||
<pre>
|
||||
<b>template</b> <<b>class</b> Predicate, <b>class</b> iterator, <b>class</b> charT, <b>class</b> traits>
|
||||
<b>unsigned</b> <b>int</b> regex_grep(Predicate foo,
|
||||
iterator first,
|
||||
iterator last,
|
||||
<b>const</b> basic_regex<charT, traits>& e,
|
||||
boost::match_flag_type flags = match_default)
|
||||
</pre>
|
||||
<p>The library also defines the following convenience versions, which take either
|
||||
a const charT*, or a const std::basic_string<>& in place of a pair of
|
||||
iterators [note - these versions may not be available, or may be available in a
|
||||
more limited form, depending upon your compiler's capabilities]:</p>
|
||||
<pre>
|
||||
<b>template</b> <<b>class</b> Predicate, <b>class</b> charT, <b>class</b> traits>
|
||||
<b>unsigned</b> <b>int</b> regex_grep(Predicate foo,
|
||||
<b>const</b> charT* str,
|
||||
<b>const</b> basic_regex<charT, traits>& e,
|
||||
boost::match_flag_type flags = match_default);
|
||||
|
||||
<b>template</b> <<b>class</b> Predicate, <b>class</b> ST, <b>class</b> SA, <b>class</b> charT, <b>class</b> traits>
|
||||
<b>unsigned</b> <b>int</b> regex_grep(Predicate foo,
|
||||
<b>const</b> std::basic_string<charT, ST, SA>& s,
|
||||
<b>const</b> basic_regex<charT, traits>& e,
|
||||
boost::match_flag_type flags = match_default);
|
||||
</pre>
|
||||
<p>The parameters for the primary version of regex_grep have the following
|
||||
meanings: </p>
|
||||
<p></p>
|
||||
<table id="Table2" cellspacing="0" cellpadding="7" width="624" border="0">
|
||||
<tr>
|
||||
<td width="5%"> </td>
|
||||
<td valign="top" width="50%">foo</td>
|
||||
<td valign="top" width="50%">A predicate function object or function pointer, see
|
||||
below for more information.</td>
|
||||
<td width="5%"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> </td>
|
||||
<td valign="top" width="50%">first</td>
|
||||
<td valign="top" width="50%">The start of the range to search.</td>
|
||||
<td> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> </td>
|
||||
<td valign="top" width="50%">last</td>
|
||||
<td valign="top" width="50%">The end of the range to search.</td>
|
||||
<td> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> </td>
|
||||
<td valign="top" width="50%">e</td>
|
||||
<td valign="top" width="50%">The regular expression to search for.</td>
|
||||
<td> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> </td>
|
||||
<td valign="top" width="50%">flags</td>
|
||||
<td valign="top" width="50%">The flags that determine how matching is carried out,
|
||||
one of the <a href="match_flag_type.html">match_flags</a> enumerators.</td>
|
||||
<td> </td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<p>The algorithm finds all of the non-overlapping matches of the expression e; for
|
||||
each match it fills a <a href="match_results.html">match_results</a><iterator>
|
||||
structure, which contains information on what matched, and calls the predicate
|
||||
foo, passing the match_results<iterator> as a single argument. If the
|
||||
predicate returns true, then the grep operation continues, otherwise it
|
||||
terminates without searching for further matches. The function returns the
|
||||
number of matches found.</p>
|
||||
<p>The general form of the predicate is:</p>
|
||||
<pre>
|
||||
<b>struct</b> grep_predicate
|
||||
{
|
||||
<b> bool</b> <b>operator</b>()(<b>const</b> match_results<iterator_type>& m);
|
||||
};
|
||||
</pre>
|
||||
<p>For example the regular expression "a*b" would find one match in the string
|
||||
"aaaaab" and two in the string "aaabb".</p>
|
||||
<p>Remember this algorithm can be used for a lot more than implementing a version
|
||||
of grep, the predicate can be and do anything that you want, grep utilities
|
||||
would output the results to the screen, another program could index a file
|
||||
based on a regular expression and store a set of bookmarks in a list, or a text
|
||||
file conversion utility would output to file. The results of one regex_grep can
|
||||
even be chained into another regex_grep to create recursive parsers.</p>
|
||||
<P>The algorithm may throw <CODE>std::runtime_error</CODE> if the complexity
|
||||
of matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
|
||||
or if the program runs out of stack space while matching the expression (if
|
||||
Boost.regex is <A href="configuration.html">configured</A> in recursive mode),
|
||||
or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html">
|
||||
configured</A> in non-recursive mode).</P>
|
||||
<p><a href="../example/snippets/regex_grep_example_1.cpp"> Example</a>: convert
|
||||
the example from <i>regex_search</i> to use <i>regex_grep</i> instead:</p>
|
||||
<pre>
|
||||
<font color="#008000">#include <string>
|
||||
#include <map>
|
||||
#include <boost/regex.hpp>
|
||||
|
||||
</font><font color="#000080"><i>// IndexClasses:
|
||||
// takes the contents of a file in the form of a string
|
||||
// and searches for all the C++ class definitions, storing
|
||||
// their locations in a map of strings/int's
|
||||
</i></font><b>typedef</b> std::map<std::string, <b>int</b>, std::less<std::string> > map_type;
|
||||
|
||||
const char* re =
|
||||
// possibly leading whitespace:
|
||||
"^[[:space:]]*"
|
||||
// possible template declaration:
|
||||
"(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
|
||||
// class or struct:
|
||||
"(class|struct)[[:space:]]*"
|
||||
// leading declspec macros etc:
|
||||
"("
|
||||
"\\<\\w+\\>"
|
||||
"("
|
||||
"[[:blank:]]*\\([^)]*\\)"
|
||||
")?"
|
||||
"[[:space:]]*"
|
||||
")*"
|
||||
// the class name
|
||||
"(\\<\\w*\\>)[[:space:]]*"
|
||||
// template specialisation parameters
|
||||
"(<[^;:{]+>)?[[:space:]]*"
|
||||
// terminate in { or :
|
||||
"(\\{|:[^;\\{()]*\\{)";
|
||||
|
||||
boost::regex expression(re);
|
||||
<b>class</b> IndexClassesPred
|
||||
{
|
||||
map_type& m;
|
||||
std::string::const_iterator base;
|
||||
<b>public</b>:
|
||||
IndexClassesPred(map_type& a, std::string::const_iterator b) : m(a), base(b) {}
|
||||
<b>bool</b> <b>operator</b>()(<b>const</b> smatch& what)
|
||||
{
|
||||
<font color=
|
||||
#000080> <i>// what[0] contains the whole string
|
||||
</i> <i>// what[5] contains the class name.
|
||||
</i> <i>// what[6] contains the template specialisation if any.
|
||||
</i> <i>// add class name and position to map:
|
||||
</i></font> m[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] =
|
||||
what[5].first - base;
|
||||
<b>return</b> <b>true</b>;
|
||||
}
|
||||
};
|
||||
<b>void</b> IndexClasses(map_type& m, <b>const</b> std::string& file)
|
||||
{
|
||||
std::string::const_iterator start, end;
|
||||
start = file.begin();
|
||||
end = file.end();
|
||||
regex_grep(IndexClassesPred(m, start), start, end, expression);
|
||||
}
|
||||
</pre>
|
||||
<p><a href="../example/snippets/regex_grep_example_2.cpp"> Example</a>: Use
|
||||
regex_grep to call a global callback function:</p>
|
||||
<pre>
|
||||
<font color="#008000">#include <string>
|
||||
#include <map>
|
||||
#include <boost/regex.hpp>
|
||||
|
||||
</font><font color="#000080"><i>// purpose:
|
||||
// takes the contents of a file in the form of a string
|
||||
// and searches for all the C++ class definitions, storing
|
||||
// their locations in a map of strings/int's
|
||||
</i></font><b>typedef</b> std::map<std::string, <b>int</b>, std::less<std::string> > map_type;
|
||||
|
||||
const char* re =
|
||||
// possibly leading whitespace:
|
||||
"^[[:space:]]*"
|
||||
// possible template declaration:
|
||||
"(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
|
||||
// class or struct:
|
||||
"(class|struct)[[:space:]]*"
|
||||
// leading declspec macros etc:
|
||||
"("
|
||||
"\\<\\w+\\>"
|
||||
"("
|
||||
"[[:blank:]]*\\([^)]*\\)"
|
||||
")?"
|
||||
"[[:space:]]*"
|
||||
")*"
|
||||
// the class name
|
||||
"(\\<\\w*\\>)[[:space:]]*"
|
||||
// template specialisation parameters
|
||||
"(<[^;:{]+>)?[[:space:]]*"
|
||||
// terminate in { or :
|
||||
"(\\{|:[^;\\{()]*\\{)";
|
||||
|
||||
boost::regex expression(re);
|
||||
map_type class_index;
|
||||
std::string::const_iterator base;
|
||||
|
||||
<b>bool</b> grep_callback(<b>const</b> boost::smatch& what)
|
||||
{
|
||||
<font color="#000080"> <i>// what[0] contains the whole string
|
||||
</i> <i>// what[5] contains the class name.
|
||||
</i> <i>// what[6] contains the template specialisation if any.
|
||||
</i> <i>// add class name and position to map:
|
||||
</i></font> class_index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] =
|
||||
what[5].first - base;
|
||||
<b>return</b> <b>true</b>;
|
||||
}
|
||||
<b>void</b> IndexClasses(<b>const</b> std::string& file)
|
||||
{
|
||||
std::string::const_iterator start, end;
|
||||
start = file.begin();
|
||||
end = file.end();
|
||||
base = start;
|
||||
regex_grep(grep_callback, start, end, expression, match_default);
|
||||
}
|
||||
|
||||
</pre>
|
||||
<p><a href="../example/snippets/regex_grep_example_3.cpp"> Example</a>: use
|
||||
regex_grep to call a class member function, use the standard library adapters <i>std::mem_fun</i>
|
||||
and <i>std::bind1st</i> to convert the member function into a predicate:</p>
|
||||
<pre>
|
||||
<font color="#008000">#include <string>
|
||||
#include <map>
|
||||
#include <boost/regex.hpp>
|
||||
#include <functional>
|
||||
</font><font color="#000080"><i>// purpose:
|
||||
// takes the contents of a file in the form of a string
|
||||
// and searches for all the C++ class definitions, storing
|
||||
// their locations in a map of strings/int's
|
||||
|
||||
</i></font><b>typedef</b> std::map<std::string, <b>int</b>, std::less<std::string> > map_type;
|
||||
<b>class</b> class_index
|
||||
{
|
||||
boost::regex expression;
|
||||
map_type index;
|
||||
std::string::const_iterator base;
|
||||
<b>bool</b> grep_callback(boost::smatch what);
|
||||
<b>public</b>:
|
||||
<b> void</b> IndexClasses(<b>const</b> std::string& file);
|
||||
class_index()
|
||||
: index(),
|
||||
expression(<font color=
|
||||
#000080>"^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
|
||||
"(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?"
|
||||
"[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?"
|
||||
"(\\{|:[^;\\{()]*\\{)"
|
||||
</font> ){}
|
||||
};
|
||||
<b>bool</b> class_index::grep_callback(boost::smatch what)
|
||||
{
|
||||
<font color="#000080"> <i>// what[0] contains the whole string
|
||||
</i> <i>// what[5] contains the class name.
|
||||
</i> <i>// what[6] contains the template specialisation if any.
|
||||
</i> <i>// add class name and position to map:
|
||||
</i></font> index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] =
|
||||
what[5].first - base;
|
||||
<b>return</b> <b>true</b>;
|
||||
}
|
||||
|
||||
<b>void</b> class_index::IndexClasses(<b>const</b> std::string& file)
|
||||
{
|
||||
std::string::const_iterator start, end;
|
||||
start = file.begin();
|
||||
end = file.end();
|
||||
base = start;
|
||||
regex_grep(std::bind1st(std::mem_fun(&class_index::grep_callback), <b>this</b>),
|
||||
start,
|
||||
end,
|
||||
expression);
|
||||
}
|
||||
|
||||
</pre>
|
||||
<p><a href="../example/snippets/regex_grep_example_4.cpp"> Finally</a>, C++
|
||||
Builder users can use C++ Builder's closure type as a callback argument:</p>
|
||||
<pre>
|
||||
<font color="#008000">#include <string>
|
||||
#include <map>
|
||||
#include <boost/regex.hpp>
|
||||
#include <functional>
|
||||
</font><font color="#000080"><i>// purpose:
|
||||
// takes the contents of a file in the form of a string
|
||||
// and searches for all the C++ class definitions, storing
|
||||
// their locations in a map of strings/int's
|
||||
|
||||
</i></font><b>typedef</b> std::map<std::string, <b>int</b>, std::less<std::string> > map_type;
|
||||
<b>class</b> class_index
|
||||
{
|
||||
boost::regex expression;
|
||||
map_type index;
|
||||
std::string::const_iterator base;
|
||||
<b>typedef</b> boost::smatch arg_type;
|
||||
<b>bool</b> grep_callback(<b>const</b> arg_type& what);
|
||||
<b>public</b>:
|
||||
<b>typedef</b> <b>bool</b> (<b>__closure</b>* grep_callback_type)(<b>const</b> arg_type&);
|
||||
<b>void</b> IndexClasses(<b>const</b> std::string& file);
|
||||
class_index()
|
||||
: index(),
|
||||
expression(<font color=
|
||||
#000080>"^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
|
||||
"(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?"
|
||||
"[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?"
|
||||
"(\\{|:[^;\\{()]*\\{)"
|
||||
</font> ){}
|
||||
};
|
||||
|
||||
<b>bool</b> class_index::grep_callback(<b>const</b> arg_type& what)
|
||||
{
|
||||
<font color=
|
||||
#000080> <i>// what[0] contains the whole string</i>
|
||||
<i>// what[5] contains the class name.</i>
|
||||
<i>// what[6] contains the template specialisation if any.</i>
|
||||
<i>// add class name and position to map:</i></font>
|
||||
index[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] =
|
||||
what[5].first - base;
|
||||
<b>return</b> <b>true</b>;
|
||||
}
|
||||
|
||||
<b>void</b> class_index::IndexClasses(<b>const</b> std::string& file)
|
||||
{
|
||||
std::string::const_iterator start, end;
|
||||
start = file.begin();
|
||||
end = file.end();
|
||||
base = start;
|
||||
class_index::grep_callback_type cl = &(<b>this</b>->grep_callback);
|
||||
regex_grep(cl,
|
||||
start,
|
||||
end,
|
||||
expression);
|
||||
}
|
||||
</pre>
|
||||
<p></p>
|
||||
<hr>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
26 June 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,456 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: regex_iterator</title>
|
||||
<meta name="generator" content="HTML Tidy, see www.w3.org">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link href="../../../boost.css" type="text/css" rel="stylesheet">
|
||||
</head>
|
||||
<body>
|
||||
<p></p>
|
||||
<table id="Table1" cellspacing="1" cellpadding="1" width="100%" border="0">
|
||||
<tr>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" alt="C++ Boost" src="../../../boost.png" width="277" border="0"></a></h3>
|
||||
</td>
|
||||
<td width="353">
|
||||
<h1 align="center">Boost.Regex</h1>
|
||||
<h2 align="center">regex_iterator</h2>
|
||||
</td>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></a></h3>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<hr>
|
||||
<h3>Contents</h3>
|
||||
<dl class="index">
|
||||
<dt><a href="#synopsis">Synopsis</a></dt> <dt><a href="#description">Description</a></dt> <dt><a href="#examples">
|
||||
Examples</a></dt>
|
||||
</dl>
|
||||
<h3><a name="synopsis"></a>Synopsis</h3>
|
||||
<p>The iterator type regex_iterator will enumerate all of the regular expression
|
||||
matches found in some sequence: dereferencing a regex_iterator yields a
|
||||
reference to a <a href="match_results.html">match_results</a> object.</p>
|
||||
<pre>
|
||||
template <class BidirectionalIterator,
|
||||
class charT = iterator_traits<BidirectionalIterator>::value_type,
|
||||
class traits = regex_traits<charT> >
|
||||
class regex_iterator
|
||||
{
|
||||
public:
|
||||
typedef <A href="basic_regex.html">basic_regex</A><charT, traits> regex_type;
|
||||
typedef <A href="match_results.html">match_results</A><BidirectionalIterator> value_type;
|
||||
typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
|
||||
typedef const value_type* pointer;
|
||||
typedef const value_type& reference;
|
||||
typedef std::forward_iterator_tag iterator_category;
|
||||
|
||||
<A href="#c1">regex_iterator</A>();
|
||||
<A href="#c2">regex_iterator</A>(BidirectionalIterator a, BidirectionalIterator b,
|
||||
const regex_type& re,
|
||||
<A href="match_flag_type.html">match_flag_type</A> m = match_default);
|
||||
<A href="#c3">regex_iterator</A>(const regex_iterator&);
|
||||
regex_iterator& <A href="#o1">operator</A>=(const regex_iterator&);
|
||||
bool <A href="#o2">operator</A>==(const regex_iterator&)const;
|
||||
bool <A href="#o3">operator</A>!=(const regex_iterator&)const;
|
||||
const value_type& <A href="#o4">operator</A>*()const;
|
||||
const value_type* <A href="#o5">operator</A>->()const;
|
||||
regex_iterator& <A href="#o6">operator</A>++();
|
||||
regex_iterator <A href="#o7">operator</A>++(int);
|
||||
};
|
||||
|
||||
typedef
|
||||
regex_iterator<const
|
||||
|
||||
char*> cregex_iterator; typedef regex_iterator<std::string::const_iterator>
|
||||
sregex_iterator; #ifndef BOOST_NO_WREGEX
|
||||
typedef regex_iterator<const
|
||||
wchar_t*> wcregex_iterator; typedef regex_iterator<std::wstring::const_iterator>
|
||||
wsregex_iterator; #endif template
|
||||
<class
|
||||
|
||||
charT, class traits> regex_iterator<const charT*,
|
||||
charT, traits>
|
||||
<A href="#make_regex_iterator">make_regex_iterator</A>(const charT* p, const basic_regex<charT, traits>& e, regex_constants::match_flag_type m = regex_constants::match_default); template <class
|
||||
|
||||
charT, class traits, class ST, class SA> regex_iterator<typename std::basic_string<charT,
|
||||
ST, SA>::const_iterator, charT, traits>
|
||||
<A href="#make_regex_iterator">make_regex_iterator</A>(const std::basic_string<charT, ST, SA>& p, const basic_regex<charT, traits>& e, regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
</pre>
|
||||
<h3><a name="description"></a>Description</h3>
|
||||
<p>A regex_iterator is constructed from a pair of iterators, and enumerates all
|
||||
occurrences of a regular expression within that iterator range.</p>
|
||||
<pre><A name=c1></A>
|
||||
regex_iterator();
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> constructs an end of sequence regex_iterator.</p>
|
||||
<pre><A name=c2></A>regex_iterator(BidirectionalIterator a, BidirectionalIterator b,
|
||||
const regex_type& re,
|
||||
<A href="match_flag_type.html">match_flag_type</A> m = match_default);
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> constructs a regex_iterator that will enumerate all occurrences
|
||||
of the expression <em>re</em>, within the sequence <em>[a,b)</em>, and found
|
||||
using match flags <em>m</em>. The object <em>re</em> must exist for the
|
||||
lifetime of the regex_iterator.</p>
|
||||
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
|
||||
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
|
||||
or if the program runs out of stack space while matching the expression (if
|
||||
Boost.regex is <A href="configuration.html">configured</A> in recursive mode),
|
||||
or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html">
|
||||
configured</A> in non-recursive mode).</P>
|
||||
<pre><A name=c3></A>
|
||||
regex_iterator(const regex_iterator& that);
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> constructs a copy of <code>that</code>.</p>
|
||||
<b></b>
|
||||
<p><b>Postconditions:</b> <code>*this == that</code>.</p>
|
||||
<pre><A name=o1></A>
|
||||
regex_iterator& operator=(const regex_iterator&);
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> sets <code>*this</code> equal to <code>that</code>.</p>
|
||||
<b></b>
|
||||
<p><b>Postconditions:</b> <code>*this == that</code>.</p>
|
||||
<pre><A name=o2></A>
|
||||
bool operator==(const regex_iterator& that)const;
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> returns true if *this is equal to that.</p>
|
||||
<pre><A name=o3></A>
|
||||
bool operator!=(const regex_iterator&)const;
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> returns <code>!(*this == that)</code>.</p>
|
||||
<pre><A name=o4></A>
|
||||
const value_type& operator*()const;
|
||||
</pre>
|
||||
<p><b>Effects:</b> dereferencing a regex_iterator object <em>it</em> yields a
|
||||
const reference to a <a href="match_results.html">match_results</a> object,
|
||||
whose members are set as follows:</p>
|
||||
<p></p>
|
||||
<table id="Table2" cellspacing="1" cellpadding="7" width="624" border="1">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td valign="top" width="50%"><b></b>
|
||||
<p><b>Element</b></p>
|
||||
</td>
|
||||
<td valign="top" width="50%"><b></b>
|
||||
<p><b>Value</b></p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>(*it).size()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>re.mark_count()</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>(*it).empty()</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>false</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>(*it).prefix().first</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>The end of the last match found, or the start of the underlying sequence if
|
||||
this is the first match enumerated</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>(*it).prefix().second</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>The same as the start of the match found:<BR>
|
||||
(*it)[0].first</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>(*it).prefix().matched</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>True if the prefix did not match an empty string:<BR>
|
||||
(*it).prefix().first != (*it).prefix().second</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>(*it).suffix().first</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>The same as the end of the match found:<BR>
|
||||
(*it)[0].second</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>(*it).suffix().second</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>The end of the underlying sequence.</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>(*it).suffix().matched</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>True if the suffix did not match an empty string:<BR>
|
||||
(*it).suffix().first != (*it).suffix().second</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>(*it)[0].first</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>The start of the sequence of characters that matched the regular expression</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>(*it)[0].second</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>The end of the sequence of characters that matched the regular expression</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>(*it)[0].matched</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p><code>true</code> if a full match was found, and <code>false</code> if it was a
|
||||
partial match (found as a result of the <code>match_partial</code> flag being
|
||||
set).</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>(*it)[n].first</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>For all integers n < (*it).size(), the start of the sequence that matched
|
||||
sub-expression <i>n</i>. Alternatively, if sub-expression n did not participate
|
||||
in the match, then <i>last</i>.</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>(*it)[n].second</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>For all integers n < (*it).size(), the end of the sequence that matched
|
||||
sub-expression <i>n</i>. Alternatively, if sub-expression n did not participate
|
||||
in the match, then <i>last</i>.</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">
|
||||
<p>(*it)[n].matched</p>
|
||||
</td>
|
||||
<td valign="top" width="50%">
|
||||
<p>For all integers n < (*it).size(), true if sub-expression <i>n</i> participated
|
||||
in the match, false otherwise.</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="50%">(*it).position(n)</td>
|
||||
<td valign="top" width="50%">For all integers n < (*it).size(), the
|
||||
distance from the start of the underlying sequence to the start of
|
||||
sub-expression match <em>n</em>.</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<pre><A name=o5></A>
|
||||
const value_type* operator->()const;
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> returns <code>&(*this)</code>.</p>
|
||||
<pre><A name=o6></A>
|
||||
regex_iterator& operator++();
|
||||
</pre>
|
||||
<p><strong>Effects:</strong> moves the iterator to the next match in the
|
||||
underlying sequence, or the end of sequence iterator if none is found.
|
||||
When the last match found matched a zero length string, then the
|
||||
regex_iterator will find the next match as follows: if there exists a non-zero
|
||||
length match that starts at the same location as the last one, then returns it,
|
||||
otherwise starts looking for the next (possibly zero length) match from one
|
||||
position to the right of the last match.</p>
|
||||
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
|
||||
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
|
||||
or if the program runs out of stack space while matching the expression (if
|
||||
Boost.regex is <A href="configuration.html">configured</A> in recursive mode),
|
||||
or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html">
|
||||
configured</A> in non-recursive mode).</P>
|
||||
<b></b>
|
||||
<p><b>Returns:</b> <code>*this</code>.</p>
|
||||
<pre><A name=o7></A>
|
||||
regex_iterator operator++(int);
|
||||
</pre>
|
||||
<b></b>
|
||||
<p><b>Effects:</b> constructs a copy <code>result</code> of <code>*this</code>,
|
||||
then calls <code>++(*this)</code>.</p>
|
||||
<b></b>
|
||||
<p><b>Returns:</b> <code>result</code>.</p>
|
||||
<PRE><A name=make_regex_iterator></A>template <class charT, class traits> regex_iterator<const charT*, charT, traits>
|
||||
make_regex_iterator(const charT*
|
||||
p, const basic_regex<charT,
|
||||
traits>& e, regex_constants::match_flag_type m
|
||||
= regex_constants::match_default); template <class
|
||||
|
||||
charT, class traits, class ST, class SA> regex_iterator<typename std::basic_string<charT,
|
||||
ST, SA>::const_iterator, charT, traits>
|
||||
make_regex_iterator(const std::basic_string<charT, ST, SA>& p,
|
||||
const basic_regex<charT, traits>& e,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
</PRE>
|
||||
<P><STRONG>Effects:</STRONG> returns an iterator that enumerates all occurrences of
|
||||
expression <EM>e</EM> in text <EM>p</EM> using match_flags <EM>m</EM>.</P>
|
||||
<h3><a name=examples></a>Examples</h3>
|
||||
<p>The following <a href="../example/snippets/regex_iterator_example.cpp">example</a>
|
||||
takes a C++ source file and builds up an index of class names, and the location
|
||||
of that class in the file.</p>
|
||||
<pre>
|
||||
<font color="#008040">#include <string></font>
|
||||
<font color="#008040">#include <map></font>
|
||||
<font color="#008040">#include <fstream></font>
|
||||
<font color="#008040">#include <iostream></font>
|
||||
<font color="#008040">#include <boost/regex.hpp></font>
|
||||
|
||||
<b>using</b> <b>namespace</b> std;
|
||||
|
||||
<i><font color="#000080">// purpose:</font></i>
|
||||
<i><font color=
|
||||
#000080>// takes the contents of a file in the form of a string</font></i>
|
||||
<i><font color=
|
||||
#000080>// and searches for all the C++ class definitions, storing</font></i>
|
||||
<i><font color=
|
||||
#000080>// their locations in a map of strings/int's</font></i>
|
||||
|
||||
<b>typedef</b> std::map<std::string, std::string::difference_type, std::less<std::string> > map_type;
|
||||
|
||||
<b>const</b> <b>char</b>* re =
|
||||
<i><font color=
|
||||
#000080>// possibly leading whitespace: </font></i>
|
||||
<font color="#0000ff">"^[[:space:]]*"</font>
|
||||
<i><font color=
|
||||
#000080>// possible template declaration:</font></i>
|
||||
<font color=
|
||||
#0000ff>"(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"</font>
|
||||
<i><font color="#000080">// class or struct:</font></i>
|
||||
<font color="#0000ff">"(class|struct)[[:space:]]*"</font>
|
||||
<i><font color=
|
||||
#000080>// leading declspec macros etc:</font></i>
|
||||
<font color="#0000ff">"("</font>
|
||||
<font color="#0000ff">"\\<\\w+\\>"</font>
|
||||
<font color="#0000ff">"("</font>
|
||||
<font color="#0000ff">"[[:blank:]]*\\([^)]*\\)"</font>
|
||||
<font color="#0000ff">")?"</font>
|
||||
<font color="#0000ff">"[[:space:]]*"</font>
|
||||
<font color="#0000ff">")*"</font>
|
||||
<i><font color="#000080">// the class name</font></i>
|
||||
<font color="#0000ff">"(\\<\\w*\\>)[[:space:]]*"</font>
|
||||
<i><font color=
|
||||
#000080>// template specialisation parameters</font></i>
|
||||
<font color="#0000ff">"(<[^;:{]+>)?[[:space:]]*"</font>
|
||||
<i><font color="#000080">// terminate in { or :</font></i>
|
||||
<font color="#0000ff">"(\\{|:[^;\\{()]*\\{)"</font>;
|
||||
|
||||
|
||||
boost::regex expression(re);
|
||||
map_type class_index;
|
||||
|
||||
<b>bool</b> regex_callback(<b>const</b> boost::match_results<std::string::const_iterator>& what)
|
||||
{
|
||||
<i><font color=
|
||||
#000080>// what[0] contains the whole string</font></i>
|
||||
<i><font color=
|
||||
#000080>// what[5] contains the class name.</font></i>
|
||||
<i><font color=
|
||||
#000080>// what[6] contains the template specialisation if any.</font></i>
|
||||
<i><font color=
|
||||
#000080>// add class name and position to map:</font></i>
|
||||
class_index[what[<font color=
|
||||
#0000a0>5</font>].str() + what[<font color=
|
||||
#0000a0>6</font>].str()] = what.position(<font color=
|
||||
#0000a0>5</font>);
|
||||
<b>return</b> <b>true</b>;
|
||||
}
|
||||
|
||||
<b>void</b> load_file(std::string& s, std::istream& is)
|
||||
{
|
||||
s.erase();
|
||||
s.reserve(is.rdbuf()->in_avail());
|
||||
<b>char</b> c;
|
||||
<b>while</b>(is.get(c))
|
||||
{
|
||||
<b>if</b>(s.capacity() == s.size())
|
||||
s.reserve(s.capacity() * <font color="#0000a0">3</font>);
|
||||
s.append(<font color="#0000a0">1</font>, c);
|
||||
}
|
||||
}
|
||||
|
||||
<b>int</b> main(<b>int</b> argc, <b>const</b> <b>char</b>** argv)
|
||||
{
|
||||
std::string text;
|
||||
<b>for</b>(<b>int</b> i = <font color=
|
||||
#0000a0>1</font>; i < argc; ++i)
|
||||
{
|
||||
cout << <font color=
|
||||
#0000ff>"Processing file "</font> << argv[i] << endl;
|
||||
std::ifstream fs(argv[i]);
|
||||
load_file(text, fs);
|
||||
<i><font color=
|
||||
#000080>// construct our iterators:</font></i>
|
||||
boost::sregex_iterator m1(text.begin(), text.end(), expression);
|
||||
boost::sregex_iterator m2;
|
||||
std::for_each(m1, m2, &regex_callback);
|
||||
<i><font color="#000080">// copy results:</font></i>
|
||||
cout << class_index.size() << <font color=
|
||||
#0000ff>" matches found"</font> << endl;
|
||||
map_type::iterator c, d;
|
||||
c = class_index.begin();
|
||||
d = class_index.end();
|
||||
<b>while</b>(c != d)
|
||||
{
|
||||
cout << <font color=
|
||||
#0000ff>"class \""</font> << (*c).first << <font
|
||||
color=
|
||||
#0000ff>"\" found at index: "</font> << (*c).second << endl;
|
||||
++c;
|
||||
}
|
||||
class_index.erase(class_index.begin(), class_index.end());
|
||||
}
|
||||
<b>return</b> <font color="#0000a0">0</font>;
|
||||
}
|
||||
</pre>
|
||||
<hr>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
06 Jan 05
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2005<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,318 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Algorithm regex_match</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Algorithm regex_match</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<H3>Contents</H3>
|
||||
<dl class="index">
|
||||
<dt><a href="#synopsis">Synopsis</a> <dt><a href="#description">Description</a> <dt><a href="#examples">
|
||||
Examples</a></dt>
|
||||
</dl>
|
||||
<H3><A name="synopsis"></A>Synopsis</H3>
|
||||
<PRE><A name=query_match></A>#include <<A href="../../../boost/regex.hpp">boost/regex.hpp</A>> </PRE>
|
||||
<P>
|
||||
The algorithm regex_match determines whether a given regular expression
|
||||
matches all of a given character sequence denoted by a pair of
|
||||
bidirectional-iterators. The algorithm is defined as follows; the main use of
|
||||
this function is data input validation.
|
||||
<P><STRONG>Note that the result is true only if the expression matches the whole of
|
||||
the input sequence. </STRONG> If you want to search for an expression
|
||||
somewhere within the sequence then use <A href="regex_search.html">regex_search</A>.
|
||||
If you want to match a prefix of the character string then use <A href="regex_search.html">
|
||||
regex_search</A> with the flag <A href="match_flag_type.html">match_continuous</A>
|
||||
set.
|
||||
<PRE>
|
||||
template <class BidirectionalIterator, class Allocator, class charT, class traits>
|
||||
bool <A href="#f1">regex_match</A>(BidirectionalIterator first, BidirectionalIterator last,
|
||||
<A href="match_results.html">match_results</A><BidirectionalIterator, Allocator>& m,
|
||||
const <A href="basic_regex.html">basic_regex</A> <charT, traits>& e,
|
||||
<A href="match_flag_type.html">match_flag_type</A> flags = match_default);
|
||||
|
||||
template <class BidirectionalIterator, class charT, class traits>
|
||||
bool <A href="#f2">regex_match</A>(BidirectionalIterator first, BidirectionalIterator last,
|
||||
const <A href="basic_regex.html">basic_regex</A> <charT, traits>& e,
|
||||
<A href="match_flag_type.html">match_flag_type</A> flags = match_default);
|
||||
|
||||
template <class charT, class Allocator, class traits>
|
||||
bool <A href="#f3">regex_match</A>(const charT* str, <A href="match_results.html">match_results</A><const charT*, Allocator>& m,
|
||||
const <A href="basic_regex.html">basic_regex</A> <charT, traits>& e,
|
||||
<A href="match_flag_type.html">match_flag_type</A> flags = match_default);
|
||||
|
||||
template <class ST, class SA, class Allocator, class charT, class traits>
|
||||
bool <A href="#f4">regex_match</A>(const basic_string<charT, ST, SA>& s,
|
||||
<A href="match_results.html">match_results</A><typename basic_string<charT, ST, SA>::const_iterator, Allocator>& m,
|
||||
const <A href="basic_regex.html">basic_regex</A> <charT, traits>& e,
|
||||
<A href="match_flag_type.html">match_flag_type</A> flags = match_default);
|
||||
|
||||
template <class charT, class traits>
|
||||
bool <A href="#f5">regex_match</A>(const charT* str,
|
||||
const <A href="basic_regex.html">basic_regex</A> <charT, traits>& e,
|
||||
<A href="match_flag_type.html">match_flag_type</A> flags = match_default);
|
||||
|
||||
template <class ST, class SA, class charT, class traits>
|
||||
bool <A href="#f6">regex_match</A>(const basic_string<charT, ST, SA>& s,
|
||||
const <A href="basic_regex.html">basic_regex</A> <charT, traits>& e,
|
||||
<A href="match_flag_type.html">match_flag_type</A> flags = match_default);
|
||||
</PRE>
|
||||
<H3><A name="description"></A>Description</H3>
|
||||
<PRE><A name=f1></A>template <class BidirectionalIterator, class Allocator, class charT, class traits>
|
||||
bool regex_match(BidirectionalIterator first, BidirectionalIterator last,
|
||||
<A href="match_results.html">match_results</A><BidirectionalIterator, Allocator>& m,
|
||||
const <A href="basic_regex.html">basic_regex</A> <charT, traits>& e,
|
||||
<A href="match_flag_type.html">match_flag_type</A> flags = match_default);</PRE>
|
||||
<P><B> Requires:</B> Type BidirectionalIterator meets the requirements of a
|
||||
Bidirectional Iterator (24.1.4).</P>
|
||||
<P><B> Effects: </B>Determines whether there is an exact match between the regular
|
||||
expression <I>e</I>, and all of the character sequence [first, last), parameter <I>
|
||||
flags</I> is used to <A href="match_flag_type.html">control how the expression
|
||||
is matched</A> against the character sequence. Returns true if such a match
|
||||
exists, false otherwise.</P>
|
||||
<P><STRONG>Throws:</STRONG> <code>std::runtime_error</code> if the complexity of
|
||||
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
|
||||
or if the program runs out of stack space while matching the expression (if
|
||||
Boost.regex is <A href="configuration.html">configured</A> in recursive mode),
|
||||
or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html">
|
||||
configured</A> in non-recursive mode).</P>
|
||||
<P><B> Postconditions: </B>If the function returns false, then the effect on
|
||||
parameter <I>m</I> is undefined, otherwise the effects on parameter <I>m</I> are
|
||||
given in the table:</P>
|
||||
<P align="center">
|
||||
<CENTER>
|
||||
<TABLE id="Table2" cellSpacing="1" cellPadding="7" width="624" border="1">
|
||||
<TBODY>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P><B> Element</B>
|
||||
</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P><B> Value</B>
|
||||
</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m.size()</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>e.mark_count()</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m.empty()</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>false</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m.prefix().first</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>first</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m.prefix().second</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>first</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m.prefix().matched</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>false</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m.suffix().first</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>last</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m.suffix().second</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>last</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m.suffix().matched</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>false</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m[0].first</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>first</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m[0].second</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>last</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m[0].matched</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P><CODE> true</CODE> if a full match was found, and <CODE>false</CODE> if it was
|
||||
a partial match (found as a result of the <CODE>match_partial</CODE> flag being
|
||||
set).</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m[n].first</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>For all integers n < m.size(), the start of the sequence that matched
|
||||
sub-expression <I>n</I>. Alternatively, if sub-expression n did not participate
|
||||
in the match, then <I>last</I>.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m[n].second</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>For all integers n < m.size(), the end of the sequence that matched
|
||||
sub-expression <I>n</I>. Alternatively, if sub-expression n did not participate
|
||||
in the match, then <I>last</I>.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m[n].matched</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>For all integers n < m.size(), true if sub-expression <I>n</I> participated
|
||||
in the match, false otherwise.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
</TBODY></TD></TR></TABLE>
|
||||
</CENTER>
|
||||
<P></P>
|
||||
<DIV></DIV>
|
||||
<PRE> </PRE>
|
||||
<PRE><A name=f2></A>template <class BidirectionalIterator, class charT, class traits>
|
||||
bool regex_match(BidirectionalIterator first, BidirectionalIterator last,
|
||||
const <A href="basic_regex.html">basic_regex</A> <charT, traits>& e,
|
||||
<A href="match_flag_type.html">match_flag_type</A> flags = match_default);</PRE>
|
||||
<P><B> Effects:</B> Behaves "as if" by constructing an instance of <CODE><A href="match_results.html">
|
||||
match_results</A><</CODE>BidirectionalIterator<CODE>> what</CODE>,
|
||||
and then returning the result of <CODE>regex_match(first, last, what, e, flags)</CODE>.</P>
|
||||
<PRE><A name=f3></A>template <class charT, class Allocator, class traits>
|
||||
bool regex_match(const charT* str, <A href="match_results.html">match_results</A><const charT*, Allocator>& m,
|
||||
const <A href="basic_regex.html">basic_regex</A> <charT, traits>& e,
|
||||
<A href="match_flag_type.html">match_flag_type</A> flags = match_default);</PRE>
|
||||
<P><B> Effects:</B> Returns the result of <CODE>regex_match(str, str +
|
||||
char_traits<charT>::length(str), m, e, flags)</CODE>.</P>
|
||||
<PRE><A name=f4></A>template <class ST, class SA, class Allocator,
|
||||
class charT, class traits>
|
||||
bool regex_match(const basic_string<charT, ST, SA>& s,
|
||||
<A href="match_results.html">match_results</A><typename basic_string<charT, ST, SA>::const_iterator, Allocator>& m,
|
||||
const <A href="basic_regex.html">basic_regex</A> <charT, traits>& e,
|
||||
<A href="match_flag_type.html">match_flag_type</A> flags = match_default);</PRE>
|
||||
<P><B> Effects:</B> Returns the result of <CODE>regex_match(s.begin(), s.end(), m, e,
|
||||
flags)</CODE>.</P>
|
||||
<PRE><A name=f5></A>template <class charT, class traits>
|
||||
bool regex_match(const charT* str,
|
||||
const <A href="basic_regex.html">basic_regex</A> <charT, traits>& e,
|
||||
<A href="match_flag_type.html">match_flag_type</A> flags = match_default);</PRE>
|
||||
<P><B> Effects:</B> Returns the result of <CODE>regex_match(str, str +
|
||||
char_traits<charT>::length(str), e, flags)</CODE>.</P>
|
||||
<PRE><A name=f6></A>template <class ST, class SA, class charT, class traits>
|
||||
bool regex_match(const basic_string<charT, ST, SA>& s,
|
||||
const <A href="basic_regex.html">basic_regex</A> <charT, traits>& e,
|
||||
<A href="match_flag_type.html">match_flag_type</A> flags = match_default);</PRE>
|
||||
<P><B> Effects:</B> Returns the result of <CODE>regex_match(s.begin(), s.end(), e,
|
||||
flags)</CODE>.
|
||||
<H3><A name="examples"></A>Examples</H3>
|
||||
<P>The following <A href="../example/snippets/regex_match_example.cpp">example</A>
|
||||
processes an ftp response:
|
||||
<P></P>
|
||||
<PRE><FONT color=#008000>#include <stdlib.h>
|
||||
#include <boost/regex.hpp>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
</FONT><B>using namespace</B> boost;
|
||||
|
||||
regex expression(<FONT color=#000080>"([0-9]+)(\\-| |$)(.*)"</FONT>);
|
||||
|
||||
<FONT color=#000080><I>// process_ftp:
|
||||
// on success returns the ftp response code, and fills
|
||||
// msg with the ftp response message.
|
||||
</I></FONT><B>int</B> process_ftp(<B>const</B> <B>char</B>* response, std::string* msg)
|
||||
{
|
||||
cmatch what;
|
||||
<B>if</B>(regex_match(response, what, expression))
|
||||
{
|
||||
<FONT color=#000080> <I>// what[0] contains the whole string
|
||||
</I> <I>// what[1] contains the response code
|
||||
</I> <I>// what[2] contains the separator character
|
||||
</I> <I>// what[3] contains the text message.
|
||||
</I></FONT> <B>if</B>(msg)
|
||||
msg->assign(what[3].first, what[3].second);
|
||||
<B>return</B> std::atoi(what[1].first);
|
||||
}
|
||||
<FONT color=#000080> <I>// failure did not match
|
||||
</I></FONT> <B>if</B>(msg)
|
||||
msg->erase();
|
||||
<B>return</B> -1;
|
||||
}
|
||||
<P>
|
||||
<HR></PRE>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
26 June 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,45 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Algorithm regex_merge (deprecated)</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Algorithm regex_merge (deprecated)</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>Algorithm regex_merge has been renamed <A href="regex_replace.html">regex_replace</A>;
|
||||
existing code will continue to compile, but new code should use <A href="regex_replace.html">
|
||||
regex_replace</A> instead.</P>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -1,256 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Algorithm regex_replace</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td vAlign="top" width="300">
|
||||
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../boost.png" width="277" border="0"></A></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Algorithm regex_replace</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<H3>Contents</H3>
|
||||
<dl class="index">
|
||||
<dt><A href="#synopsis">Synopsis</A> <dt><a href="#description">Description</a> <dt><A href="#examples">
|
||||
Examples</A></dt></dl>
|
||||
<H3><A name="synopsis"></A>Synopsis</H3>
|
||||
<PRE>#include <<A href="../../../boost/regex.hpp">boost/regex.hpp</A>> </PRE>
|
||||
<P>The algorithm regex_replace searches through a string finding
|
||||
all the matches to the regular expression: for each match it then calls <A href="match_results.html#format">
|
||||
match_results::format</A> to format the string and sends the result to the
|
||||
output iterator. Sections of text that do not match are copied to the output
|
||||
unchanged only if the <EM>flags</EM> parameter does not have the flag <A href="match_flag_type.html">
|
||||
format_no_copy</A> set. If the flag <A href="match_flag_type.html">format_first_only</A>
|
||||
is set then only the first occurrence is replaced rather than all
|
||||
occurrences. <PRE>template <class OutputIterator, class BidirectionalIterator, class traits, class charT>
|
||||
OutputIterator <A href="#f1">regex_replace</A>(OutputIterator out,
|
||||
BidirectionalIterator first,
|
||||
BidirectionalIterator last,
|
||||
const <A href="basic_regex.html">basic_regex</A><charT, traits>& e,
|
||||
const basic_string<charT>& fmt,
|
||||
<A href="match_flag_type.html">match_flag_type flags = match_default</A>);
|
||||
|
||||
template <class traits, class charT>
|
||||
basic_string<charT> <A href="#f2">regex_replace</A>(const basic_string<charT>& s,
|
||||
const <A href="basic_regex.html">basic_regex</A><charT, traits>& e,
|
||||
const basic_string<charT>& fmt,
|
||||
<A href="match_flag_type.html">match_flag_type flags = match_default</A>);
|
||||
|
||||
</PRE>
|
||||
<H3><A name="description"></A>Description</H3>
|
||||
<PRE><A name=f1></A>template <class OutputIterator, class BidirectionalIterator, class traits, class charT>
|
||||
OutputIterator regex_replace(OutputIterator out,
|
||||
BidirectionalIterator first,
|
||||
BidirectionalIterator last,
|
||||
const <A href="basic_regex.html">basic_regex</A><charT, traits>& e,
|
||||
const basic_string<charT>& fmt,
|
||||
<A href="match_flag_type.html">match_flag_type flags = match_default</A>);</PRE>
|
||||
<P>Enumerates all the occurrences of expression <EM>e</EM> in the sequence [first,
|
||||
last), replacing each occurrence with the string that results by merging the
|
||||
match found with the format string <EM>fmt</EM>, and copies the resulting
|
||||
string to <EM>out</EM>. </P>
|
||||
<P>If the flag format_no_copy is set in <EM>flags</EM> then unmatched sections of
|
||||
text are not copied to output.
|
||||
</P>
|
||||
<P>If the flag format_first_only is set in <EM>flags</EM> then only the first
|
||||
occurrence of <EM>e</EM> is replaced.
|
||||
</P>
|
||||
<P>The manner in which the format string <EM>fmt</EM> is interpreted, along with
|
||||
the rules used for finding matches, are determined by the <A href="match_flag_type.html">
|
||||
flags</A> set in <EM>flags</EM></P>
|
||||
<P><B>Effects:</B> Constructs a
|
||||
<SPAN class="spelle">regex_iterator</SPAN>
|
||||
object:
|
||||
</P>
|
||||
<PRE><SPAN style="FONT-SIZE: 10pt">regex_iterator<</SPAN><SPAN class=spelle><SPAN style="FONT-SIZE: 10pt; FONT-FAMILY: 'Courier New'">BidirectionalIterator</SPAN></SPAN><SPAN style="FONT-SIZE: 10pt">, </SPAN><SPAN class=spelle><SPAN style="FONT-SIZE: 10pt; FONT-FAMILY: 'Courier New'">charT</SPAN></SPAN><SPAN style="FONT-SIZE: 10pt">, traits, Allocator> <BR> </SPAN><SPAN class=grame><SPAN style="FONT-SIZE: 10pt; FONT-FAMILY: 'Courier New'">i(</SPAN></SPAN><SPAN style="FONT-SIZE: 10pt">first, last, e, flags)</SPAN>, </PRE>
|
||||
<P>and uses
|
||||
<SPAN class="spelle">
|
||||
<I>i</I></SPAN>
|
||||
to enumerate through all of the matches <I>m</I> of type
|
||||
<SPAN class="spelle">
|
||||
<SPAN style="FONT-SIZE: 10pt; FONT-FAMILY: 'Courier New'">match_results</SPAN>
|
||||
</SPAN><SPAN style="FONT-SIZE: 10pt; FONT-FAMILY: 'Courier New'"><<SPAN class="spelle">BidirectionalIterator</SPAN>> </SPAN>that
|
||||
occur within the sequence [first, last).
|
||||
</P>
|
||||
<P>If no such matches are found
|
||||
<SPAN class="grame">and </SPAN></P>
|
||||
<PRE><SPAN class=grame></SPAN><SPAN class=grame><SPAN style="FONT-SIZE: 10pt; FONT-FAMILY: 'Courier New'">!</SPAN></SPAN><SPAN style="FONT-SIZE: 10pt; FONT-FAMILY: 'Courier New'">(flags & <SPAN class=spelle>format_no_copy</SPAN>)</SPAN> </PRE>
|
||||
<P>then calls
|
||||
</P>
|
||||
<PRE><SPAN class=spelle><SPAN style="FONT-SIZE: 10pt; FONT-FAMILY: 'Courier New'">std::copy</SPAN></SPAN><SPAN style="FONT-SIZE: 10pt; FONT-FAMILY: 'Courier New'">(first, last, out)</SPAN>. </PRE>
|
||||
<P>Otherwise, for each match found,
|
||||
<SPAN class="grame">if </SPAN></P>
|
||||
<PRE><SPAN class=grame></SPAN><SPAN class=grame><SPAN style="FONT-SIZE: 10pt; FONT-FAMILY: 'Courier New'">!</SPAN></SPAN><SPAN style="FONT-SIZE: 10pt; FONT-FAMILY: 'Courier New'">(flags & <SPAN class=spelle>format_no_copy</SPAN>)</SPAN> </PRE>
|
||||
<P>calls
|
||||
</P>
|
||||
<PRE><SPAN class=spelle><SPAN style="FONT-SIZE: 10pt; FONT-FAMILY: 'Courier New'">std::copy</SPAN></SPAN><SPAN style="FONT-SIZE: 10pt; FONT-FAMILY: 'Courier New'">(<SPAN class=spelle>m.prefix</SPAN>().first, <SPAN class=spelle>m.prefix</SPAN>().last, out)</SPAN>, </PRE>
|
||||
<P>and then calls
|
||||
</P>
|
||||
<PRE><SPAN class=spelle><SPAN style="FONT-SIZE: 10pt; FONT-FAMILY: 'Courier New'">m.format</SPAN></SPAN><SPAN style="FONT-SIZE: 10pt; FONT-FAMILY: 'Courier New'">(out, <SPAN class=spelle>fmt</SPAN>, flags)</SPAN>. </PRE>
|
||||
<P>Finally
|
||||
<SPAN class="grame">if </SPAN></P>
|
||||
<PRE><SPAN class=grame></SPAN><SPAN class=grame><SPAN style="FONT-SIZE: 10pt; FONT-FAMILY: 'Courier New'">!</SPAN></SPAN><SPAN style="FONT-SIZE: 10pt; FONT-FAMILY: 'Courier New'">(flags & <SPAN class=spelle>format_no_copy</SPAN>)</SPAN> </PRE>
|
||||
<P>calls
|
||||
</P>
|
||||
<PRE><SPAN class=spelle><SPAN style="FONT-SIZE: 10pt; FONT-FAMILY: 'Courier New'">std::copy</SPAN></SPAN><SPAN style="FONT-SIZE: 10pt; FONT-FAMILY: 'Courier New'">(<SPAN class=spelle>last_m.suffix</SPAN>().first, <SPAN class=spelle>last_m.suffix</SPAN>().last, out) </SPAN></PRE>
|
||||
<P><SPAN style="FONT-SIZE: 10pt; FONT-FAMILY: 'Courier New'"></SPAN>where
|
||||
<SPAN class="spelle">
|
||||
<SPAN style="FONT-SIZE: 10pt; FONT-FAMILY: 'Courier New'">last_m</SPAN>
|
||||
</SPAN>
|
||||
is a copy of the last match found.
|
||||
</P>
|
||||
<P>If
|
||||
<SPAN style="FONT-SIZE: 10pt; FONT-FAMILY: 'Courier New'">flags &
|
||||
<SPAN class="spelle">format_first_only</SPAN></SPAN>
|
||||
is non-zero then only the first match found is replaced.</P>
|
||||
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
|
||||
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
|
||||
or if the program runs out of stack space while matching the expression (if
|
||||
Boost.regex is <A href="configuration.html">configured</A> in recursive mode),
|
||||
or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html">
|
||||
configured</A> in non-recursive mode).</P>
|
||||
<P><B> Returns:</B> <CODE>out</CODE>.
|
||||
</P>
|
||||
<PRE><A name=f2></A>template <class traits, class charT>
|
||||
basic_string<charT> regex_replace(const basic_string<charT>& s,
|
||||
const <A href="basic_regex.html">basic_regex</A><charT, traits>& e,
|
||||
const basic_string<charT>& fmt,
|
||||
<A href="match_flag_type.html">match_flag_type flags = match_default</A>);</PRE>
|
||||
<P><B> Effects:</B> Constructs an object <CODE>basic_string<charT> result</CODE>,
|
||||
calls <CODE>regex_replace(back_inserter(result), s.begin(), s.end(), e, fmt,
|
||||
flags)</CODE>, and then returns <CODE>result</CODE>.
|
||||
<H3><A name="examples"></A>Examples</H3>
|
||||
<P>The following <A href="../example/snippets/regex_replace_example.cpp">example</A>
|
||||
takes C/C++ source code as input, and outputs syntax highlighted HTML code.</P>
|
||||
<P></P>
|
||||
<PRE><FONT color=#008080>#include <fstream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <iterator>
|
||||
#include <boost/regex.hpp>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
</FONT>
|
||||
<FONT color=#000080><I>// purpose:
|
||||
// takes the contents of a file and transform to
|
||||
// syntax highlighted code in html format
|
||||
</I></FONT>
|
||||
boost::regex e1, e2;
|
||||
<B>extern</B> <B>const</B> <B>char</B>* expression_text;
|
||||
<B>extern</B> <B>const</B> <B>char</B>* format_string;
|
||||
<B>extern</B> <B>const</B> <B>char</B>* pre_expression;
|
||||
<B>extern</B> <B>const</B> <B>char</B>* pre_format;
|
||||
<B>extern</B> <B>const</B> <B>char</B>* header_text;
|
||||
<B>extern</B> <B>const</B> <B>char</B>* footer_text;
|
||||
|
||||
<B>void</B> load_file(std::string& s, std::istream& is)
|
||||
{
|
||||
s.erase();
|
||||
s.reserve(is.rdbuf()->in_avail());
|
||||
<B>char</B> c;
|
||||
<B>while</B>(is.get(c))
|
||||
{
|
||||
<B>if</B>(s.capacity() == s.size())
|
||||
s.reserve(s.capacity() * <FONT color=#000080>3</FONT>);
|
||||
s.append(<FONT color=#000080>1</FONT>, c);
|
||||
}
|
||||
}
|
||||
|
||||
<B>int</B> main(<B>int</B> argc, <B>const</B> <B>char</B>** argv)
|
||||
{
|
||||
try{
|
||||
e1.assign(expression_text);
|
||||
e2.assign(pre_expression);
|
||||
<B>for</B>(<B>int</B> i = <FONT color=#000080>1</FONT>; i < argc; ++i)
|
||||
{
|
||||
std::cout << <FONT color=#0000ff>"Processing file "</FONT> << argv[i] << std::endl;
|
||||
std::ifstream fs(argv[i]);
|
||||
std::string in;
|
||||
load_file(in, fs);
|
||||
std::string out_name(std::string(argv[i]) + std::string(<FONT color=#0000ff>".htm"</FONT>));
|
||||
std::ofstream os(out_name.c_str());
|
||||
os << header_text;
|
||||
<FONT color=#000080><I>// strip '<' and '>' first by outputting to a
|
||||
</I></FONT> <FONT color=#000080><I>// temporary string stream
|
||||
</I></FONT> std::ostringstream t(std::ios::out | std::ios::binary);
|
||||
std::ostream_iterator<<B>char</B>, <B>char</B>> oi(t);
|
||||
boost::regex_replace(oi, in.begin(), in.end(),
|
||||
e2, pre_format, boost::match_default | boost::format_all);
|
||||
<FONT color=#000080><I>// then output to final output stream
|
||||
</I></FONT> <FONT color=#000080><I>// adding syntax highlighting:
|
||||
</I></FONT> std::string s(t.str());
|
||||
std::ostream_iterator<<B>char</B>, <B>char</B>> out(os);
|
||||
boost::regex_replace(out, s.begin(), s.end(),
|
||||
e1, format_string, boost::match_default | boost::format_all);
|
||||
os << footer_text;
|
||||
}
|
||||
}
|
||||
<STRONG>catch</STRONG>(...)
|
||||
{ <STRONG>return</STRONG> -1; }
|
||||
<B>return</B> <FONT color=#000080>0</FONT>;
|
||||
}
|
||||
|
||||
<B>extern</B> <B>const</B> <B>char</B>* pre_expression = <FONT color=#0000ff>"(<)|(>)|(&)|\\r"</FONT>;
|
||||
<B>extern</B> <B>const</B> <B>char</B>* pre_format = <FONT color=#0000ff>"(?1<)(?2>)(?3&amp;)"</FONT>;
|
||||
|
||||
|
||||
<B>const</B> <B>char</B>* expression_text = <FONT color=#000080><I>// preprocessor directives: index 1
|
||||
</I></FONT> <FONT color=#0000ff>"(^[[:blank:]]*#(?:[^\\\\\\n]|\\\\[^\\n[:punct:][:word:]]*[\\n[:punct:][:word:]])*)|"
|
||||
</FONT> <FONT color=#000080><I>// comment: index 2
|
||||
</I></FONT> <FONT color=#0000ff>"(//[^\\n]*|/\\*.*?\\*/)|"
|
||||
</FONT> <FONT color=#000080><I>// literals: index 3
|
||||
</I></FONT> <FONT color=#0000ff>"\\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\\>|"
|
||||
</FONT> <FONT color=#000080><I>// string literals: index 4
|
||||
</I></FONT> <FONT color=#0000ff>"('(?:[^\\\\']|\\\\.)*'|\"(?:[^\\\\\"]|\\\\.)*\")|"
|
||||
</FONT> <FONT color=#000080><I>// keywords: index 5
|
||||
</I></FONT> <FONT color=#0000ff>"\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import"
|
||||
</FONT> <FONT color=#0000ff>"|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall"
|
||||
</FONT> <FONT color=#0000ff>"|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool"
|
||||
</FONT> <FONT color=#0000ff>"|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete"
|
||||
</FONT> <FONT color=#0000ff>"|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto"
|
||||
</FONT> <FONT color=#0000ff>"|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected"
|
||||
</FONT> <FONT color=#0000ff>"|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast"
|
||||
</FONT> <FONT color=#0000ff>"|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned"
|
||||
</FONT> <FONT color=#0000ff>"|using|virtual|void|volatile|wchar_t|while)\\>"
|
||||
</FONT> ;
|
||||
|
||||
<B>const</B> <B>char</B>* format_string = <FONT color=#0000ff>"(?1<font color=\"#008040\">$&</font>)"
|
||||
</FONT> <FONT color=#0000ff>"(?2<I><font color=\"#000080\">$&</font></I>)"
|
||||
</FONT> <FONT color=#0000ff>"(?3<font color=\"#0000A0\">$&</font>)"
|
||||
</FONT> <FONT color=#0000ff>"(?4<font color=\"#0000FF\">$&</font>)"
|
||||
</FONT> <FONT color=#0000ff>"(?5<B>$&</B>)"</FONT>;
|
||||
|
||||
<B>const</B> <B>char</B>* header_text = <FONT color=#0000ff>"<HTML>\n<HEAD>\n"
|
||||
</FONT> <FONT color=#0000ff>"<TITLE>Auto-generated html formated source</TITLE>\n"
|
||||
</FONT> <FONT color=#0000ff>"<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=windows-1252\">\n"
|
||||
</FONT> <FONT color=#0000ff>"</HEAD>\n"
|
||||
</FONT> <FONT color=#0000ff>"<BODY LINK=\"#0000ff\" VLINK=\"#800080\" BGCOLOR=\"#ffffff\">\n"
|
||||
</FONT> <FONT color=#0000ff>"<P> </P>\n<PRE>"</FONT>;
|
||||
|
||||
<B>const</B> <B>char</B>* footer_text = <FONT color=#0000ff>"</PRE>\n</BODY>\n\n"</FONT>;
|
||||
</PRE>
|
||||
<HR>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
26 June 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,315 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Algorithm regex_search</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td vAlign="top" width="300">
|
||||
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../boost.png" width="277" border="0"></A></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Algorithm regex_search</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<H3>Contents</H3>
|
||||
<dl class="index">
|
||||
<dt><A href="#synopsis">Synopsis</A> <dt><a href="#description">Description</a> <dt><A href="#examples">
|
||||
Examples</A></dt></dl>
|
||||
<H3><A name="synopsis"></A>Synopsis</H3>
|
||||
<PRE>#include <<A href="../../../boost/regex.hpp">boost/regex.hpp</A>> </PRE>
|
||||
<P></P>
|
||||
<P>The algorithm regex_search will search a range denoted by a pair of
|
||||
bidirectional-iterators for a given regular expression. The algorithm uses
|
||||
various heuristics to reduce the search time by only checking for a match if a
|
||||
match could conceivably start at that position. The algorithm is defined as
|
||||
follows:
|
||||
<PRE>template <class BidirectionalIterator,
|
||||
class Allocator, class charT, class traits>
|
||||
bool <A href="#f1">regex_search</A>(BidirectionalIterator first, BidirectionalIterator last,
|
||||
<a href="match_results.html">match_results</a><BidirectionalIterator, Allocator>& m,
|
||||
const <a href="basic_regex.html">basic_regex</a><charT, traits>& e,
|
||||
<a href="match_flag_type.html">match_flag_type</a> flags = match_default);
|
||||
|
||||
template <class ST, class SA,
|
||||
class Allocator, class charT, class traits>
|
||||
bool <A href="#f3">regex_search</A>(const basic_string<charT, ST, SA>& s,
|
||||
<a href="match_results.html">match_results</a><
|
||||
typename basic_string<charT, ST,SA>::const_iterator,
|
||||
Allocator>& m,
|
||||
const <a href="basic_regex.html">basic_regex</a><charT, traits>& e,
|
||||
<a href="match_flag_type.html">match_flag_type</a> flags = match_default);
|
||||
|
||||
template<class charT, class Allocator, class traits>
|
||||
bool <A href="#f2">regex_search</A>(const charT* str,
|
||||
<a href="match_results.html">match_results</a><const charT*, Allocator>& m,
|
||||
const <a href="basic_regex.html">basic_regex</a><charT, traits>& e,
|
||||
<a href="match_flag_type.html">match_flag_type</a> flags = match_default);
|
||||
|
||||
template <class BidirectionalIterator, class charT, class traits>
|
||||
bool <A href="#f4">regex_search</A>(BidirectionalIterator first, BidirectionalIterator last,
|
||||
const <a href="basic_regex.html">basic_regex</a><charT, traits>& e,
|
||||
<a href="match_flag_type.html">match_flag_type</a> flags = match_default);
|
||||
|
||||
template <class charT, class traits>
|
||||
bool <A href="#f5">regex_search</A>(const charT* str,
|
||||
const <a href="basic_regex.html">basic_regex</a><charT, traits>& e,
|
||||
<a href="match_flag_type.html">match_flag_type</a> flags = match_default);
|
||||
|
||||
template<class ST, class SA, class charT, class traits>
|
||||
bool <A href="#f6">regex_search</A>(const basic_string<charT, ST, SA>& s,
|
||||
const <a href="basic_regex.html">basic_regex</a><charT, traits>& e,
|
||||
<a href="match_flag_type.html">match_flag_type</a> flags = match_default);
|
||||
</PRE>
|
||||
<H3><A name="description"></A>Description</H3>
|
||||
<PRE><A name=f1></A>template <class BidirectionalIterator, class Allocator, class charT, class traits>
|
||||
bool regex_search(BidirectionalIterator first, BidirectionalIterator last,
|
||||
<a href="match_results.html">match_results</a><BidirectionalIterator, Allocator>& m,
|
||||
const <a href="basic_regex.html">basic_regex</a><charT, traits>& e,
|
||||
<a href="match_flag_type.html">match_flag_type</a> flags = match_default);</PRE>
|
||||
<P><B> Requires:</B> Type BidirectionalIterator meets the requirements of a
|
||||
Bidirectional Iterator (24.1.4).</P>
|
||||
<P><B> Effects: </B>Determines whether there is some sub-sequence within
|
||||
[first,last) that matches the regular expression <I>e</I>, parameter <I>flags</I>
|
||||
is used to control how the expression is matched against the character
|
||||
sequence. Returns true if such a sequence exists, false otherwise.</P>
|
||||
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
|
||||
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
|
||||
or if the program runs out of stack space while matching the expression (if
|
||||
Boost.regex is <A href="configuration.html">configured</A> in recursive mode),
|
||||
or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html">
|
||||
configured</A> in non-recursive mode).</P>
|
||||
<P><B> Postconditions: </B>If the function returns false, then the effect on
|
||||
parameter <I>m</I> is undefined, otherwise the effects on parameter <I>m</I> are
|
||||
given in the table:</P>
|
||||
<DIV align="center">
|
||||
<CENTER>
|
||||
<TABLE id="Table2" cellSpacing="1" cellPadding="7" width="624" border="1">
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P><B> Element</B></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P><B> Value</B>
|
||||
</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m.size()</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>e.mark_count()</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m.empty()</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>false</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m.prefix().first</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>first</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m.prefix().second</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m[0].first</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m.prefix().matched</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m.prefix().first != m.prefix().second</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m.suffix().first</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m[0].second</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m.suffix().second</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>last</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m.suffix().matched</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m.suffix().first != m.suffix().second</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m[0].first</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>The start of the sequence of characters that matched the regular expression</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m[0].second</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>The end of the sequence of characters that matched the regular expression</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m[0].matched</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P><CODE> true</CODE> if a full match was found, and <CODE>false</CODE> if it was
|
||||
a partial match (found as a result of the <CODE>match_partial</CODE> flag being
|
||||
set).</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m[n].first</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>For all integers n < m.size(), the start of the sequence that matched
|
||||
sub-expression <I>n</I>. Alternatively, if sub-expression n did not participate
|
||||
in the match, then <I>last</I>.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m[n].second</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>For all integers n < m.size(), the end of the sequence that matched
|
||||
sub-expression <I>n</I>. Alternatively, if sub-expression n did not participate
|
||||
in the match, then <I>last</I>.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>m[n].matched</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="50%">
|
||||
<P>For all integers n < m.size(), true if sub-expression <I>n</I> participated
|
||||
in the match, false otherwise.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
</TD></TR></TABLE>
|
||||
</CENTER>
|
||||
</DIV>
|
||||
<PRE><A name=f2></A>template <class charT, class Allocator, class traits>
|
||||
bool regex_search(const charT* str, <a href="match_results.html">match_results</a><const charT*, Allocator>& m,
|
||||
const <a href="basic_regex.html">basic_regex</a><charT, traits>& e,
|
||||
<a href="match_flag_type.html">match_flag_type</a> flags = match_default);</PRE>
|
||||
<P><B> Effects:</B> Returns the result of <CODE>regex_search(str, str +
|
||||
char_traits<charT>::length(str), m, e, flags)</CODE>.</P>
|
||||
<PRE><A name=f3></A>template <class ST, class SA, class Allocator, class charT,
|
||||
class traits>
|
||||
bool regex_search(const basic_string<charT, ST, SA>& s,
|
||||
<a href="match_results.html">match_results</a><typename basic_string<charT, ST, SA>::const_iterator, Allocator>& m,
|
||||
const <a href="basic_regex.html">basic_regex</a><charT, traits>& e,
|
||||
<a href="match_flag_type.html">match_flag_type</a> flags = match_default);</PRE>
|
||||
<P><B> Effects:</B> Returns the result of <CODE>regex_search(s.begin(), s.end(), m,
|
||||
e, flags)</CODE>.</P>
|
||||
<PRE><A name=f4></A>template <class BidirectionalIterator, class charT, class traits>
|
||||
bool regex_search(BidirectionalIterator first, BidirectionalIterator last,
|
||||
const <a href="basic_regex.html">basic_regex</a><charT, traits>& e,
|
||||
<a href="match_flag_type.html">match_flag_type</a> flags = match_default);</PRE>
|
||||
<P><B> Effects:</B> Behaves "as if" by constructing an instance of <CODE><a href="match_results.html">
|
||||
match_results</a><</CODE>BidirectionalIterator<CODE>> what</CODE>,
|
||||
and then returning the result of <CODE>regex_search(first, last, what, e, flags)</CODE>.</P>
|
||||
<PRE><A name=f5></A>template <class charT, class traits>
|
||||
bool regex_search(const charT* str,
|
||||
const <a href="basic_regex.html">basic_regex</a><charT, traits>& e,
|
||||
<a href="match_flag_type.html">match_flag_type</a> flags = match_default);</PRE>
|
||||
<P><B> Effects:</B> Returns the result of <CODE>regex_search(str, str +
|
||||
char_traits<charT>::length(str), e, flags)</CODE>.</P>
|
||||
<PRE><A name=f6></A>template <class ST, class SA, class charT, class traits>
|
||||
bool regex_search(const basic_string<charT, ST, SA>& s,
|
||||
const <a href="basic_regex.html">basic_regex</a><charT, traits>& e,
|
||||
<a href="match_flag_type.html">match_flag_type</a> flags = match_default);</PRE>
|
||||
<P><B> Effects:</B> Returns the result of <CODE>regex_search(s.begin(), s.end(), e,
|
||||
flags)</CODE>.</P>
|
||||
<H3><A name="examples"></A>Examples</H3>
|
||||
<P>The following <A href="../example/snippets/regex_search_example.cpp">example</A>,
|
||||
takes the contents of a file in the form of a string, and searches for all the
|
||||
C++ class declarations in the file. The code will work regardless of the way
|
||||
that std::string is implemented, for example it could easily be modified to
|
||||
work with the SGI rope class, which uses a non-contiguous storage strategy.</P>
|
||||
<P></P>
|
||||
<PRE><FONT color=#008000>#include <string>
|
||||
#include <map>
|
||||
#include <boost/regex.hpp>
|
||||
</FONT><FONT color=#000080><I>
|
||||
// purpose:
|
||||
// takes the contents of a file in the form of a string
|
||||
// and searches for all the C++ class definitions, storing
|
||||
// their locations in a map of strings/int's
|
||||
</I></FONT><B>typedef</B> std::map<std::string, <B>int</B>, std::less<std::string> > map_type;
|
||||
|
||||
boost::regex expression("^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?(class|struct)[[:space:]]*(\\<\\w+\\>([[:blank:]]*\\([^)]*\\))?[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\\{|:[^;\\{()]*\\{)");
|
||||
<B>
|
||||
void</B> IndexClasses(map_type& m, <B>const</B> std::string& file)
|
||||
{
|
||||
std::string::const_iterator start, end;
|
||||
start = file.begin();
|
||||
end = file.end();
|
||||
boost::<a href="match_results.html">match_results</a><std::string::const_iterator> what;
|
||||
boost::match_flag_type flags = boost::match_default;
|
||||
<B>while</B>(regex_search(start, end, what, expression, flags))
|
||||
{
|
||||
<FONT color=#000080> <I>// what[0] contains the whole string
|
||||
</I> <I>// what[5] contains the class name.
|
||||
</I> <I>// what[6] contains the template specialisation if any.
|
||||
</I> <I>// add class name and position to map:
|
||||
</I></FONT> m[std::string(what[5].first, what[5].second) + std::string(what[6].first, what[6].second)] =
|
||||
what[5].first - file.begin();
|
||||
<FONT color=#000080><I>// update search position:
|
||||
</I></FONT> start = what[0].second;
|
||||
<FONT color=#000080><I>// update flags:
|
||||
</I></FONT> flags |= boost::match_prev_avail;
|
||||
flags |= boost::match_not_bob;
|
||||
}
|
||||
}
|
||||
</PRE>
|
||||
<HR>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
23 June 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,145 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Algorithm regex_split (deprecated)</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Algorithm regex_split (deprecated)</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>The algorithm regex_split has been deprecated in favor of the iterator <A href="regex_token_iterator.html">
|
||||
regex_token_iterator</A> which has a more flexible and powerful interface,
|
||||
as well as following the more usual standard library "pull" rather than "push"
|
||||
semantics.</P>
|
||||
<P>Code which uses regex_split will continue to compile; the following
|
||||
documentation is taken from the previous boost.regex version:</P>
|
||||
<H3><A name="regex_split"></A>Algorithm regex_split</H3>
|
||||
<PRE>#include <<A href="../../../boost/regex.hpp">boost/regex.hpp</A>> </PRE>
|
||||
<P>Algorithm regex_split performs a similar operation to the perl split operation,
|
||||
and comes in three overloaded forms:
|
||||
</P>
|
||||
<PRE><B>template</B> <<B>class</B> OutputIterator, <B>class</B> charT, <B>class</B> Traits1, <B>class</B> Alloc1, <B>class</B> Traits2>
|
||||
std::size_t regex_split(OutputIterator out,
|
||||
std::basic_string<charT, Traits1, Alloc1>& s,
|
||||
<B> const</B> basic_regex<charT, Traits2>& e,
|
||||
<STRONG> </STRONG>boost::match_flag_type flags,
|
||||
std::size_t max_split);
|
||||
|
||||
<B>template</B> <<B>class</B> OutputIterator, <B>class</B> charT, <B>class</B> Traits1, <B>class</B> Alloc1, <B>class</B> Traits2>
|
||||
std::size_t regex_split(OutputIterator out,
|
||||
std::basic_string<charT, Traits1, Alloc1>& s,
|
||||
<B> const</B> basic_regex<charT, Traits2>& e,
|
||||
boost::match_flag_type flags = match_default);
|
||||
|
||||
<B>template</B> <<B>class</B> OutputIterator, <B>class</B> charT, <B>class</B> Traits1, <B>class</B> Alloc1>
|
||||
std::size_t regex_split(OutputIterator out,
|
||||
std::basic_string<charT, Traits1, Alloc1>& s);</PRE>
|
||||
<P><STRONG>Effects: </STRONG>Each version of the algorithm takes an
|
||||
output-iterator for output, and a string for input. If the expression contains
|
||||
no marked sub-expressions, then the algorithm writes one string onto the
|
||||
output-iterator for each section of input that does not match the expression.
|
||||
If the expression does contain marked sub-expressions, then each time a match
|
||||
is found, one string for each marked sub-expression will be written to the
|
||||
output-iterator. No more than <I>max_split </I>strings will be written to the
|
||||
output-iterator. Before returning, all the input processed will be deleted from
|
||||
the string <I>s</I> (if <I>max_split </I>is not reached then all of <I>s</I> will
|
||||
be deleted). Returns the number of strings written to the output-iterator. If
|
||||
the parameter <I>max_split</I> is not specified then it defaults to UINT_MAX.
|
||||
If no expression is specified, then it defaults to "\s+", and splitting occurs
|
||||
on whitespace.
|
||||
</P>
|
||||
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
|
||||
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
|
||||
or if the program runs out of stack space while matching the expression (if
|
||||
Boost.regex is <A href="configuration.html">configured</A> in recursive mode),
|
||||
or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html">
|
||||
configured</A> in non-recursive mode).</P>
|
||||
<P><A href="../example/snippets/regex_split_example_1.cpp">Example</A>: the
|
||||
following function will split the input string into a series of tokens, and
|
||||
remove each token from the string <I>s</I>:
|
||||
</P>
|
||||
<PRE><B>unsigned</B> tokenise(std::list<std::string>& l, std::string& s)
|
||||
{
|
||||
<B> return</B> boost::regex_split(std::back_inserter(l), s);
|
||||
}</PRE>
|
||||
<P><A href="../example/snippets/regex_split_example_2.cpp">Example</A>: the
|
||||
following short program will extract all of the URLs from an HTML file, and
|
||||
print them out to <I>cout</I>:
|
||||
</P>
|
||||
<PRE><FONT color=#008000>#include <list>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <boost/regex.hpp>
|
||||
</FONT>
|
||||
boost::regex e(<FONT color=#000080>"<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\""</FONT>,
|
||||
boost::regbase::normal | boost::regbase::icase);
|
||||
|
||||
<B>void</B> load_file(std::string& s, std::istream& is)
|
||||
{
|
||||
s.erase();
|
||||
<FONT color=#000080>//
|
||||
// attempt to grow string buffer to match file size,
|
||||
// this doesn't always work...
|
||||
</FONT> s.reserve(is.rdbuf()->in_avail());
|
||||
<B>char</B> c;
|
||||
<B>while</B>(is.get(c))
|
||||
{
|
||||
<FONT color=#000080>// use logarithmic growth strategy, in case
|
||||
// in_avail (above) returned zero:
|
||||
</FONT> <B>if</B>(s.capacity() == s.size())
|
||||
s.reserve(s.capacity() * 3);
|
||||
s.append(1, c);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
<B>int</B> main(<B>int</B> argc, <B>char</B>** argv)
|
||||
{
|
||||
std::string s;
|
||||
std::list<std::string> l;
|
||||
|
||||
<B>for</B>(<B>int</B> i = 1; i < argc; ++i)
|
||||
{
|
||||
std::cout << <FONT color=#000080>"Findings URL's in "</FONT> << argv[i] << <FONT color=#000080>":"</FONT> << std::endl;
|
||||
s.erase();
|
||||
std::ifstream is(argv[i]);
|
||||
load_file(s, is);
|
||||
boost::regex_split(std::back_inserter(l), s, e);
|
||||
<B>while</B>(l.size())
|
||||
{
|
||||
s = *(l.begin());
|
||||
l.pop_front();
|
||||
std::cout << s << std::endl;
|
||||
}
|
||||
}
|
||||
<B>return</B> 0;
|
||||
}</PRE>
|
||||
<HR>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
26 June 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,381 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: regex_token_iterator</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td vAlign="top" width="300">
|
||||
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../boost.png" width="277" border="0"></A></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">regex_token_iterator</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<H3>Contents</H3>
|
||||
<dl class="index">
|
||||
<dt><A href="#synopsis">Synopsis</A> <dt><a href="#description">Description</a> <dt><A href="#examples">
|
||||
Examples</A></dt></dl>
|
||||
<H3><A name="synopsis"></A>Synopsis</H3>
|
||||
<P>The template class <CODE>regex_token_iterator</CODE> is an iterator adapter;
|
||||
that is to say it represents a new view of an existing iterator sequence, by
|
||||
enumerating all the occurrences of a regular expression within that sequence,
|
||||
and presenting one or more character sequences for each match found. Each
|
||||
position enumerated by the iterator is a <A href="sub_match.html">sub_match</A>
|
||||
object that represents what matched a particular sub-expression within the
|
||||
regular expression. When class <CODE>regex_token_iterator</CODE> is used to
|
||||
enumerate a single sub-expression with index -1, then the iterator performs
|
||||
field splitting: that is to say it enumerates one character sequence for each
|
||||
section of the character container sequence that does not match the regular
|
||||
expression specified.</P>
|
||||
<PRE>
|
||||
template <class BidirectionalIterator,
|
||||
class charT = iterator_traits<BidirectionalIterator>::value_type,
|
||||
class traits = regex_traits<charT> >
|
||||
class regex_token_iterator
|
||||
{
|
||||
public:
|
||||
typedef <A href="basic_regex.html">basic_regex</A><charT, traits> regex_type;
|
||||
typedef <A href="sub_match.html">sub_match</A><BidirectionalIterator> value_type;
|
||||
typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
|
||||
typedef const value_type* pointer;
|
||||
typedef const value_type& reference;
|
||||
typedef std::forward_iterator_tag iterator_category;
|
||||
|
||||
<A href="#c1">regex_token_iterator</A>();
|
||||
<A href="#c2">regex_token_iterator</A>(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||||
int submatch = 0, <A href="match_flag_type.html">match_flag_type</A> m = match_default);
|
||||
<A href="#c3">regex_token_iterator</A>(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||||
const std::vector<int>& submatches, match_flag_type m = match_default);
|
||||
template <std::size_t N>
|
||||
<A href="#c4">regex_token_iterator</A>(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||||
const int (&submatches)[N], match_flag_type m = match_default);
|
||||
<A href="#c5">regex_token_iterator</A>(const regex_token_iterator&);
|
||||
regex_token_iterator& <A href="#o1">operator</A>=(const regex_token_iterator&);
|
||||
bool <A href="#o2">operator</A>==(const regex_token_iterator&)const;
|
||||
bool <A href="#o3">operator</A>!=(const regex_token_iterator&)const;
|
||||
const value_type& <A href="#o4">operator</A>*()const;
|
||||
const value_type* <A href="#o5">operator</A>->()const;
|
||||
regex_token_iterator& <A href="#o6">operator</A>++();
|
||||
regex_token_iterator <A href="#o7">operator</A>++(int);
|
||||
};
|
||||
|
||||
typedef regex_token_iterator<const char*> cregex_token_iterator;
|
||||
typedef regex_token_iterator<std::string::const_iterator> sregex_token_iterator;
|
||||
#ifndef BOOST_NO_WREGEX
|
||||
typedef regex_token_iterator<const wchar_t*> wcregex_token_iterator;
|
||||
typedef regex_token_iterator<std::wstring::const_iterator> wsregex_token_iterator;
|
||||
#endif
|
||||
|
||||
template <class charT, class traits>
|
||||
regex_token_iterator<const charT*, charT, traits>
|
||||
<A href="#make_regex_token_iterator">make_regex_token_iterator</A>(const charT* p,
|
||||
const basic_regex<charT, traits>& e,
|
||||
int submatch = 0,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
template <class charT, class traits, class ST, class SA>
|
||||
regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
|
||||
<A href="#make_regex_token_iterator">make_regex_token_iterator</A>(const std::basic_string<charT, ST, SA>& p,
|
||||
const basic_regex<charT, traits>& e,
|
||||
int submatch = 0,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
template <class charT, class traits, std::size_t N>
|
||||
regex_token_iterator<const charT*, charT, traits>
|
||||
<A href="#make_regex_token_iterator">make_regex_token_iterator</A>(const charT* p,
|
||||
const basic_regex<charT, traits>& e,
|
||||
const int (&submatch)[N],
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
template <class charT, class traits, class ST, class SA, std::size_t N>
|
||||
regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
|
||||
<A href="#make_regex_token_iterator">make_regex_token_iterator</A>(const std::basic_string<charT, ST, SA>& p,
|
||||
const basic_regex<charT, traits>& e,
|
||||
const int (&submatch)[N],
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
template <class charT, class traits>
|
||||
regex_token_iterator<const charT*, charT, traits>
|
||||
<A href="#make_regex_token_iterator">make_regex_token_iterator</A>(const charT* p,
|
||||
const basic_regex<charT, traits>& e,
|
||||
const std::vector<int>& submatch,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
template <class charT, class traits, class ST, class SA>
|
||||
regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
|
||||
<A href="#make_regex_token_iterator">make_regex_token_iterator</A>(const std::basic_string<charT, ST, SA>& p,
|
||||
const basic_regex<charT, traits>& e,
|
||||
const std::vector<int>& submatch,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
</PRE>
|
||||
<H3><A name="description"></A>Description</H3>
|
||||
<PRE><A name=c1></A>regex_token_iterator();</PRE>
|
||||
<P><B> Effects:</B> constructs an end of sequence iterator.</P>
|
||||
<PRE><A name=c2></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||||
int submatch = 0, match_flag_type m = match_default);</PRE>
|
||||
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>. Object re shall exist
|
||||
for the lifetime of the iterator constructed from it.</P>
|
||||
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate one
|
||||
string for each regular expression match of the expression <EM>re</EM> found
|
||||
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>. The
|
||||
string enumerated is the sub-expression <EM>submatch </EM>for each match
|
||||
found; if <EM>submatch </EM>is -1, then enumerates all the text sequences that
|
||||
did not match the expression <EM>re </EM>(that is to say it performs field splitting).</P>
|
||||
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
|
||||
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
|
||||
or if the program runs out of stack space while matching the expression (if
|
||||
Boost.regex is <A href="configuration.html">configured</A> in recursive mode),
|
||||
or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html">
|
||||
configured</A> in non-recursive mode).</P>
|
||||
<PRE><A name=c3></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||||
const std::vector<int>& submatches, match_flag_type m = match_default);</PRE>
|
||||
<P><B> Preconditions:</B> <CODE>submatches.size() && !re.empty()</CODE>.
|
||||
Object re shall exist for the lifetime of the iterator constructed from it.</P>
|
||||
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate <EM>submatches.size()</EM>
|
||||
strings for each regular expression match of the expression <EM>re</EM> found
|
||||
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>. For
|
||||
each match found one string will be enumerated for each sub-expression
|
||||
index contained within <EM>submatches </EM>vector; if <EM>submatches[0] </EM>
|
||||
is -1, then the first string enumerated for each match will be all of the text
|
||||
from end of the last match to the start of the current match, in addition there
|
||||
will be one extra string enumerated when no more matches can be found: from the
|
||||
end of the last match found, to the end of the underlying sequence.</P>
|
||||
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
|
||||
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
|
||||
or if the program runs out of stack space while matching the expression (if
|
||||
Boost.regex is <A href="configuration.html">configured</A> in recursive mode),
|
||||
or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html">
|
||||
configured</A> in non-recursive mode).</P>
|
||||
<PRE><A name=c4></A>template <std::size_t N>
|
||||
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||||
const int (&submatches)[R], match_flag_type m = match_default);</PRE>
|
||||
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>. Object re shall exist
|
||||
for the lifetime of the iterator constructed from it.</P>
|
||||
<P><STRONG>Effects:</STRONG> constructs a regex_token_iterator that will
|
||||
enumerate <EM>R</EM> strings for each regular expression match of the
|
||||
expression <EM>re</EM> found within the sequence <EM>[a,b)</EM>, using match
|
||||
flags <EM>m</EM>. For each match found one string will be
|
||||
enumerated for each sub-expression index contained within the <EM>submatches
|
||||
</EM>array; if <EM>submatches[0] </EM>is -1, then the first string enumerated
|
||||
for each match will be all of the text from end of the last match to the start
|
||||
of the current match, in addition there will be one extra string enumerated
|
||||
when no more matches can be found: from the end of the last match found, to the
|
||||
end of the underlying sequence.</P>
|
||||
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
|
||||
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
|
||||
or if the program runs out of stack space while matching the expression (if
|
||||
Boost.regex is <A href="configuration.html">configured</A> in recursive mode),
|
||||
or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html">
|
||||
configured</A> in non-recursive mode).</P>
|
||||
<PRE><A name=c5></A>regex_token_iterator(const regex_token_iterator& that);</PRE>
|
||||
<P><B> Effects: </B>constructs a copy of <CODE>that</CODE>.</P>
|
||||
<P><B> Postconditions:</B> <CODE>*this == that</CODE>.</P>
|
||||
<PRE><A name=o1></A>regex_token_iterator& operator=(const regex_token_iterator& that);</PRE>
|
||||
<P><B> Effects: </B>sets <CODE>*this</CODE> to be equal to <CODE>that</CODE>.</P>
|
||||
<P><B> Postconditions:</B> <CODE>*this == that</CODE>.</P>
|
||||
<PRE><A name=o2></A>bool operator==(const regex_token_iterator& that)const;</PRE>
|
||||
<P>
|
||||
<B>Effects: </B>returns true if *this is the same position as that.</P>
|
||||
<PRE><A name=o3></A>bool operator!=(const regex_token_iterator& that)const;</PRE>
|
||||
<P>
|
||||
<B>Effects: </B>returns <CODE>!(*this == that)</CODE>.</P>
|
||||
<PRE><A name=o4></A>const value_type& operator*()const;</PRE>
|
||||
<P>
|
||||
<B>Effects: </B>returns the current character sequence being enumerated.</P>
|
||||
<PRE><A name=o5></A>const value_type* operator->()const;</PRE>
|
||||
<P>
|
||||
<B>Effects: </B>returns <CODE>&(*this)</CODE>.</P>
|
||||
<PRE><A name=o6></A>regex_token_iterator& operator++();</PRE>
|
||||
<P>
|
||||
<B>Effects: </B>Moves on to the next character sequence to be enumerated.</P>
|
||||
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
|
||||
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
|
||||
or if the program runs out of stack space while matching the expression (if
|
||||
Boost.regex is <A href="configuration.html">configured</A> in recursive mode),
|
||||
or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html">
|
||||
configured</A> in non-recursive mode).</P>
|
||||
<P>
|
||||
<B>
|
||||
Returns:</B><CODE> *this</CODE>.</P><PRE><A name=o7></A>regex_token_iterator operator++(int);</PRE>
|
||||
<P><B> Effects:</B> constructs a copy <CODE>result</CODE> of <CODE>*this</CODE>,
|
||||
then calls <CODE>++(*this)</CODE>.</P>
|
||||
<P><B> Returns:</B> <CODE>result</CODE>.<A name="examples"></A></P>
|
||||
<PRE><A name=make_regex_token_iterator></A>template <class charT, class traits>
|
||||
regex_token_iterator<const charT*, charT, traits>
|
||||
make_regex_token_iterator(const charT* p,
|
||||
const basic_regex<charT, traits>& e,
|
||||
int submatch = 0,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
template <class charT, class traits, class ST, class SA>
|
||||
regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
|
||||
make_regex_token_iterator(const std::basic_string<charT, ST, SA>& p,
|
||||
const basic_regex<charT, traits>& e,
|
||||
int submatch = 0,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
template <class charT, class traits, std::size_t N>
|
||||
regex_token_iterator<const charT*, charT, traits>
|
||||
make_regex_token_iterator(const charT* p,
|
||||
const basic_regex<charT, traits>& e,
|
||||
const int (&submatch)[N],
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
template <class charT, class traits, class ST, class SA, std::size_t N>
|
||||
regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
|
||||
make_regex_token_iterator(const std::basic_string<charT, ST, SA>& p,
|
||||
const basic_regex<charT, traits>& e,
|
||||
const int (&submatch)[N],
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
template <class charT, class traits>
|
||||
regex_token_iterator<const charT*, charT, traits>
|
||||
make_regex_token_iterator(const charT* p,
|
||||
const basic_regex<charT, traits>& e,
|
||||
const std::vector<int>& submatch,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
|
||||
template <class charT, class traits, class ST, class SA>
|
||||
regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
|
||||
make_regex_token_iterator(const std::basic_string<charT, ST, SA>& p,
|
||||
const basic_regex<charT, traits>& e,
|
||||
const std::vector<int>& submatch,
|
||||
regex_constants::match_flag_type m = regex_constants::match_default);
|
||||
</PRE>
|
||||
<P>Effects: returns a <A href="#synopsis">regex_token_iterator</A> that enumerates
|
||||
one <A href="sub_match.html">sub_match</A> for each value in <EM>submatch</EM> for
|
||||
each occurrence of regular expression <EM>e</EM> in string <EM>p</EM>, matched
|
||||
using <A href="match_flag_type.html">match_flags</A> <EM>m</EM>.</P>
|
||||
<P></P>
|
||||
<H3>Examples</H3>
|
||||
<P>The following <A href="../example/snippets/regex_token_iterator_eg_1.cpp">example</A>
|
||||
takes a string and splits it into a series of tokens:</P>
|
||||
<pre>
|
||||
<FONT color=#008040>#include <iostream></FONT>
|
||||
<FONT color=#008040>#include <boost/regex.hpp></FONT>
|
||||
|
||||
<B>using</B> <B>namespace</B> std;
|
||||
|
||||
<B>int</B> main(<B>int</B> argc)
|
||||
{
|
||||
string s;
|
||||
<B>do</B>{
|
||||
<B>if</B>(argc == <FONT color=#0000a0>1</FONT>)
|
||||
{
|
||||
cout << <FONT color=#0000ff>"Enter text to split (or \"quit\" to exit): "</FONT>;
|
||||
getline(cin, s);
|
||||
<B>if</B>(s == <FONT color=#0000ff>"quit"</FONT>) <B>break</B>;
|
||||
}
|
||||
<B>else</B>
|
||||
s = <FONT color=#0000ff>"This is a string of tokens"</FONT>;
|
||||
|
||||
boost::regex re(<FONT color=#0000ff>"\\s+"</FONT>);
|
||||
boost::sregex_token_iterator i(s.begin(), s.end(), re, -<FONT color=#0000a0>1</FONT>);
|
||||
boost::sregex_token_iterator j;
|
||||
|
||||
<B>unsigned</B> count = <FONT color=#0000a0>0</FONT>;
|
||||
<B>while</B>(i != j)
|
||||
{
|
||||
cout << *i++ << endl;
|
||||
count++;
|
||||
}
|
||||
cout << <FONT color=#0000ff>"There were "</FONT> << count << <FONT color=#0000ff>" tokens found."</FONT> << endl;
|
||||
|
||||
}<B>while</B>(argc == <FONT color=#0000a0>1</FONT>);
|
||||
<B>return</B> <FONT color=#0000a0>0</FONT>;
|
||||
}
|
||||
|
||||
</pre>
|
||||
<P>The following <A href="../example/snippets/regex_token_iterator_eg_2.cpp">example</A>
|
||||
takes an HTML file and outputs a list of all the linked files:</P>
|
||||
<pre>
|
||||
<FONT color=#008040>#include <fstream></FONT>
|
||||
<FONT color=#008040>#include <iostream></FONT>
|
||||
<FONT color=#008040>#include <iterator></FONT>
|
||||
<FONT color=#008040>#include <boost/regex.hpp></FONT>
|
||||
|
||||
boost::regex e(<FONT color=#0000ff>"<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\""</FONT>,
|
||||
boost::regex::normal | boost::regbase::icase);
|
||||
|
||||
<B>void</B> load_file(std::string& s, std::istream& is)
|
||||
{
|
||||
s.erase();
|
||||
<I><FONT color=#000080>//</FONT></I>
|
||||
<I><FONT color=#000080>// attempt to grow string buffer to match file size,</FONT></I>
|
||||
<I><FONT color=#000080>// this doesn't always work...</FONT></I>
|
||||
s.reserve(is.rdbuf()->in_avail());
|
||||
<B>char</B> c;
|
||||
<B>while</B>(is.get(c))
|
||||
{
|
||||
<I><FONT color=#000080>// use logarithmic growth strategy, in case</FONT></I>
|
||||
<I><FONT color=#000080>// in_avail (above) returned zero:</FONT></I>
|
||||
<B>if</B>(s.capacity() == s.size())
|
||||
s.reserve(s.capacity() * <FONT color=#0000a0>3</FONT>);
|
||||
s.append(<FONT color=#0000a0>1</FONT>, c);
|
||||
}
|
||||
}
|
||||
|
||||
<B>int</B> main(<B>int</B> argc, <B>char</B>** argv)
|
||||
{
|
||||
std::string s;
|
||||
<B>int</B> i;
|
||||
<B>for</B>(i = <FONT color=#0000a0>1</FONT>; i < argc; ++i)
|
||||
{
|
||||
std::cout << <FONT color=#0000ff>"Findings URL's in "</FONT> << argv[i] << <FONT color=#0000ff>":"</FONT> << std::endl;
|
||||
s.erase();
|
||||
std::ifstream is(argv[i]);
|
||||
load_file(s, is);
|
||||
boost::sregex_token_iterator i(s.begin(), s.end(), e, <FONT color=#0000a0>1</FONT>);
|
||||
boost::sregex_token_iterator j;
|
||||
<B>while</B>(i != j)
|
||||
{
|
||||
std::cout << *i++ << std::endl;
|
||||
}
|
||||
}
|
||||
<I><FONT color=#000080>//</FONT></I>
|
||||
<I><FONT color=#000080>// alternative method:</FONT></I>
|
||||
<I><FONT color=#000080>// test the array-literal constructor, and split out the whole</FONT></I>
|
||||
<I><FONT color=#000080>// match as well as $1....</FONT></I>
|
||||
<I><FONT color=#000080>//</FONT></I>
|
||||
<B>for</B>(i = <FONT color=#0000a0>1</FONT>; i < argc; ++i)
|
||||
{
|
||||
std::cout << <FONT color=#0000ff>"Findings URL's in "</FONT> << argv[i] << <FONT color=#0000ff>":"</FONT> << std::endl;
|
||||
s.erase();
|
||||
std::ifstream is(argv[i]);
|
||||
load_file(s, is);
|
||||
<B>const</B> <B>int</B> subs[] = {<FONT color=#0000a0>1</FONT>, <FONT color=#0000a0>0</FONT>,};
|
||||
boost::sregex_token_iterator i(s.begin(), s.end(), e, subs);
|
||||
boost::sregex_token_iterator j;
|
||||
<B>while</B>(i != j)
|
||||
{
|
||||
std::cout << *i++ << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
<B>return</B> <FONT color=#0000a0>0</FONT>;
|
||||
}
|
||||
</pre>
|
||||
<HR>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
26 June 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -1,87 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: class regex_traits</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">class regex_traits</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<H3>Contents</H3>
|
||||
<dl class="index">
|
||||
<dt><A href="#synopsis">Synopsis</A></dt> <dt><a href="#description">Description</a></dt>
|
||||
</dl>
|
||||
<H3><A name="synopsis"></A>Synopsis</H3>
|
||||
<pre>
|
||||
namespace boost{
|
||||
|
||||
template <class charT, class implementationT = sensible_default_choice>
|
||||
struct regex_traits : public implementationT
|
||||
{
|
||||
regex_traits() : implementationT() {}
|
||||
};
|
||||
|
||||
template <class charT>
|
||||
struct c_regex_traits;
|
||||
|
||||
template <class charT>
|
||||
struct cpp_regex_traits;
|
||||
|
||||
template <class charT>
|
||||
struct w32_regex_traits;
|
||||
|
||||
} // namespace boost
|
||||
</pre>
|
||||
<H3><A name="description"></A>Description</H3>
|
||||
<P>The class regex_traits is just a thin wrapper around an actual implementation
|
||||
class, which may be one of:</P>
|
||||
<UL>
|
||||
<LI>
|
||||
c_regex_traits: this class is deprecated, it wraps the C locale, and is used as
|
||||
the default implementation when the platform is not Win32, and the C++ locale
|
||||
is not available.</LI>
|
||||
<LI>
|
||||
cpp_regex_traits: the default traits class for non-Win32 platforms, allows the
|
||||
regex class to be imbued with a std::locale instance.</LI>
|
||||
<LI>
|
||||
w32_regex_traits: the default traits class implementation on Win32 platforms,
|
||||
allows the regex class to be imbued with an LCID.</LI></UL>
|
||||
<P>The default behavior can be altered by defining one of the following
|
||||
configuration macros in <A href="../../../boost/regex/user.hpp">boost/regex/user.hpp</A>:</P>
|
||||
<UL>
|
||||
<LI>
|
||||
BOOST_REGEX_USE_C_LOCALE: makes c_regex_traits the default.</LI>
|
||||
<LI>
|
||||
BOOST_REGEX_USE_CPP_LOCALE: makes cpp_regex_traits the default.</LI></UL>
|
||||
<P>All these traits classes fulfil the <A href="concepts.html#traits">traits class
|
||||
requirements</A>.</P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 June 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2004</i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,237 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Standards Conformance</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Standards Conformance</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<H3>C++</H3>
|
||||
<P>Boost.regex is intended to conform to the <A href="http://anubis.dkuug.dk/jtc1/sc22/wg21/docs/papers/2003/n1429.htm">
|
||||
regular expression standardization proposal</A>, which will appear in a
|
||||
future C++ standard technical report (and hopefully in a future version of the
|
||||
standard). </P>
|
||||
<H3>ECMAScript / JavaScript</H3>
|
||||
<P>All of the ECMAScript regular expression syntax features are supported, except
|
||||
that:</P>
|
||||
<P>Negated class escapes (\S, \D and \W) are not permitted inside character class
|
||||
definitions ( [...] ).</P>
|
||||
<P>The escape sequence \u matches any upper case character (the same as
|
||||
[[:upper:]]) rather than a Unicode escape sequence; use \x{DDDD} for
|
||||
Unicode escape sequences.</P>
|
||||
<H3>Perl</H3>
|
||||
<P>Almost all Perl features are supported, except for:</P>
|
||||
<P>
|
||||
<TABLE id="Table2" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<TD>(?{code})</TD>
|
||||
<TD>Not implementable in a compiled strongly typed language.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>(??{code})</TD>
|
||||
<TD>Not implementable in a compiled strongly typed language.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H3>POSIX</H3>
|
||||
<P>All the POSIX basic and extended regular expression features are supported,
|
||||
except that:</P>
|
||||
<P>No character collating names are recognized except those specified in the POSIX
|
||||
standard for the C locale, unless they are explicitly registered with the
|
||||
traits class.</P>
|
||||
<P>Character equivalence classes ( [[=a=]] etc) are probably buggy except on
|
||||
Win32. Implementing this feature requires knowledge of the format of the
|
||||
string sort keys produced by the system; if you need this, and the default
|
||||
implementation doesn't work on your platform, then you will need to supply a
|
||||
custom traits class.</P>
|
||||
<H3>Unicode</H3>
|
||||
<P>The following comments refer to <A href="http://www.unicode.org/reports/tr18/">Unicode
|
||||
Technical
|
||||
<SPAN>Standard
|
||||
</SPAN>#18: Unicode Regular Expressions</A> version 9.</P>
|
||||
<P>
|
||||
<TABLE id="Table3" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<TD>#</TD>
|
||||
<TD>Feature</TD>
|
||||
<TD>Support</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>1.1</TD>
|
||||
<TD>Hex Notation</TD>
|
||||
<TD>Yes: use \x{DDDD} to refer to code point U+DDDD.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>1.2</TD>
|
||||
<TD>Character Properties</TD>
|
||||
<TD>All the names listed under the <A href="http://www.unicode.org/reports/tr18/#Categories">General
|
||||
Category Property</A> are supported. Script names and Other Names are
|
||||
not currently supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>1.3</TD>
|
||||
<TD><A name="Subtraction_and_Intersection">Subtraction</A> and Intersection</TD>
|
||||
<TD>
|
||||
<P>Indirectly supported by forward-lookahead:
|
||||
</P>
|
||||
<P>(?=[[:X:]])[[:Y:]]</P>
|
||||
<P>Gives the intersection of character properties X and Y.</P>
|
||||
<P>(?![[:X:]])[[:Y:]]</P>
|
||||
<P>Gives everything in Y that is not in X (subtraction).</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>1.4</TD>
|
||||
<TD><A name="Simple_Word_Boundaries">Simple Word Boundaries</A></TD>
|
||||
<TD>Conforming: non-spacing marks are included in the set of word characters.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>1.5</TD>
|
||||
<TD>Caseless Matching</TD>
|
||||
<TD>Supported, note that at this level, case transformations are 1:1, many to many
|
||||
case folding operations are not supported (for example "&szlig;" to "SS").</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>1.6</TD>
|
||||
<TD>Line Boundaries</TD>
|
||||
<TD>Supported, except that "." matches only one character of "\r\n". Other than
|
||||
that, line boundaries match correctly, including not matching in the middle of a
|
||||
"\r\n" sequence.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>1.7</TD>
|
||||
<TD>Code Points</TD>
|
||||
<TD>Supported: provided you use the <A href="icu_strings.html">u32* algorithms</A>,
|
||||
then UTF-8, UTF-16 and UTF-32 are all treated as sequences of 32-bit code
|
||||
points.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>2.1</TD>
|
||||
<TD>Canonical Equivalence</TD>
|
||||
<TD>Not supported: it is up to the user of the library to convert all text into
|
||||
the same canonical form as the regular expression.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>2.2</TD>
|
||||
<TD>Default Grapheme Clusters</TD>
|
||||
<TD>Not supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>2.3</TD>
|
||||
<TD><!--StartFragment -->
|
||||
<P><A name="Default_Word_Boundaries">Default Word Boundaries</A></P>
|
||||
</TD>
|
||||
<TD>Not supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>2.4</TD>
|
||||
<TD><!--StartFragment -->
|
||||
<P><A name="Default_Loose_Matches">Default Loose Matches</A></P>
|
||||
</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>2.5</TD>
|
||||
<TD>Name Properties</TD>
|
||||
<TD>Supported: the expression "[[:name:]]" or \N{name} matches the named character
|
||||
"name".</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>2.6</TD>
|
||||
<TD>Wildcard properties</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.1</TD>
|
||||
<TD>Tailored Punctuation.</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.2</TD>
|
||||
<TD>Tailored Grapheme Clusters</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.3</TD>
|
||||
<TD>Tailored Word Boundaries.</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.4</TD>
|
||||
<TD>Tailored Loose Matches</TD>
|
||||
<TD>Partial support: [[=c=]] matches characters with the same primary equivalence
|
||||
class as "c".</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.5</TD>
|
||||
<TD>Tailored Ranges</TD>
|
||||
<TD>Supported: [a-b] matches any character that collates in the range a to b, when
|
||||
the expression is constructed with the <A href="syntax_option_type.html">collate</A>
|
||||
flag set.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.6</TD>
|
||||
<TD>Context Matches</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.7</TD>
|
||||
<TD>Incremental Matches</TD>
|
||||
<TD>Supported: pass the flag <A href="match_flag_type.html">match_partial</A> to
|
||||
the regex algorithms.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.8</TD>
|
||||
<TD>Unicode Set Sharing</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.9</TD>
|
||||
<TD>Possible Match Sets</TD>
|
||||
<TD>Not supported, however this information is used internally to optimise the
|
||||
matching of regular expressions, and return quickly if no match is possible.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.10</TD>
|
||||
<TD>Folded Matching</TD>
|
||||
<TD>Partial Support: It is possible to achieve a similar effect by using a
|
||||
custom regular expression traits class.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.11</TD>
|
||||
<TD>Custom Submatch Evaluation</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
28 June 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
@ -1,571 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: sub_match</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td vAlign="top" width="300">
|
||||
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../boost.png" width="277" border="0"></A></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">sub_match</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<H3>Synopsis</H3>
|
||||
<P>#include <<A href="../../../boost/regex.hpp">boost/regex.hpp</A>>
|
||||
</P>
|
||||
<P>Regular expressions are different from many simple pattern-matching algorithms
|
||||
in that as well as finding an overall match they can also produce
|
||||
sub-expression matches: each sub-expression being delimited in the pattern by a
|
||||
pair of parentheses (...). There has to be some method for reporting
|
||||
sub-expression matches back to the user: this is achieved by defining a
|
||||
class <I><A href="match_results.html">match_results</A></I> that acts as an
|
||||
indexed collection of sub-expression matches, each sub-expression match being
|
||||
contained in an object of type <I>sub_match</I>
|
||||
.
|
||||
<P>Objects of type <EM>sub_match</EM> may only be obtained by subscripting an object
|
||||
of type <EM><A href="match_results.html">match_results</A></EM>
|
||||
.
|
||||
<P>Objects of type <EM>sub_match</EM> may be compared to objects of type <EM>std::basic_string</EM>,
|
||||
or <EM>const charT*</EM> or <EM>const charT</EM>
|
||||
.
|
||||
<P>Objects of type <EM>sub_match</EM> may be added to objects of type <EM>std::basic_string</EM>,
|
||||
or <EM>const charT* </EM>or <EM>const charT</EM>, to produce a new <EM>std::basic_string
|
||||
</EM>
|
||||
object.
|
||||
<P>When the marked sub-expression denoted by an object of type sub_match<>
|
||||
participated in a regular expression match then member <CODE>matched</CODE> evaluates
|
||||
to true, and members <CODE>first</CODE> and <CODE>second</CODE> denote the
|
||||
range of characters <CODE>[first,second)</CODE> which formed that match.
|
||||
Otherwise <CODE>matched</CODE> is false, and members <CODE>first</CODE> and <CODE>second</CODE>
|
||||
contain undefined values.</P>
|
||||
<P>When the marked sub-expression denoted by an object of type sub_match<>
|
||||
was repeated, then the sub_match object represents the match obtained by the
|
||||
last repeat. The complete set of all the captures obtained for all the
|
||||
repeats, may be accessed via the captures() member function (Note: this has
|
||||
serious performance implications, you have to explicitly enable this feature).</P>
|
||||
<P>If an object of type <CODE>sub_match<></CODE> represents sub-expression 0
|
||||
- that is to say the whole match - then member <CODE>matched</CODE> is always
|
||||
true, unless a partial match was obtained as a result of the flag <CODE>match_partial</CODE>
|
||||
being passed to a regular expression algorithm, in which case member <CODE>matched</CODE>
|
||||
is false, and members <CODE>first</CODE> and <CODE>second</CODE> represent the
|
||||
character range that formed the partial match.</P>
|
||||
<PRE>namespace boost{
|
||||
|
||||
template <class BidirectionalIterator>
|
||||
class sub_match;
|
||||
|
||||
typedef sub_match<const char*> csub_match;
|
||||
typedef sub_match<const wchar_t*> wcsub_match;
|
||||
typedef sub_match<std::string::const_iterator> ssub_match;
|
||||
typedef sub_match<std::wstring::const_iterator> wssub_match;
|
||||
|
||||
template <class BidirectionalIterator>
|
||||
class sub_match : public std::pair<BidirectionalIterator, BidirectionalIterator>
|
||||
{
|
||||
public:
|
||||
typedef typename iterator_traits<BidirectionalIterator>::value_type value_type;
|
||||
typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
|
||||
typedef BidirectionalIterator iterator;
|
||||
|
||||
bool <A href="#m1" >matched</A>;
|
||||
|
||||
difference_type <A href="#m2" >length</A>()const;
|
||||
operator <A href="#m3" >basic_string</A><value_type>()const;
|
||||
basic_string<value_type> <A href="#m4" >str</A>()const;
|
||||
|
||||
int <A href="#m5" >compare</A>(const sub_match& s)const;
|
||||
int <A href="#m6" >compare</A>(const basic_string<value_type>& s)const;
|
||||
int <A href="#m7" >compare</A>(const value_type* s)const;
|
||||
#ifdef BOOST_REGEX_MATCH_EXTRA
|
||||
typedef implementation-private <A href="#m9">capture_sequence_type</A>;
|
||||
const capture_sequence_type& <A href="#m8" >captures</A>()const;
|
||||
#endif
|
||||
};
|
||||
//
|
||||
// comparisons to another sub_match:
|
||||
//
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o11" >operator</A> == (const sub_match<BidirectionalIterator>& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o12" >operator</A> != (const sub_match<BidirectionalIterator>& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o13" >operator</A> < (const sub_match<BidirectionalIterator>& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o14" >operator</A> <= (const sub_match<BidirectionalIterator>& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o15" >operator</A> >= (const sub_match<BidirectionalIterator>& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o16" >operator</A> > (const sub_match<BidirectionalIterator>& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
|
||||
|
||||
//
|
||||
// comparisons to a basic_string:
|
||||
//
|
||||
template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool <A href="#o21" >operator</A> == (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool <A href="#o22" >operator</A> != (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool <A href="#o23" >operator</A> < (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool <A href="#o24" >operator</A> > (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool <A href="#o25" >operator</A> >= (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool <A href="#o26" >operator</A> <= (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
|
||||
template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool <A href="#o31" >operator</A> == (const sub_match<BidirectionalIterator>& lhs,
|
||||
const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
|
||||
template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool <A href="#o32" >operator</A> != (const sub_match<BidirectionalIterator>& lhs,
|
||||
const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
|
||||
template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool <A href="#o33" >operator</A> < (const sub_match<BidirectionalIterator>& lhs,
|
||||
const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
|
||||
template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool <A href="#o34" >operator</A> > (const sub_match<BidirectionalIterator>& lhs,
|
||||
const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
|
||||
template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool <A href="#o35" >operator</A> >= (const sub_match<BidirectionalIterator>& lhs,
|
||||
const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
|
||||
template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool <A href="#o36" >operator</A> <= (const sub_match<BidirectionalIterator>& lhs,
|
||||
const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);
|
||||
|
||||
//
|
||||
// comparisons to a pointer to a character array:
|
||||
//
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o41" >operator</A> == (typename iterator_traits<BidirectionalIterator>::value_type const* lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o42" >operator</A> != (typename iterator_traits<BidirectionalIterator>::value_type const* lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o43" >operator</A> < (typename iterator_traits<BidirectionalIterator>::value_type const* lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o44" >operator</A> > (typename iterator_traits<BidirectionalIterator>::value_type const* lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o45" >operator</A> >= (typename iterator_traits<BidirectionalIterator>::value_type const* lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o46" >operator</A> <= (typename iterator_traits<BidirectionalIterator>::value_type const* lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o51" >operator</A> == (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const* rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o52" >operator</A> != (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const* rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o53" >operator</A> < (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const* rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o54" >operator</A> > (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const* rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o55" >operator</A> >= (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const* rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o56" >operator</A> <= (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const* rhs);
|
||||
|
||||
//
|
||||
// comparisons to a single character:
|
||||
//
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o61" >operator</A> == (typename iterator_traits<BidirectionalIterator>::value_type const& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o62" >operator</A> != (typename iterator_traits<BidirectionalIterator>::value_type const& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o63" >operator</A> < (typename iterator_traits<BidirectionalIterator>::value_type const& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o64" >operator</A> > (typename iterator_traits<BidirectionalIterator>::value_type const& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o65" >operator</A> >= (typename iterator_traits<BidirectionalIterator>::value_type const& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o66" >operator</A> <= (typename iterator_traits<BidirectionalIterator>::value_type const& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);
|
||||
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o71" >operator</A> == (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const& rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o72" >operator</A> != (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const& rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o73" >operator</A> < (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const& rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o74" >operator</A> > (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const& rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o75" >operator</A> >= (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const& rhs);
|
||||
template <class BidirectionalIterator>
|
||||
bool <A href="#o76" >operator</A> <= (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const& rhs);
|
||||
//
|
||||
// addition operators:
|
||||
//
|
||||
template <class BidirectionalIterator, class traits, class Allocator>
|
||||
std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>
|
||||
<A href="#o81" >operator</A> + (const std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& s,
|
||||
const sub_match<BidirectionalIterator>& m);
|
||||
template <class BidirectionalIterator, class traits, class Allocator>
|
||||
std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>
|
||||
<A href="#o82" >operator</A> + (const sub_match<BidirectionalIterator>& m,
|
||||
const std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& s);
|
||||
template <class BidirectionalIterator> std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type>
|
||||
<A href="#o83" >operator</A> + (typename iterator_traits<BidirectionalIterator>::value_type const* s,
|
||||
const sub_match<BidirectionalIterator>& m);
|
||||
template <class BidirectionalIterator> std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type>
|
||||
<A href="#o84" >operator</A> + (const sub_match<BidirectionalIterator>& m,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const * s);
|
||||
template <class BidirectionalIterator>
|
||||
std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type>
|
||||
<A href="#o85" >operator</A> + (typename iterator_traits<BidirectionalIterator>::value_type const& s,
|
||||
const sub_match<BidirectionalIterator>& m);
|
||||
template <class BidirectionalIterator>
|
||||
std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type>
|
||||
<A href="#o86" >operator</A> + (const sub_match<BidirectionalIterator>& m,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const& s);
|
||||
template <class BidirectionalIterator>
|
||||
std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type>
|
||||
<A href="#o87" >operator</A> + (const sub_match<BidirectionalIterator>& m1,
|
||||
const sub_match<BidirectionalIterator>& m2);
|
||||
|
||||
//
|
||||
// stream inserter:
|
||||
//
|
||||
template <class charT, class traits, class BidirectionalIterator>
|
||||
basic_ostream<charT, traits>&
|
||||
<A href="#oi" >operator</A> << (basic_ostream<charT, traits>& os,
|
||||
const sub_match<BidirectionalIterator>& m);
|
||||
|
||||
} // namespace boost</PRE>
|
||||
<H3>Description</H3>
|
||||
<H4>sub_match members</H4>
|
||||
<PRE>typedef typename std::iterator_traits<iterator>::value_type value_type;</PRE>
|
||||
<P>The type pointed to by the iterators.</P>
|
||||
<PRE>typedef typename std::iterator_traits<iterator>::difference_type difference_type;</PRE>
|
||||
<P>A type that represents the difference between two iterators.</P>
|
||||
<PRE>typedef iterator iterator_type;</PRE>
|
||||
<P>The iterator type.</P>
|
||||
<PRE>iterator first</PRE>
|
||||
<P>An iterator denoting the position of the start of the match.</P>
|
||||
<PRE>iterator second</PRE>
|
||||
<P>An iterator denoting the position of the end of the match.</P>
|
||||
<PRE><A name=m1></A>bool matched</PRE>
|
||||
<P>A Boolean value denoting whether this sub-expression participated in the match.</P>
|
||||
<PRE><A name=m2></A>difference_type length()const;</PRE>
|
||||
<P><B>Effects: </B>returns the length of this matched sub-expression, or 0 if this
|
||||
sub-expression was not matched: <CODE>(matched ? distance(first, second) : 0)</CODE>.</P>
|
||||
<PRE><A name=m3></A>operator basic_string<value_type>()const;</PRE>
|
||||
<P><B>Effects: </B>converts *this into a string: returns <CODE>(matched ?
|
||||
basic_string<value_type>(first, second) :
|
||||
basic_string<value_type>())</CODE>.</P>
|
||||
<PRE><A name=m4></A>basic_string<value_type> str()const;</PRE>
|
||||
<P><B>Effects: </B>returns a string representation of *this: <CODE>(matched ?
|
||||
basic_string<value_type>(first, second) :
|
||||
basic_string<value_type>())</CODE>.</P>
|
||||
<PRE><A name=m5></A>int compare(const sub_match& s)const;</PRE>
|
||||
<P><B>Effects: </B>performs a lexical comparison to <EM>s</EM>: returns <CODE>str().compare(s.str())</CODE>.</P>
|
||||
<PRE><A name=m6></A>int compare(const basic_string<value_type>& s)const;</PRE>
|
||||
<P><B>Effects: </B>compares *this to the string s: returns <CODE>str().compare(s)</CODE>.</P>
|
||||
<PRE><A name=m7></A>int compare(const value_type* s)const;</PRE>
|
||||
<P><B>Effects: </B>compares *this to the null-terminated string <EM>s</EM>: returns
|
||||
<CODE>str().compare(s)</CODE>.</P>
|
||||
<PRE><A name=m9></A>typedef implementation-private capture_sequence_type;</PRE>
|
||||
<P>Defines an implementation-specific type that satisfies the requirements of
|
||||
a standard library Sequence (23.1.1 including the optional Table 68
|
||||
operations), whose value_type is a <EM>sub_match<BidirectionalIterator></EM>. This
|
||||
type happens to be <EM>std::vector<sub_match<BidirectionalIterator> ></EM>,
|
||||
but you shouldn't actually rely on that.</P>
|
||||
<PRE><A name=m8></A>const capture_sequence_type& <A href="#m8" >captures</A>()const; </PRE>
|
||||
<P><STRONG>Effects:</STRONG> returns a sequence containing all the captures
|
||||
obtained for this sub-expression.</P>
|
||||
<P><STRONG>Preconditions:</STRONG> the library must be built and used with
|
||||
BOOST_REGEX_MATCH_EXTRA defined, and you must pass the flag <A href="match_flag_type.html">
|
||||
match_extra</A> to the regex matching functions (<A href="regex_match.html">regex_match</A>,
|
||||
<A href="regex_search.html">regex_search</A>, <A href="regex_iterator.html">regex_iterator</A>
|
||||
or <A href="regex_token_iterator.html">regex_token_iterator</A>) in order for
|
||||
this member function to be defined and return useful information.</P>
|
||||
<P><STRONG>Rationale:</STRONG> Enabling this feature has several consequences:
|
||||
</P>
|
||||
<UL>
|
||||
<LI>
|
||||
sub_match occupies more memory resulting in complex expressions running out of
|
||||
memory or stack space more quickly during matching.
|
||||
<LI>
|
||||
The matching algorithms are less efficient at handling some features
|
||||
(independent sub-expressions for example), even when <EM>match_extra</EM>
|
||||
is not used.
|
||||
<LI>
|
||||
The matching algorithms are much less efficient (i.e. slower), when <EM>match_extra</EM>
|
||||
is used. Mostly this is down to the extra memory allocations that have to
|
||||
take place.</LI></UL>
|
||||
<H4>sub_match non-member operators</H4>
|
||||
<H5>Comparisons against self</H5>
|
||||
<PRE><A name=o11></A>template <class BidirectionalIterator>
|
||||
bool operator == (const sub_match<BidirectionalIterator>& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);</PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.compare(rhs) == 0</CODE>.</P>
|
||||
<PRE><A name=o12></A>template <class BidirectionalIterator>
|
||||
bool operator != (const sub_match<BidirectionalIterator>& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);</PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.compare(rhs) != 0</CODE>.</P>
|
||||
<PRE><A name=o13></A>template <class BidirectionalIterator>
|
||||
bool operator < (const sub_match<BidirectionalIterator>& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);</PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.compare(rhs) < 0</CODE>.</P>
|
||||
<PRE><A name=o14></A>template <class BidirectionalIterator>
|
||||
bool operator <= (const sub_match<BidirectionalIterator>& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);</PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.compare(rhs) <= 0</CODE>.</P>
|
||||
<PRE><A name=o15></A>template <class BidirectionalIterator>
|
||||
bool operator >= (const sub_match<BidirectionalIterator>& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);</PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.compare(rhs) >= 0</CODE>.</P>
|
||||
<PRE><A name=o16></A>template <class BidirectionalIterator>
|
||||
bool operator > (const sub_match<BidirectionalIterator>& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);</PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.compare(rhs) > 0</CODE>.</P>
|
||||
<H5>Comparisons with std::basic_string</H5>
|
||||
<pre><A name=o21></A>
|
||||
template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool operator == (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits,
|
||||
Allocator>& lhs, const sub_match<BidirectionalIterator>& rhs);
|
||||
</pre>
|
||||
<P><B>Effects: </B>returns <CODE>lhs == rhs.str()</CODE>.</P>
|
||||
<PRE><A name=o22></A>template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool operator != (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);</PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs != rhs.str()</CODE>.</P>
|
||||
<PRE><A name=o23></A>template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool operator < (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);</PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs < rhs.str()</CODE>.</P>
|
||||
<PRE><A name=o24></A>template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool operator > (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);</PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs > rhs.str()</CODE>.</P>
|
||||
<PRE><A name=o25></A>template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool operator >= (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);</PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs >= rhs.str()</CODE>.</P>
|
||||
<PRE><A name=o26></A>template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool operator <= (const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs);</PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs <= rhs.str()</CODE>.</P>
|
||||
<PRE><A name=o31></A>template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool operator == (const sub_match<BidirectionalIterator>& lhs,
|
||||
const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);</PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.str() == rhs</CODE>.</P>
|
||||
<PRE><A name=o32></A>template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool operator != (const sub_match<BidirectionalIterator>& lhs,
|
||||
const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);</PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.str() != rhs</CODE>.</P>
|
||||
<PRE><A name=o33></A>template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool operator < (const sub_match<BidirectionalIterator>& lhs,
|
||||
const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);</PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.str() < rhs</CODE>.</P>
|
||||
<PRE><A name=o34></A>template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool operator > (const sub_match<BidirectionalIterator>& lhs,
|
||||
const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);</PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.str() > rhs</CODE>.</P>
|
||||
<PRE><A name=o35></A>template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool operator >= (const sub_match<BidirectionalIterator>& lhs,
|
||||
const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);</PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.str() >= rhs</CODE>.</P>
|
||||
<PRE><A name=o36></A>template <class BidirectionalIterator, class traits, class Allocator>
|
||||
bool operator <= (const sub_match<BidirectionalIterator>& lhs,
|
||||
const std::basic_string<iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& rhs);</PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.str() <= rhs</CODE>.</P>
|
||||
<H5>Comparisons with null-terminated strings</H5>
|
||||
<PRE><A name=o41></A>template <class BidirectionalIterator>
|
||||
bool operator == (typename iterator_traits<BidirectionalIterator>::value_type const* lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs == rhs.str()</CODE>.</P>
|
||||
<PRE><A name=o42></A>template <class BidirectionalIterator>
|
||||
bool operator != (typename iterator_traits<BidirectionalIterator>::value_type const* lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs != rhs.str()</CODE>.</P>
|
||||
<PRE><A name=o43></A>template <class BidirectionalIterator>
|
||||
bool operator < (typename iterator_traits<BidirectionalIterator>::value_type const* lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs < rhs.str()</CODE>.</P>
|
||||
<PRE><A name=o44></A>template <class BidirectionalIterator>
|
||||
bool operator > (typename iterator_traits<BidirectionalIterator>::value_type const* lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs > rhs.str()</CODE>.</P>
|
||||
<PRE><A name=o45></A>template <class BidirectionalIterator>
|
||||
bool operator >= (typename iterator_traits<BidirectionalIterator>::value_type const* lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs >= rhs.str()</CODE>.</P>
|
||||
<PRE><A name=o46></A>template <class BidirectionalIterator>
|
||||
bool operator <= (typename iterator_traits<BidirectionalIterator>::value_type const* lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs <= rhs.str()</CODE>.</P>
|
||||
<PRE><A name=o51></A>template <class BidirectionalIterator>
|
||||
bool operator == (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const* rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.str() == rhs</CODE>.</P>
|
||||
<PRE><A name=o52></A>template <class BidirectionalIterator>
|
||||
bool operator != (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const* rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.str() != rhs</CODE>.</P>
|
||||
<PRE><A name=o53></A>template <class BidirectionalIterator>
|
||||
bool operator < (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const* rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.str() < rhs</CODE>.</P>
|
||||
<PRE><A name=o54></A>template <class BidirectionalIterator>
|
||||
bool operator > (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const* rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.str() > rhs</CODE>.</P>
|
||||
<PRE><A name=o55></A>template <class BidirectionalIterator>
|
||||
bool operator >= (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const* rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.str() >= rhs</CODE>.</P>
|
||||
<PRE><A name=o56></A>template <class BidirectionalIterator>
|
||||
bool operator <= (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const* rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.str() <= rhs</CODE>.</P>
|
||||
<H5>Comparisons with a single character</H5>
|
||||
<PRE><A name=o61></A>template <class BidirectionalIterator>
|
||||
bool operator == (typename iterator_traits<BidirectionalIterator>::value_type const& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs == rhs.str()</CODE>.</P>
|
||||
<PRE><A name=o62></A>template <class BidirectionalIterator>
|
||||
bool operator != (typename iterator_traits<BidirectionalIterator>::value_type const& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs != rhs.str()</CODE>.</P>
|
||||
<PRE><A name=o63></A>template <class BidirectionalIterator>
|
||||
bool operator < (typename iterator_traits<BidirectionalIterator>::value_type const& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs < rhs.str()</CODE>.</P>
|
||||
<PRE><A name=o64></A>template <class BidirectionalIterator>
|
||||
bool operator > (typename iterator_traits<BidirectionalIterator>::value_type const& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs > rhs.str()</CODE>.</P>
|
||||
<PRE><A name=o65></A>template <class BidirectionalIterator>
|
||||
bool operator >= (typename iterator_traits<BidirectionalIterator>::value_type const& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs >= rhs.str()</CODE>.</P>
|
||||
<PRE><A name=o66></A>template <class BidirectionalIterator>
|
||||
bool operator <= (typename iterator_traits<BidirectionalIterator>::value_type const& lhs,
|
||||
const sub_match<BidirectionalIterator>& rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs <= rhs.str()</CODE>.</P>
|
||||
<PRE><A name=o71></A>template <class BidirectionalIterator>
|
||||
bool operator == (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const& rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.str() == rhs</CODE>.</P>
|
||||
<PRE><A name=o72></A>template <class BidirectionalIterator>
|
||||
bool operator != (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const& rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.str() != rhs</CODE>.</P>
|
||||
<PRE><A name=o73></A>template <class BidirectionalIterator>
|
||||
bool operator < (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const& rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.str() < rhs</CODE>.</P>
|
||||
<PRE><A name=o74></A>template <class BidirectionalIterator>
|
||||
bool operator > (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const& rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.str() > rhs</CODE>.</P>
|
||||
<PRE><A name=o75></A>template <class BidirectionalIterator>
|
||||
bool operator >= (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const& rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.str() >= rhs</CODE>.</P>
|
||||
<PRE><A name=o76></A>template <class BidirectionalIterator>
|
||||
bool operator <= (const sub_match<BidirectionalIterator>& lhs,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const& rhs); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>lhs.str() <= rhs</CODE>.</P>
|
||||
<h5>Addition operators</h5>
|
||||
<P>The addition operators for sub_match allow you to add a sub_match to any type
|
||||
to which you can add a std::string and obtain a new string as the result.</P>
|
||||
<PRE><A name=o81></A>template <class BidirectionalIterator, class traits, class Allocator>
|
||||
std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>
|
||||
operator + (const std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& s,
|
||||
const sub_match<BidirectionalIterator>& m); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>s + m.str()</CODE>.</P>
|
||||
<PRE><A name=o82></A>template <class BidirectionalIterator, class traits, class Allocator>
|
||||
std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>
|
||||
operator + (const sub_match<BidirectionalIterator>& m,
|
||||
const std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type, traits, Allocator>& s); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>m.str() + s</CODE>.</P>
|
||||
<PRE><A name=o83></A>template <class BidirectionalIterator> std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type>
|
||||
operator + (typename iterator_traits<BidirectionalIterator>::value_type const* s,
|
||||
const sub_match<BidirectionalIterator>& m); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>s + m.str()</CODE>.</P>
|
||||
<PRE><A name=o84></A>template <class BidirectionalIterator> std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type>
|
||||
operator + (const sub_match<BidirectionalIterator>& m,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const * s);</PRE>
|
||||
<P><B>Effects: </B>returns <CODE>m.str() + s</CODE>.</P>
|
||||
<PRE><A name=o85></A>template <class BidirectionalIterator>
|
||||
std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type>
|
||||
operator + (typename iterator_traits<BidirectionalIterator>::value_type const& s,
|
||||
const sub_match<BidirectionalIterator>& m); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>s + m.str()</CODE>.</P>
|
||||
<PRE><A name=o86></A>template <class BidirectionalIterator>
|
||||
std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type>
|
||||
operator + (const sub_match<BidirectionalIterator>& m,
|
||||
typename iterator_traits<BidirectionalIterator>::value_type const& s); </PRE>
|
||||
<P><B>Effects: </B>returns <CODE>m.str() + s</CODE>.</P>
|
||||
<PRE><A name=o87></A>template <class BidirectionalIterator>
|
||||
std::basic_string<typename iterator_traits<BidirectionalIterator>::value_type>
|
||||
operator + (const sub_match<BidirectionalIterator>& m1,
|
||||
const sub_match<BidirectionalIterator>& m2);</PRE>
|
||||
<P><B>Effects: </B>returns <CODE>m1.str() + m2.str()</CODE>.</P>
|
||||
<h5>Stream inserter</h5>
|
||||
<PRE><A name=oi></A>template <class charT, class traits, class BidirectionalIterator>
|
||||
basic_ostream<charT, traits>&
|
||||
operator << (basic_ostream<charT, traits>& os,
|
||||
const sub_match<BidirectionalIterator>& m);</PRE>
|
||||
<P>
|
||||
<B>Effects: </B>returns <CODE>(os << m.str())</CODE>.</P>
|
||||
<HR>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
22 Dec 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2004</i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,55 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Regular Expression Syntax</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Regular Expression Syntax</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<P>This section covers the regular expression syntax used by this library. This is
|
||||
a programmer's guide; the actual syntax presented to your program's users will
|
||||
depend upon the <A href="syntax_option_type.html">flags</A> used during
|
||||
expression compilation.
|
||||
</P>
|
||||
<P>There are three main syntax options available, depending upon how
|
||||
you construct the regular expression object:</P>
|
||||
<UL>
|
||||
<LI>
|
||||
<A href="syntax_perl.html">Perl</A> (this is the default behavior).</LI>
|
||||
<LI>
|
||||
<A href="syntax_extended.html">POSIX extended</A> (including the <A href="syntax_extended.html#egrep">
|
||||
egrep</A> and <A href="syntax_extended.html#awk">awk</A> variations).</LI>
|
||||
<LI>
|
||||
<A href="syntax_basic.html">POSIX Basic</A> (including the <A href="syntax_basic.html#grep">
|
||||
grep</A> and <A href="syntax_basic.html#emacs">emacs</A> variations).</LI></UL>
|
||||
<P>You can also construct a regular expression that treats every character as a <A href="syntax_option_type.html#literals">
|
||||
literal</A>, but that's not really a "syntax"!</P>
|
||||
<HR>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
10 Sept 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,238 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: POSIX-Basic Regular Expression Syntax</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td vAlign="top" width="300">
|
||||
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../boost.png" width="277" border="0"></A></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">POSIX Basic Regular Expression Syntax</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<H3>Contents</H3>
|
||||
<dl class="index">
|
||||
<dt><A href="#synopsis">Synopsis</A> <dt><A href="#Basic">POSIX Basic Syntax</A> <dt><A href="#variations">
|
||||
Variations</A>
|
||||
<dd>
|
||||
<dl>
|
||||
<dt><A href="#grep">Grep</A> <dt><A href="#emacs">Emacs</A></dt>
|
||||
</dl>
|
||||
<dt><A href="#options">Options</A> <dt><A href="#refs">References</A></dt>
|
||||
</dl>
|
||||
<H3><A name="synopsis"></A>Synopsis</H3>
|
||||
<P>The POSIX-Basic regular expression syntax is used by the Unix utility <EM>sed</EM>,
|
||||
and variations are used by <EM>grep</EM> and <EM>emacs</EM>. You can
|
||||
construct POSIX basic regular expressions in Boost.Regex by passing the flag <EM>basic</EM>
|
||||
to the regex constructor, for example:</P>
|
||||
<PRE>// e1 is a case sensitive POSIX-Basic expression:
|
||||
boost::regex e1(my_expression, boost::regex::basic);
|
||||
// e2 a case insensitive POSIX-Basic expression:
|
||||
boost::regex e2(my_expression, boost::regex::basic|boost::regex::icase);</PRE>
|
||||
<H3>POSIX Basic Syntax<A name="Basic"></A></H3>
|
||||
<P>In POSIX-Basic regular expressions, all characters match themselves except
|
||||
for the following special characters:</P>
|
||||
<PRE>.[\*^$</PRE>
|
||||
<H4>Wildcard:</H4>
|
||||
<P>The single character '.' when used outside of a character set will match any
|
||||
single character except:</P>
|
||||
<P>The NULL character when the flag <EM>match_not_dot_null</EM> is passed to the
|
||||
matching algorithms.</P>
|
||||
<P>The newline character when the flag <EM>match_not_dot_newline</EM> is passed to
|
||||
the matching algorithms.</P>
|
||||
<H4>Anchors:</H4>
|
||||
<P>A '^' character shall match the start of a line when used as the first
|
||||
character of an expression, or the first character of a sub-expression.</P>
|
||||
<P>A '$' character shall match the end of a line when used as the last character
|
||||
of an expression, or the last character of a sub-expression.</P>
|
||||
<H4>Marked sub-expressions:</H4>
|
||||
<P>A section beginning \( and ending \) acts as a marked sub-expression.
|
||||
Whatever matched the sub-expression is split out in a separate field by the
|
||||
matching algorithms. Marked sub-expressions can also be repeated, or
|
||||
referred-to by a back-reference.</P>
|
||||
<H4>Repeats:</H4>
|
||||
<P>Any atom (a single character, a marked sub-expression, or a character class)
|
||||
can be repeated with the * operator.</P>
|
||||
<P>For example a* will match any number of letter a's repeated zero or more times
|
||||
(an atom repeated zero times matches an empty string), so the expression a*b
|
||||
will match any of the following:</P>
|
||||
<PRE>b
|
||||
ab
|
||||
aaaaaaaab</PRE>
|
||||
<P>An atom can also be repeated with a bounded repeat:</P>
|
||||
<P>a\{n\} Matches 'a' repeated exactly <EM>n</EM> times.</P>
|
||||
<P>a\{n,\} Matches 'a' repeated <EM>n</EM> or more times.</P>
|
||||
<P>a\{n, m\} Matches 'a' repeated between <EM>n</EM> and <EM>m</EM> times
|
||||
inclusive.</P>
|
||||
<P>For example:</P>
|
||||
<PRE>^a\{2,3\}$</PRE>
|
||||
<P>Will match either of:</P>
|
||||
<PRE>aa
|
||||
aaa</PRE>
|
||||
<P>But neither of:</P>
|
||||
<PRE>a
|
||||
aaaa</PRE>
|
||||
<P>It is an error to use a repeat operator, if the preceding construct can not be
|
||||
repeated, for example:</P>
|
||||
<PRE>a\(*\)</PRE>
|
||||
<P>Will raise an error, as there is nothing for the * operator to be applied to.</P>
|
||||
<H4>Back references:</H4>
|
||||
<P>An escape character followed by a digit <EM>n</EM>, where <EM>n </EM>is in the
|
||||
range 1-9, matches the same string that was matched by sub-expression <EM>n</EM>.
|
||||
For example the expression:</P>
|
||||
<PRE>^\(a*\).*\1$</PRE>
|
||||
<P>Will match the string:</P>
|
||||
<PRE>aaabbaaa</PRE>
|
||||
<P>But not the string:</P>
|
||||
<PRE>aaabba</PRE>
|
||||
<H4>Character sets:</H4>
|
||||
<P>A character set is a bracket-expression starting with [ and ending with ], it
|
||||
defines a set of characters, and matches any single character that is a member
|
||||
of that set.</P>
|
||||
<P>A bracket expression may contain any combination of the following:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<H5>Single characters:</H5>
|
||||
<P>For example [abc], will match any of the characters 'a', 'b', or 'c'.</P>
|
||||
<H5>Character ranges:</H5>
|
||||
<P>For example [a-c] will match any single character in the range 'a' to
|
||||
'c'. By default, for POSIX-Basic regular expressions, a character <EM>x</EM>
|
||||
is within the range <EM>y</EM> to <EM>z</EM>, if it collates within that
|
||||
range; this results in locale specific behavior. This behavior can
|
||||
be turned off by unsetting the <EM><A href="syntax_option_type.html#basic">collate</A></EM>
|
||||
option flag - in which case whether a character appears within a range is
|
||||
determined by comparing the code points of the characters only.</P>
|
||||
<H5>Negation:</H5>
|
||||
<P>If the bracket-expression begins with the ^ character, then it matches the
|
||||
complement of the characters it contains, for example [^a-c] matches any
|
||||
character that is not in the range a-c.</P>
|
||||
<H5>Character classes:</H5>
|
||||
<P>An expression of the form [[:name:]] matches the named character class "name",
|
||||
for example [[:lower:]] matches any lower case character. See <A href="character_class_names.html">
|
||||
character class names</A>.</P>
|
||||
<H5>Collating Elements:</H5>
|
||||
<P>An expression of the form [[.col.]] matches the collating element <EM>col</EM>.
|
||||
A collating element is any single character, or any sequence of characters that
|
||||
collates as a single unit. Collating elements may also be used as the end
|
||||
point of a range, for example: [[.ae.]-c] matches the character sequence "ae",
|
||||
plus any single character in the range "ae"-c, assuming that "ae" is treated
|
||||
as a single collating element in the current locale.</P>
|
||||
<P>Collating elements may be used in place of escapes (which are not normally
|
||||
allowed inside character sets), for example [[.^.]abc] would match either one
|
||||
of the characters 'abc^'.</P>
|
||||
<P>As an extension, a collating element may also be specified via its <A href="collating_names.html">
|
||||
symbolic name</A>, for example:</P>
|
||||
<P>[[.NUL.]]</P>
|
||||
<P>matches a NUL character.</P>
|
||||
<H5>Equivalence classes:</H5>
|
||||
<P>
|
||||
An expression of the form [[=col=]], matches any character or collating element
|
||||
whose primary sort key is the same as that for collating element <EM>col</EM>,
|
||||
as with collating elements the name <EM>col</EM> may be a <A href="collating_names.html">
|
||||
symbolic name</A>. A primary sort key is one that ignores case,
|
||||
accentation, or locale-specific tailorings; so for example [[=a=]] matches any
|
||||
of the characters: a, &Agrave;, &Aacute;, &Acirc;, &Atilde;, &Auml;, &Aring;, A, &agrave;, &aacute;, &acirc;, &atilde;, &auml; and &aring;.
|
||||
Unfortunately implementation of this is reliant on the platform's collation and
|
||||
localisation support; this feature can not be relied upon to work portably
|
||||
across all platforms, or even all locales on one platform.</P>
|
||||
</BLOCKQUOTE>
|
||||
<H5>Combinations:</H5>
|
||||
<P>All of the above can be combined in one character set declaration, for example:
|
||||
[[:digit:]a-c[.NUL.]].</P>
|
||||
<H4>Escapes</H4>
|
||||
<P>With the exception of the escape sequences \{, \}, \(, and \), which are
|
||||
documented above, an escape followed by any character matches that
|
||||
character. This can be used to make the special characters .[\*^$,
|
||||
"ordinary". Note that the escape character loses its special meaning
|
||||
inside a character set, so [\^] will match either a literal '\' or a '^'.</P>
|
||||
<H4>What Gets Matched</H4>
|
||||
<P>When there is more than one way to match a regular expression, the "best"
|
||||
possible match is obtained using the <A href="syntax_leftmost_longest.html">leftmost-longest
|
||||
rule</A>.</P>
|
||||
<H3><A name="variations"></A>Variations</H3>
|
||||
<H4><A name="grep"></A>Grep</H4>
|
||||
<P>When an expression is compiled with the flag <EM>grep</EM> set, then the
|
||||
expression is treated as a newline separated list of <A href="#Basic">POSIX-Basic</A>
|
||||
expressions, a match is found if any of the expressions in the list match, for
|
||||
example:</P>
|
||||
<PRE>boost::regex e("abc\ndef", boost::regex::grep);</PRE>
|
||||
<P>will match either of the POSIX-Basic expressions "abc" or "def".</P>
|
||||
<P>As its name suggests, this behavior is consistent with the Unix utility <EM>grep</EM>.</P>
|
||||
<H4><A name="emacs"></A>emacs</H4>
|
||||
<P>In addition to the <A href="#Basic">POSIX-Basic features</A> the following
|
||||
characters are also special:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<P>+ repeats the preceding atom one or more times.</P>
|
||||
<P>? repeats the preceding atom zero or one times.</P>
|
||||
<P>*? A non-greedy version of *.</P>
|
||||
<P>+? A non-greedy version of +.</P>
|
||||
<P>?? A non-greedy version of ?.</P>
|
||||
</BLOCKQUOTE>
|
||||
<P>And the following escape sequences are also recognised:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<P>\| specifies an alternative.</P>
|
||||
<P>\(?: ... \) is a non-marking grouping construct - allows you to
|
||||
lexically group something without spitting out an extra sub-expression.</P>
|
||||
<P>\w matches any word character.</P>
|
||||
<P>\W matches any non-word character.</P>
|
||||
<P>\sx matches any character in the syntax group <EM>x</EM>, the following emacs
|
||||
groupings are supported: 's', ' ', '_', 'w', '.', ')', '(', '"', '\'', '>'
|
||||
and '<'. Refer to the emacs docs for details.</P>
|
||||
<P>\Sx matches any character not in the syntax grouping <EM>x</EM>.</P>
|
||||
<P>\c and \C are not supported.</P>
|
||||
<P>\` matches zero characters only at the start of a buffer (or string being
|
||||
matched).</P>
|
||||
<P>\' matches zero characters only at the end of a buffer (or string being
|
||||
matched).</P>
|
||||
<P>\b matches zero characters at a word boundary.</P>
|
||||
<P>\B matches zero characters, not at a word boundary.</P>
|
||||
<P>\< matches zero characters only at the start of a word.</P>
|
||||
<P>\> matches zero characters only at the end of a word.</P>
|
||||
</BLOCKQUOTE>
|
||||
<P dir="ltr">Finally, you should note that emacs style regular expressions are
|
||||
matched according to the <A href="syntax_perl.html#what">Perl "depth first search"
|
||||
rules</A>. Emacs expressions are matched this way because they contain
|
||||
Perl-like extensions, that do not interact well with the <A href="syntax_leftmost_longest.html">
|
||||
POSIX-style leftmost-longest rule</A>.</P>
|
||||
<H3><A name="options"></A>Options</H3>
|
||||
<P>There are a <A href="syntax_option_type.html#basic">variety of flags</A> that
|
||||
may be combined with the <EM>basic</EM> and <EM>grep</EM> options when
|
||||
constructing the regular expression, in particular note that the <A href="syntax_option_type.html#basic">
|
||||
newline_alt, no_char_classes, no_intervals, bk_plus_qm and bk_plus_vbar</A> options
|
||||
all alter the syntax, while the <A href="syntax_option_type.html#basic">collate
|
||||
and icase</A> options modify how the case and locale sensitivity are to be
|
||||
applied.</P>
|
||||
<H3><A name="refs"></A>References</H3>
|
||||
<P><A href="http://www.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap09.html">IEEE
|
||||
Std 1003.1-2001, Portable Operating System Interface (POSIX ), Base Definitions
|
||||
and Headers, Section 9, Regular Expressions (FWD.1).</A></P>
|
||||
<P><A href="http://www.opengroup.org/onlinepubs/000095399/utilities/grep.html">IEEE
|
||||
Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and
|
||||
Utilities, Section 4, Utilities, grep (FWD.1).</A></P>
|
||||
<P><A href="http://www.gnu.org/software/emacs/">Emacs Version 21.3</A>.</P>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
21 Aug 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<P><I>&copy; Copyright <a href="mailto:jm@regex.fsnet.co.uk">John Maddock</a> 2004</I></P>
|
||||
<I>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>).</I></P>
|
||||
</I>
|
||||
</body>
|
||||
</html>
|
@ -1,520 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: POSIX-Extended Regular Expression Syntax</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td vAlign="top" width="300">
|
||||
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../boost.png" width="277" border="0"></A></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">POSIX-Extended Regular Expression Syntax</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<H3>Contents</H3>
|
||||
<dl class="index">
|
||||
<dt><A href="#synopsis">Synopsis</A> <dt><A href="#extended">POSIX Extended Syntax</A>
|
||||
<dt><A href="#variations">Variations</A>
|
||||
<dd>
|
||||
<dl>
|
||||
<dt><A href="#egrep">egrep</A> <dt><A href="#awk">awk</A> </dt>
|
||||
</dl>
|
||||
<dt><A href="#options">Options</A> <dt><A href="#refs">References</A></dt>
|
||||
</dl>
|
||||
<H3><A name="synopsis"></A>Synopsis</H3>
|
||||
<P>The POSIX-Extended regular expression syntax is supported by the POSIX C
|
||||
regular expression API's, and variations are used by the utilities <EM>egrep</EM>
|
||||
and <EM>awk</EM>. You can construct POSIX extended regular expressions in
|
||||
Boost.Regex by passing the flag <EM>extended</EM> to the regex constructor, for
|
||||
example:</P>
|
||||
<PRE>// e1 is a case sensitive POSIX-Extended expression:
|
||||
boost::regex e1(my_expression, boost::regex::extended);
|
||||
// e2 a case insensitive POSIX-Extended expression:
|
||||
boost::regex e2(my_expression, boost::regex::extended|boost::regex::icase);</PRE>
|
||||
<H3>POSIX Extended Syntax<A name="extended"></A></H3>
|
||||
<P>In POSIX-Extended regular expressions, all characters match themselves except
|
||||
for the following special characters:</P>
|
||||
<PRE>.[{()\*+?|^$</PRE>
|
||||
<H4>Wildcard:</H4>
|
||||
<P>The single character '.' when used outside of a character set will match any
|
||||
single character except:</P>
|
||||
<P>The NULL character when the flag <EM>match_not_dot_null</EM> is passed to the
|
||||
matching algorithms.</P>
|
||||
<P>The newline character when the flag <EM>match_not_dot_newline</EM> is passed to
|
||||
the matching algorithms.</P>
|
||||
<H4>Anchors:</H4>
|
||||
<P>A '^' character shall match the start of a line when used as the first
|
||||
character of an expression, or the first character of a sub-expression.</P>
|
||||
<P>A '$' character shall match the end of a line when used as the last character
|
||||
of an expression, or the last character of a sub-expression.</P>
|
||||
<H4>Marked sub-expressions:</H4>
|
||||
<P>A section beginning ( and ending ) acts as a marked sub-expression.
|
||||
Whatever matched the sub-expression is split out in a separate field by the
|
||||
matching algorithms. Marked sub-expressions can also be repeated, or
|
||||
referred to by a back-reference.</P>
|
||||
<H4>Repeats:</H4>
|
||||
<P>Any atom (a single character, a marked sub-expression, or a character class)
|
||||
can be repeated with the *, +, ?, and {} operators.</P>
|
||||
<P>The * operator will match the preceding atom zero or more times, for example
|
||||
the expression a*b will match any of the following:</P>
|
||||
<PRE>b
|
||||
ab
|
||||
aaaaaaaab</PRE>
|
||||
<P>The + operator will match the preceding atom one or more times, for example the
|
||||
expression a+b will match any of the following:</P>
|
||||
<PRE>ab
|
||||
aaaaaaaab</PRE>
|
||||
<P>But will not match:</P>
|
||||
<PRE>b</PRE>
|
||||
<P>The ? operator will match the preceding atom zero or one times, for
|
||||
example the expression ca?b will match any of the following:</P>
|
||||
<PRE>cb
|
||||
cab</PRE>
|
||||
<P>But will not match:</P>
|
||||
<PRE>caab</PRE>
|
||||
<P>An atom can also be repeated with a bounded repeat:</P>
|
||||
<P>a{n} Matches 'a' repeated exactly <EM>n</EM> times.</P>
|
||||
<P>a{n,} Matches 'a' repeated <EM>n</EM> or more times.</P>
|
||||
<P>a{n, m} Matches 'a' repeated between <EM>n</EM> and <EM>m</EM> times
|
||||
inclusive.</P>
|
||||
<P>For example:</P>
|
||||
<PRE>^a{2,3}$</PRE>
|
||||
<P>Will match either of:</P>
|
||||
<PRE>aa
|
||||
aaa</PRE>
|
||||
<P>But neither of:</P>
|
||||
<PRE>a
|
||||
aaaa</PRE>
|
||||
<P>It is an error to use a repeat operator, if the preceding construct can not be
|
||||
repeated, for example:</P>
|
||||
<PRE>a(*)</PRE>
|
||||
<P>Will raise an error, as there is nothing for the * operator to be applied to.</P>
|
||||
<H4>Back references:</H4>
|
||||
<P>An escape character followed by a digit <EM>n</EM>, where <EM>n </EM>is in the
|
||||
range 1-9, matches the same string that was matched by sub-expression <EM>n</EM>.
|
||||
For example the expression:</P>
|
||||
<PRE>^(a*).*\1$</PRE>
|
||||
<P>Will match the string:</P>
|
||||
<PRE>aaabbaaa</PRE>
|
||||
<P>But not the string:</P>
|
||||
<PRE>aaabba</PRE>
|
||||
<P><EM><STRONG>Caution</STRONG>: the POSIX standard does not support back-references
|
||||
for "extended" regular expressions, this is a compatible extension to that
|
||||
standard.</EM></P>
|
||||
<H4>Alternation</H4>
|
||||
<P>The | operator will match either of its arguments, so for example: abc|def will
|
||||
match either "abc" or "def".
|
||||
</P>
|
||||
<P>Parentheses can be used to group alternations, for example: ab(d|ef) will match
|
||||
either of "abd" or "abef".</P>
|
||||
<H4>Character sets:</H4>
|
||||
<P>A character set is a bracket-expression starting with [ and ending with ], it
|
||||
defines a set of characters, and matches any single character that is a member
|
||||
of that set.</P>
|
||||
<P>A bracket expression may contain any combination of the following:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<H5>Single characters:</H5>
|
||||
<P>For example [abc], will match any of the characters 'a', 'b', or 'c'.</P>
|
||||
<H5>Character ranges:</H5>
|
||||
<P>For example [a-c] will match any single character in the range 'a' to
|
||||
'c'. By default, for POSIX-Extended regular expressions, a character <EM>x</EM>
|
||||
is within the range <EM>y</EM> to <EM>z</EM>, if it collates within that
|
||||
range; <EM><STRONG>this results in locale specific behavior</STRONG></EM> .
|
||||
This behavior can be turned off by unsetting the <EM><A href="syntax_option_type.html#extended">
|
||||
collate</A></EM> option flag - in which case whether a character appears
|
||||
within a range is determined by comparing the code points of the characters
|
||||
only.</P>
|
||||
<H5>Negation:</H5>
|
||||
<P>If the bracket-expression begins with the ^ character, then it matches the
|
||||
complement of the characters it contains, for example [^a-c] matches any
|
||||
character that is not in the range a-c.</P>
|
||||
<H5>Character classes:</H5>
|
||||
<P>An expression of the form [[:name:]] matches the named character class "name",
|
||||
for example [[:lower:]] matches any lower case character. See <A href="character_class_names.html">
|
||||
character class names</A>.</P>
|
||||
<H5>Collating Elements:</H5>
|
||||
<P>An expression of the form [[.col.]] matches the collating element <EM>col</EM>.
|
||||
A collating element is any single character, or any sequence of characters that
|
||||
collates as a single unit. Collating elements may also be used as the end
|
||||
point of a range, for example: [[.ae.]-c] matches the character sequence "ae",
|
||||
plus any single character in the range "ae"-c, assuming that "ae" is treated as
|
||||
a single collating element in the current locale.</P>
|
||||
<P>Collating elements may be used in place of escapes (which are not normally
|
||||
allowed inside character sets), for example [[.^.]abc] would match either one
|
||||
of the characters 'abc^'.</P>
|
||||
<P>As an extension, a collating element may also be specified via its <A href="collating_names.html">
|
||||
symbolic name</A>, for example:</P>
|
||||
<P>[[.NUL.]]</P>
|
||||
<P>matches a NUL character.</P>
|
||||
<H5>Equivalence classes:</H5>
|
||||
<P>
|
||||
An expression of the form [[=col=]], matches any character or collating element
|
||||
whose primary sort key is the same as that for collating element <EM>col</EM>,
|
||||
as with collating elements the name <EM>col</EM> may be a <A href="collating_names.html">
|
||||
symbolic name</A>. A primary sort key is one that ignores case,
|
||||
accentation, or locale-specific tailorings; so for example [[=a=]] matches any
|
||||
of the characters: a, &Agrave;, &Aacute;, &Acirc;, &Atilde;, &Auml;, &Aring;, A, &agrave;, &aacute;, &acirc;, &atilde;, &auml; and &aring;.
|
||||
Unfortunately implementation of this is reliant on the platform's collation and
|
||||
localisation support; this feature can not be relied upon to work portably
|
||||
across all platforms, or even all locales on one platform.</P>
|
||||
</BLOCKQUOTE>
|
||||
<H5>Combinations:</H5>
|
||||
<P>All of the above can be combined in one character set declaration, for example:
|
||||
[[:digit:]a-c[.NUL.]].</P>
|
||||
<H4>Escapes</H4>
|
||||
<P>The POSIX standard defines no escape sequences for POSIX-Extended regular
|
||||
expressions, except that:</P>
|
||||
<UL>
|
||||
<LI>
|
||||
Any special character preceded by an escape shall match itself.
|
||||
<LI>
|
||||
The effect of any ordinary character being preceded by an escape is undefined.
|
||||
<LI>
|
||||
An escape inside a character class declaration shall match itself: in other
|
||||
words the escape character is not "special" inside a character class
|
||||
declaration; so [\^] will match either a literal '\' or a '^'.</LI></UL>
|
||||
<P>However, that's rather restrictive, so the following standard-compatible
|
||||
extensions are also supported by Boost.Regex:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<H5>Escapes matching a specific character</H5>
|
||||
<P>The following escape sequences are all synonyms for single characters:</P>
|
||||
<P>
|
||||
<TABLE id="Table7" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Escape</STRONG></TD>
|
||||
<TD><STRONG>Character</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\a</TD>
|
||||
<TD>'\a'</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\e</TD>
|
||||
<TD>0x1B</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\f</TD>
|
||||
<TD>\f</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\n</TD>
|
||||
<TD>\n</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\r</TD>
|
||||
<TD>\r</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\t</TD>
|
||||
<TD>\t</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\v</TD>
|
||||
<TD>\v</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\b</TD>
|
||||
<TD>\b (but only inside a character class declaration).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\cX</TD>
|
||||
<TD>An ASCII escape sequence - the character whose code point is X % 32</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\xdd</TD>
|
||||
<TD>A hexadecimal escape sequence - matches the single character whose code point
|
||||
is 0xdd.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\x{dddd}</TD>
|
||||
<TD>A hexadecimal escape sequence - matches the single character whose code point
|
||||
is 0xdddd.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\0ddd</TD>
|
||||
<TD>An octal escape sequence - matches the single character whose code point is
|
||||
0ddd.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\N{Name}</TD>
|
||||
<TD>Matches the single character which has the <A href="collating_names.html">symbolic
|
||||
name</A> <EM>name. </EM>For example \N{newline} matches the single
|
||||
character \n.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>"Single character" character classes:</H5>
|
||||
<P>Any escaped character <EM>x</EM>, if <EM>x</EM> is the name of a character
|
||||
class shall match any character that is a member of that class, and any escaped
|
||||
character <EM>X</EM>, if <EM>x</EM> is the name of a character class, shall
|
||||
match any character not in that class.</P>
|
||||
<P>The following are supported by default:</P>
|
||||
<P>
|
||||
<TABLE id="Table3" cellSpacing="1" cellPadding="1" width="300" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Escape sequence</STRONG></TD>
|
||||
<TD><STRONG>Equivalent to</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\d</TD>
|
||||
<TD>[[:digit:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\l</TD>
|
||||
<TD>[[:lower:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\s</TD>
|
||||
<TD>[[:space:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\u</TD>
|
||||
<TD>[[:upper:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\w</TD>
|
||||
<TD>[[:word:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\D</TD>
|
||||
<TD>[^[:digit:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\L</TD>
|
||||
<TD>[^[:lower:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\S</TD>
|
||||
<TD>[^[:space:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\U</TD>
|
||||
<TD>[^[:upper:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\W</TD>
|
||||
<TD>[^[:word:]]</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>
|
||||
<H5>Character Properties</H5>
|
||||
</H5>
|
||||
<P dir="ltr">The character property names in the following table are all
|
||||
equivalent to the <A href="character_class_names.html">names used in character
|
||||
classes</A>.</P>
|
||||
<H5>
|
||||
<TABLE id="Table9" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<TD><STRONG>Form</STRONG></TD>
|
||||
<TD><STRONG>Description</STRONG></TD>
|
||||
<TD><STRONG>Equivalent character set form</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\pX</TD>
|
||||
<TD>Matches any character that has the property X.</TD>
|
||||
<TD>[[:X:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\p{Name}</TD>
|
||||
<TD>Matches any character that has the property <EM>Name</EM>.</TD>
|
||||
<TD>[[:Name:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\PX</TD>
|
||||
<TD>Matches any character that does not have the property X.</TD>
|
||||
<TD>[^[:X:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\P{Name}</TD>
|
||||
<TD>Matches any character that does not have the property <EM>Name</EM>.</TD>
|
||||
<TD>[^[:Name:]]</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</H5>
|
||||
<H5>Word Boundaries</H5>
|
||||
<P>The following escape sequences match the boundaries of words:</P>
|
||||
<P>
|
||||
<TABLE id="Table4" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>\<</TD>
|
||||
<TD>Matches the start of a word.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\></TD>
|
||||
<TD>Matches the end of a word.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\b</TD>
|
||||
<TD>Matches a word boundary (the start or end of a word).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\B</TD>
|
||||
<TD>Matches only when not at a word boundary.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Buffer boundaries</H5>
|
||||
<P>The following match only at buffer boundaries: a "buffer" in this context is
|
||||
the whole of the input text that is being matched against (note that ^ and
|
||||
$ may match embedded newlines within the text).</P>
|
||||
<P>
|
||||
<TABLE id="Table5" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>\`</TD>
|
||||
<TD>Matches at the start of a buffer only.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\'</TD>
|
||||
<TD>Matches at the end of a buffer only.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\A</TD>
|
||||
<TD>Matches at the start of a buffer only (the same as \`).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\z</TD>
|
||||
<TD>Matches at the end of a buffer only (the same as \').</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\Z</TD>
|
||||
<TD>Matches an optional sequence of newlines at the end of a buffer: equivalent to
|
||||
the regular expression \n*\z</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Continuation Escape</H5>
|
||||
<P>The sequence \G matches only at the end of the last match found, or at the
|
||||
start of the text being matched if no previous match was found. This
|
||||
escape is useful if you're iterating over the matches contained within a text, and
|
||||
you want each subsequent match to start where the last one ended.</P>
|
||||
<H5>Quoting escape</H5>
|
||||
<P>The escape sequence \Q begins a "quoted sequence": all the subsequent
|
||||
characters are treated as literals, until either the end of the regular
|
||||
expression or \E is found. For example the expression: \Q\*+\Ea+ would
|
||||
match either of:</P>
|
||||
<PRE>\*+a<BR>\*+aaa</PRE>
|
||||
<H5>Unicode escapes</H5>
|
||||
<P>
|
||||
<TABLE id="Table6" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>\C</TD>
|
||||
<TD>Matches a single code point: in Boost regex this has exactly the same effect
|
||||
as a "." operator.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\X</TD>
|
||||
<TD>Matches a combining character sequence: that is any non-combining character
|
||||
followed by a sequence of zero or more combining characters.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Any other escape</H5>
|
||||
<P>Any other escape sequence matches the character that is escaped, for example \@
|
||||
matches a literal '@'.</P>
|
||||
</BLOCKQUOTE><A name="variations">
|
||||
<H4>Operator precedence</H4>
|
||||
<P> The order of precedence for operators is as shown in the following
|
||||
table:</P>
|
||||
<P>
|
||||
<TABLE id="Table2" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>Collation-related bracket symbols</TD>
|
||||
<TD>[==] [::] [..]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Escaped characters
|
||||
</TD>
|
||||
<TD>\</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Character set (bracket expression)
|
||||
</TD>
|
||||
<TD>[]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Grouping</TD>
|
||||
<TD>()</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Single-character-ERE duplication
|
||||
</TD>
|
||||
<TD>* + ? {m,n}</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Concatenation</TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Anchoring</TD>
|
||||
<TD>^$</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Alternation</TD>
|
||||
<TD>|</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
</A>
|
||||
<H4>What Gets Matched</H4>
|
||||
<P>When there is more than one way to match a regular expression, the "best"
|
||||
possible match is obtained using the <A href="syntax_leftmost_longest.html">leftmost-longest
|
||||
rule</A>.</P>
|
||||
<H3>Variations</H3>
|
||||
<H4>Egrep<A name="egrep"></A></H4>
|
||||
<P>When an expression is compiled with the flag <EM>egrep</EM> set, then the
|
||||
expression is treated as a newline separated list of POSIX-Extended
|
||||
expressions, a match is found if any of the expressions in the list match, for
|
||||
example:</P>
|
||||
<PRE>boost::regex e("abc\ndef", boost::regex::egrep);</PRE>
|
||||
<P>will match either of the POSIX-Extended expressions "abc" or "def".</P>
|
||||
<P>As its name suggests, this behavior is consistent with the Unix utility <EM>egrep</EM>,
|
||||
and with <EM>grep</EM> when used with the -E option.</P>
|
||||
<H4>awk<A name="awk"></A></H4>
|
||||
<P>In addition to the <A href="#extended">POSIX-Extended features</A> the
|
||||
escape character is special inside a character class declaration. </P>
|
||||
<P>In addition, some escape sequences that are not defined as part of
|
||||
POSIX-Extended specification are required to be supported - however Boost.Regex
|
||||
supports these by default anyway.</P>
|
||||
<H3><A name="options"></A>Options</H3>
|
||||
<P>There are a <A href="syntax_option_type.html#extended">variety of flags</A> that
|
||||
may be combined with the <EM>extended</EM> and <EM>egrep</EM> options when
|
||||
constructing the regular expression, in particular note that the <A href="syntax_option_type.html#extended">
|
||||
newline_alt</A> option alters the syntax, while the <A href="syntax_option_type.html#extended">
|
||||
collate, nosubs and icase</A> options modify how the case and locale
|
||||
sensitivity are to be applied.</P>
|
||||
<H3><A name="refs"></A>References</H3>
|
||||
<P><A href="http://www.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap09.html"> IEEE
|
||||
Std 1003.1-2001, Portable Operating System Interface (POSIX ), Base Definitions
|
||||
and Headers, Section 9, Regular Expressions.</A></P>
|
||||
<P><A href="http://www.opengroup.org/onlinepubs/000095399/utilities/grep.html"> IEEE
|
||||
Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and
|
||||
Utilities, Section 4, Utilities, egrep.</A></P>
|
||||
<P><A href="http://www.opengroup.org/onlinepubs/000095399/utilities/awk.html">IEEE
|
||||
Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and
|
||||
Utilities, Section 4, Utilities, awk.</A></P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
21 Aug 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<P><I>&copy; Copyright <a href="mailto:jm@regex.fsnet.co.uk">John Maddock</a> 2004</I></P>
|
||||
<I>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>).</I></P>
|
||||
</I>
|
||||
</body>
|
||||
</html>
|
@ -1,65 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Index</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">The "Leftmost Longest" Rule</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>Often there is more than one way of matching a regular expression at a
|
||||
particular location, for POSIX basic and extended regular expressions, the
|
||||
"best" match is determined as follows:</P>
|
||||
<OL>
|
||||
<LI>
|
||||
Find the leftmost match, if there is only one match possible at this location
|
||||
then return it.</LI>
|
||||
<LI>
|
||||
Find the longest of the possible matches, along with any ties. If there
|
||||
is only one such possible match then return it.</LI>
|
||||
<LI>
|
||||
If there are no marked sub-expressions, then all the remaining alternatives are
|
||||
indistinguishable; return the first of these found.</LI>
|
||||
<LI>
|
||||
Find the match which has matched the first sub-expression in the leftmost
|
||||
position, along with any ties. If there is only one such match possible
|
||||
then return it.</LI>
|
||||
<LI>
|
||||
Find the match which has the longest match for the first sub-expression, along
|
||||
with any ties. If there is only one such match then return it.</LI>
|
||||
<LI>
|
||||
Repeat steps 4 and 5 for each additional marked sub-expression.</LI>
|
||||
<LI>
|
||||
If there is still more than one possible match remaining, then they are
|
||||
indistinguishable; return the first one found.</LI></OL>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
16 Dec 2004</p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -1,543 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: syntax_option_type</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">syntax_option_type</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<H3>Contents</H3>
|
||||
<dl class="index">
|
||||
<dt><a href="#synopsis">Synopsis</a> <dt><a href="#description">Description</a>
|
||||
<dd>
|
||||
<dl class="index">
|
||||
<dt><a href="#perl">Options for Perl Regular Expressions</a> <dt><a href="#extended">Options
|
||||
for POSIX Extended Regular Expressions</a> <dt><a href="#basic">Options for POSIX
|
||||
Basic Regular Expressions</a> <dt><a href="#literals">Options for String Literals</a></dt>
|
||||
</dl>
|
||||
</dd>
|
||||
</dl>
|
||||
<H3><A name="synopsis"></A>Synopsis</H3>
|
||||
<P>Type syntax_option_type is an implementation specific bitmask type that
|
||||
controls how a regular expression string is to be interpreted. For
|
||||
convenience note that all the constants listed here, are also duplicated within
|
||||
the scope of class template <A href="basic_regex.html">basic_regex</A>.</P>
|
||||
<PRE>namespace std{ namespace regex_constants{
|
||||
|
||||
typedef <EM>implementation-specific-bitmask-type</EM> syntax_option_type;<BR>
|
||||
// these flags are standardized:
|
||||
static const syntax_option_type normal;
|
||||
static const syntax_option_type ECMAScript = normal;
|
||||
static const syntax_option_type JavaScript = normal;
|
||||
static const syntax_option_type JScript = normal;
|
||||
static const syntax_option_type perl = normal;<BR>static const syntax_option_type basic;
|
||||
static const syntax_option_type sed = basic;
|
||||
static const syntax_option_type extended;
|
||||
static const syntax_option_type awk;
|
||||
static const syntax_option_type grep;
|
||||
static const syntax_option_type egrep;
|
||||
static const syntax_option_type icase;
|
||||
static const syntax_option_type nosubs;
|
||||
static const syntax_option_type optimize;
|
||||
static const syntax_option_type collate;
|
||||
// other boost.regex specific options are listed below<BR>
|
||||
} // namespace regex_constants
|
||||
} // namespace std</PRE>
|
||||
<H3><A name="description"></A>Description</H3>
|
||||
<P>The type <CODE>syntax_option_type</CODE> is an implementation specific bitmask
|
||||
type (17.3.2.1.2). Setting its elements has the effects listed in the table
|
||||
below, a valid value of type <CODE>syntax_option_type</CODE> will always have
|
||||
exactly one of the elements <CODE>normal, basic, extended, awk, grep, egrep, sed,
|
||||
literal or perl</CODE> set.</P>
|
||||
<P>Note that for convenience all the constants listed here are duplicated within
|
||||
the scope of class template basic_regex, so you can use any of:</P>
|
||||
<PRE>boost::regex_constants::constant_name</PRE>
|
||||
<P>or</P>
|
||||
<PRE>boost::regex::constant_name</PRE>
|
||||
<P>or</P>
|
||||
<PRE>boost::wregex::constant_name</PRE>
|
||||
<P>in an interchangeable manner.</P>
|
||||
<H4><A name="perl"></A>Options for Perl Regular Expressions:</H4>
|
||||
<P>One of the following must always be set for perl regular expressions:</P>
|
||||
<P>
|
||||
<TABLE id="Table4" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Element</STRONG></TD>
|
||||
<TD><STRONG>Standardized</STRONG></TD>
|
||||
<TD><STRONG>Effect when set</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>
|
||||
<P>ECMAScript</P>
|
||||
</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that the grammar recognized by the regular expression engine uses its
|
||||
normal semantics: that is the same as that given in the ECMA-262, ECMAScript
|
||||
Language Specification, Chapter 15 part 10, RegExp (Regular Expression) Objects
|
||||
(FWD.1).</P>
|
||||
<P>boost.regex also recognizes all of the perl-compatible (?...) extensions in
|
||||
this mode.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>perl</TD>
|
||||
<TD>No</TD>
|
||||
<TD>As above.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>normal</TD>
|
||||
<TD>No</TD>
|
||||
<TD>As above.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>JavaScript</TD>
|
||||
<TD>No</TD>
|
||||
<TD>As above.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>JScript</TD>
|
||||
<TD>No</TD>
|
||||
<TD>As above.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>The following options may also be set when using perl-style regular
|
||||
expressions:</P>
|
||||
<P>
|
||||
<TABLE id="Table5" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Element</STRONG></TD>
|
||||
<TD><STRONG>Standardized</STRONG></TD>
|
||||
<TD><STRONG>Effect when set</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>icase</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that matching of regular expressions against a character container
|
||||
sequence shall be performed without regard to case.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>nosubs</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that when a regular expression is matched against a character
|
||||
container sequence, then no sub-expression matches are to be stored in the
|
||||
supplied match_results structure.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>optimize</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that the regular expression engine should pay more attention to the
|
||||
speed with which regular expressions are matched, and less to the speed with
|
||||
which regular expression objects are constructed. Otherwise it has no
|
||||
detectable effect on the program output. This currently has no effect for
|
||||
Boost.Regex.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>collate</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that character ranges of the form "[a-b]" should be locale sensitive.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>newline_alt</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Specifies that the \n character has the same effect as the alternation
|
||||
operator |. Allows newline separated lists to be used as a list of
|
||||
alternatives.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>no_except</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Prevents basic_regex from throwing an exception when an invalid expression is
|
||||
encountered.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>no_mod_m</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Normally Boost.Regex behaves as if the Perl m-modifier is on: so the
|
||||
assertions ^ and $ match after and before embedded newlines respectively,
|
||||
setting this flag is equivalent to prefixing the expression with (?-m).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>no_mod_s</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Normally whether Boost.Regex will match "." against a newline character is
|
||||
determined by the <A href="match_flag_type.html">match flag</A> match_not_dot_newline.
|
||||
Specifying this flag is equivalent to prefixing the expression with (?-s) and
|
||||
therefore causes "." not to match a newline character regardless of whether
|
||||
match_not_dot_newline is set in the match flags.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>mod_s</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Normally whether Boost.Regex will match "." against a newline character is
|
||||
determined by the <A href="match_flag_type.html">match flag</A> match_not_dot_newline.
|
||||
Specifying this flag is equivalent to prefixing the expression with (?s) and
|
||||
therefore causes "." to match a newline character regardless of whether
|
||||
match_not_dot_newline is set in the match flags.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>mod_x</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Turns on the perl x-modifier: causes unescaped whitespace in the expression to
|
||||
be ignored.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H4><A name="extended"></A>Options for POSIX Extended Regular Expressions:</H4>
|
||||
<P>Exactly one of the following must always be set for POSIX extended regular
|
||||
expressions:</P>
|
||||
<P>
|
||||
<TABLE id="Table6" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>Element</TD>
|
||||
<TD>Standardized</TD>
|
||||
<TD>Effect when set</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>extended</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that the grammar recognized by the regular expression engine is the
|
||||
same as that used by POSIX extended regular expressions in IEEE Std
|
||||
1003.1-2001, Portable Operating System Interface (POSIX ), Base Definitions and
|
||||
Headers, Section 9, Regular Expressions (FWD.1).
|
||||
</P>
|
||||
<P>In addition some perl-style escape sequences are supported (The POSIX standard
|
||||
specifies that only "special" characters may be escaped, all other escape
|
||||
sequences result in undefined behavior).</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>egrep</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that the grammar recognized by the regular expression engine is the
|
||||
same as that used by POSIX utility grep when given the -E option in IEEE Std
|
||||
1003.1-2001, Portable Operating System Interface (POSIX ), Shells and
|
||||
Utilities, Section 4, Utilities, grep (FWD.1).</P>
|
||||
<P>That is to say, the same as POSIX extended syntax, but with the newline
|
||||
character acting as an alternation character in addition to "|".</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>awk</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that the grammar recognized by the regular expression engine is the
|
||||
same as that used by POSIX utility awk in IEEE Std 1003.1-2001, Portable
|
||||
Operating System Interface (POSIX ), Shells and Utilities, Section 4, awk
|
||||
(FWD.1).</P>
|
||||
<P>That is to say: the same as POSIX extended syntax, but with escape sequences in
|
||||
character classes permitted.</P>
|
||||
<P>In addition some perl-style escape sequences are supported (actually the awk
|
||||
syntax only requires \a \b \t \v \f \n and \r to be recognised, all other
|
||||
Perl-style escape sequences invoke undefined behavior according to the POSIX
|
||||
standard, but are in fact recognised by Boost.Regex).</P>
|
||||
</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>The following options may also be set when using POSIX extended regular
|
||||
expressions:</P>
|
||||
<P>
|
||||
<TABLE id="Table7" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Element</STRONG></TD>
|
||||
<TD><STRONG>Standardized</STRONG></TD>
|
||||
<TD><STRONG>Effect when set</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>icase</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that matching of regular expressions against a character container
|
||||
sequence shall be performed without regard to case.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>nosubs</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that when a regular expression is matched against a character
|
||||
container sequence, then no sub-expression matches are to be stored in the
|
||||
supplied match_results structure.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>optimize</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that the regular expression engine should pay more attention to the
|
||||
speed with which regular expressions are matched, and less to the speed with
|
||||
which regular expression objects are constructed. Otherwise it has no
|
||||
detectable effect on the program output. This currently has no effect for
|
||||
boost.regex.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>collate</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that character ranges of the form "[a-b]" should be locale
|
||||
sensitive. <STRONG>This bit is</STRONG> <STRONG>on by default</STRONG> for
|
||||
POSIX-Extended regular expressions, but can be unset to force ranges to be
|
||||
compared by code point only.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>newline_alt</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Specifies that the \n character has the same effect as the alternation
|
||||
operator |. Allows newline separated lists to be used as a list of
|
||||
alternatives.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>no_escape_in_lists</TD>
|
||||
<TD>No</TD>
|
||||
<TD>When set this makes the escape character ordinary inside lists, so that [\b]
|
||||
would match either '\' or 'b'. <STRONG>This bit is on by default</STRONG> for
|
||||
POSIX-Extended regular expressions, but can be unset to force escapes to be
|
||||
recognised inside lists.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>no_bk_refs</TD>
|
||||
<TD>No</TD>
|
||||
<TD>When set then backreferences are disabled. <STRONG>This bit is</STRONG> <STRONG>
|
||||
on by default</STRONG> for POSIX-Extended regular expressions, but can be
|
||||
unset to turn support for backreferences on.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>no_except</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Prevents basic_regex from throwing an exception when an invalid expression is
|
||||
encountered.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H4><A name="basic"></A>Options for POSIX Basic Regular Expressions:</H4>
|
||||
<P>Exactly one of the following must always be set for POSIX basic regular
|
||||
expressions:</P>
|
||||
<P>
|
||||
<TABLE id="Table8" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>Element</TD>
|
||||
<TD>Standardized</TD>
|
||||
<TD>Effect When Set</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>basic</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that the grammar recognized by the regular expression engine is the
|
||||
same as that used by <A href="syntax_basic.html#Basic">POSIX basic regular
|
||||
expressions</A> in IEEE Std 1003.1-2001, Portable Operating System Interface
|
||||
(POSIX ), Base Definitions and Headers, Section 9, Regular Expressions (FWD.1).
|
||||
</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>sed</TD>
|
||||
<TD>No</TD>
|
||||
<TD>As Above.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>grep</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that the grammar recognized by the regular expression engine is the
|
||||
same as that used by <A href="syntax_basic.html#grep">POSIX utility grep</A> in
|
||||
IEEE Std 1003.1-2001, Portable Operating System Interface (POSIX ), Shells and
|
||||
Utilities, Section 4, Utilities, grep (FWD.1).</P>
|
||||
<P>That is to say, the same as POSIX basic syntax, but with the newline character
|
||||
acting as an alternation character; the expression is treated as a newline
|
||||
separated list of alternatives.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>emacs</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Specifies that the grammar recognised is the superset of the POSIX-Basic
|
||||
syntax used by the <A href="syntax_basic.html#emacs">emacs</A> program.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>The following options may also be set when using POSIX basic regular
|
||||
expressions:</P>
|
||||
<P>
|
||||
<TABLE id="Table9" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Element</STRONG></TD>
|
||||
<TD><STRONG>Standardized</STRONG></TD>
|
||||
<TD><STRONG>Effect when set</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>icase</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that matching of regular expressions against a character container
|
||||
sequence shall be performed without regard to case.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>nosubs</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that when a regular expression is matched against a character
|
||||
container sequence, then no sub-expression matches are to be stored in the
|
||||
supplied match_results structure.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>optimize</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that the regular expression engine should pay more attention to the
|
||||
speed with which regular expressions are matched, and less to the speed with
|
||||
which regular expression objects are constructed. Otherwise it has no
|
||||
detectable effect on the program output. This currently has no effect for
|
||||
boost.regex.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>collate</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that character ranges of the form "[a-b]" should be locale
|
||||
sensitive. <STRONG>This bit is</STRONG> <STRONG>on by default</STRONG> for
|
||||
POSIX-Basic regular expressions, but can be unset to force ranges to be
|
||||
compared by code point only.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>newline_alt</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Specifies that the \n character has the same effect as the alternation
|
||||
operator |. Allows newline separated lists to be used as a list of
|
||||
alternatives. This bit is already set, if you use the <EM>grep</EM> option.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>no_char_classes</TD>
|
||||
<TD>No</TD>
|
||||
<TD>When set then character classes such as [[:alnum:]] are not allowed.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>no_escape_in_lists</TD>
|
||||
<TD>No</TD>
|
||||
<TD>When set this makes the escape character ordinary inside lists, so that [\b]
|
||||
would match either '\' or 'b'. <STRONG>This bit is on by default</STRONG> for
|
||||
POSIX-basic regular expressions, but can be unset to force escapes to be
|
||||
recognised inside lists.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>no_intervals</TD>
|
||||
<TD>No</TD>
|
||||
<TD>When set then bounded repeats such as a{2,3} are not permitted.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>bk_plus_qm</TD>
|
||||
<TD>No</TD>
|
||||
<TD>When set then \? acts as a zero-or-one repeat operator, and \+ acts as a
|
||||
one-or-more repeat operator.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>bk_vbar</TD>
|
||||
<TD>No</TD>
|
||||
<TD>When set then \| acts as the alternation operator.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>no_except</TD>
|
||||
<TD>No</TD>
|
||||
<TD>Prevents basic_regex from throwing an exception when an invalid expression is
|
||||
encountered.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H4><A name="literals"></A>Options for Literal Strings:</H4>
|
||||
<P>The following must always be set to interpret the expression as a string
|
||||
literal:</P>
|
||||
<P>
|
||||
<TABLE id="Table10" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>Element</TD>
|
||||
<TD>Standardized</TD>
|
||||
<TD>Effect when set</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>literal</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>Treat the string as a literal (no special characters).</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>The following options may also be combined with the <EM>literal</EM> flag:</P>
|
||||
<P>
|
||||
<TABLE id="Table11" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>Element</TD>
|
||||
<TD>Standardized</TD>
|
||||
<TD>Effect when set</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>icase</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that matching of regular expressions against a character container
|
||||
sequence shall be performed without regard to case.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>optimize</TD>
|
||||
<TD>Yes</TD>
|
||||
<TD>
|
||||
<P>Specifies that the regular expression engine should pay more attention to the
|
||||
speed with which regular expressions are matched, and less to the speed with
|
||||
which regular expression objects are constructed. Otherwise it has no
|
||||
detectable effect on the program output. This currently has no effect for
|
||||
boost.regex.</P>
|
||||
</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P> </P>
|
||||
<HR>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
23 June 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
@ -1,626 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Perl Regular Expression Syntax</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td vAlign="top" width="300">
|
||||
<h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../boost.png" width="277" border="0"></A></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">
|
||||
Perl Regular Expression Syntax</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<H3>Contents</H3>
|
||||
<dl class="index">
|
||||
<dt><A href="#synopsis">Synopsis</A> <dt><A href="#Perl">Perl Syntax</A> <dt><A href="#what">
|
||||
What Gets Matched</A> <dt><A href="#variations">Variations</A>
|
||||
<dd>
|
||||
<dt><A href="#options">Options</A> <dt><A href="#mods">Modifiers</A> <dt><A href="#refs">References</A></dt>
|
||||
</dl>
|
||||
<H3><A name="synopsis"></A>Synopsis</H3>
|
||||
<P>The Perl regular expression syntax is based on that used by the programming
|
||||
language <EM>Perl</EM> . Perl regular expressions are the default
|
||||
behavior in Boost.Regex or you can pass the flag <EM>perl</EM> to the
|
||||
regex constructor, for example:</P>
|
||||
<PRE>// e1 is a case sensitive Perl regular expression:
|
||||
// since Perl is the default option there's no need to explicitly specify the syntax used here:
|
||||
boost::regex e1(my_expression);
|
||||
// e2 a case insensitive Perl regular expression:
|
||||
boost::regex e2(my_expression, boost::regex::perl|boost::regex::icase);</PRE>
|
||||
<H3>Perl Regular Expression Syntax<A name="Perl"></A></H3>
|
||||
<P>In Perl regular expressions, all characters match themselves except for
|
||||
the following special characters:</P>
|
||||
<PRE>.[{()\*+?|^$</PRE>
|
||||
<H4>Wildcard:</H4>
|
||||
<P>The single character '.' when used outside of a character set will match any
|
||||
single character except:</P>
|
||||
<P>The NULL character when the flag <EM>match_not_dot_null</EM> is passed to the
|
||||
matching algorithms.</P>
|
||||
<P>The newline character when the flag <EM>match_not_dot_newline</EM> is passed to
|
||||
the matching algorithms.</P>
|
||||
<H4>Anchors:</H4>
|
||||
<P>A '^' character shall match the start of a line.</P>
|
||||
<P>A '$' character shall match the end of a line.</P>
|
||||
<H4>Marked sub-expressions:</H4>
|
||||
<P>A section beginning ( and ending ) acts as a marked sub-expression.
|
||||
Whatever matched the sub-expression is split out in a separate field by the
|
||||
matching algorithms. Marked sub-expressions can also be repeated, or
|
||||
referred to by a back-reference.</P>
|
||||
<H4>Non-marking grouping:</H4>
|
||||
<P>A marked sub-expression is useful to lexically group part of a regular
|
||||
expression, but has the side-effect of spitting out an extra field in the
|
||||
result. As an alternative you can lexically group part of a regular
|
||||
expression, without generating a marked sub-expression by using (?: and ) , for
|
||||
example (?:ab)+ will repeat "ab" without splitting out any separate
|
||||
sub-expressions.</P>
|
||||
<H4>Repeats:</H4>
|
||||
<P>Any atom (a single character, a marked sub-expression, or a character class)
|
||||
can be repeated with the *, +, ?, and {} operators.</P>
|
||||
<P>The * operator will match the preceding atom zero or more times, for example
|
||||
the expression a*b will match any of the following:</P>
|
||||
<PRE>b
|
||||
ab
|
||||
aaaaaaaab</PRE>
|
||||
<P>The + operator will match the preceding atom one or more times, for example the
|
||||
expression a+b will match any of the following:</P>
|
||||
<PRE>ab
|
||||
aaaaaaaab</PRE>
|
||||
<P>But will not match:</P>
|
||||
<PRE>b</PRE>
|
||||
<P>The ? operator will match the preceding atom zero or one times, for
|
||||
example the expression ca?b will match any of the following:</P>
|
||||
<PRE>cb
|
||||
cab</PRE>
|
||||
<P>But will not match:</P>
|
||||
<PRE>caab</PRE>
|
||||
<P>An atom can also be repeated with a bounded repeat:</P>
|
||||
<P>a{n} Matches 'a' repeated exactly <EM>n</EM> times.</P>
|
||||
<P>a{n,} Matches 'a' repeated <EM>n</EM> or more times.</P>
|
||||
<P>a{n, m} Matches 'a' repeated between <EM>n</EM> and <EM>m</EM> times
|
||||
inclusive.</P>
|
||||
<P>For example:</P>
|
||||
<PRE>^a{2,3}$</PRE>
|
||||
<P>Will match either of:</P>
|
||||
<PRE>aa
|
||||
aaa</PRE>
|
||||
<P>But neither of:</P>
|
||||
<PRE>a
|
||||
aaaa</PRE>
|
||||
<P>It is an error to use a repeat operator, if the preceding construct can not be
|
||||
repeated, for example:</P>
|
||||
<PRE>a(*)</PRE>
|
||||
<P>Will raise an error, as there is nothing for the * operator to be applied to.</P>
|
||||
<H4>Non greedy repeats</H4>
|
||||
<P>The normal repeat operators are "greedy", that is to say they will consume as
|
||||
much input as possible. There are non-greedy versions available that will
|
||||
consume as little input as possible while still producing a match.</P>
|
||||
<P>*? Matches the previous atom zero or more times, while consuming as little
|
||||
input as possible.</P>
|
||||
<P>+? Matches the previous atom one or more times, while consuming as little input
|
||||
as possible.</P>
|
||||
<P>?? Matches the previous atom zero or one times, while consuming as little input
|
||||
as possible.</P>
|
||||
<P>{n,}? Matches the previous atom <EM>n</EM> or more times, while consuming
|
||||
as little input as possible.</P>
|
||||
<P>{n,m}? Matches the previous atom between <EM>n</EM> and <EM>m</EM> times,
|
||||
while consuming as little input as possible.</P>
|
||||
<H4>Back references:</H4>
|
||||
<P>An escape character followed by a digit <EM>n</EM>, where <EM>n </EM>is in the
|
||||
range 1-9, matches the same string that was matched by sub-expression <EM>n</EM>.
|
||||
For example the expression:</P>
|
||||
<PRE>^(a*).*\1$</PRE>
|
||||
<P>Will match the string:</P>
|
||||
<PRE>aaabbaaa</PRE>
|
||||
<P>But not the string:</P>
|
||||
<PRE>aaabba</PRE>
|
||||
<H4>Alternation</H4>
|
||||
<P>The | operator will match either of its arguments, so for example: abc|def will
|
||||
match either "abc" or "def".
|
||||
</P>
|
||||
<P>Parenthesis can be used to group alternations, for example: ab(d|ef) will match
|
||||
either of "abd" or "abef".</P>
|
||||
<P>Empty alternatives are not allowed (these are almost always a mistake),
|
||||
but if you really want an empty alternative use (?:) as a placeholder, for
|
||||
example:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<P>"|abc" is not a valid expression, but<BR>
|
||||
"(?:)|abc" is and is equivalent, also the expression:<BR>
|
||||
"(?:abc)??" has exactly the same effect.</P>
|
||||
</BLOCKQUOTE>
|
||||
<H4>Character sets:</H4>
|
||||
<P>A character set is a bracket-expression starting with [ and ending with ], it
|
||||
defines a set of characters, and matches any single character that is a member
|
||||
of that set.</P>
|
||||
<P>A bracket expression may contain any combination of the following:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<H5>Single characters:</H5>
|
||||
<P>For example [abc], will match any of the characters 'a', 'b', or 'c'.</P>
|
||||
<H5>Character ranges:</H5>
|
||||
<P>For example [a-c] will match any single character in the range 'a' to
|
||||
'c'. By default, for Perl regular expressions, a character <EM>x</EM>
|
||||
is within the range <EM>y</EM> to <EM>z</EM>, if it collates within that
|
||||
range; this results in locale specific behavior. This behavior can
|
||||
be turned off by unsetting the <EM><A href="syntax_option_type.html#perl">collate</A></EM>
|
||||
option flag - in which case whether a character appears within a range is
|
||||
determined by comparing the code points of the characters only.</P>
|
||||
<H5>Negation:</H5>
|
||||
<P>If the bracket-expression begins with the ^ character, then it matches the
|
||||
complement of the characters it contains, for example [^a-c] matches any
|
||||
character that is not in the range a-c.</P>
|
||||
<H5>Character classes:</H5>
|
||||
<P>An expression of the form [[:name:]] matches the named character class "name",
|
||||
for example [[:lower:]] matches any lower case character. See <A href="character_class_names.html">
|
||||
character class names</A>.</P>
|
||||
<H5>Collating Elements:</H5>
|
||||
<P>An expression of the form [[.col.]] matches the collating element <EM>col</EM>.
|
||||
A collating element is any single character, or any sequence of characters that
|
||||
collates as a single unit. Collating elements may also be used as the end
|
||||
point of a range, for example: [[.ae.]-c] matches the character sequence "ae",
|
||||
plus any single character in the range "ae"-c, assuming that "ae" is treated as
|
||||
a single collating element in the current locale.</P>
|
||||
<P>As an extension, a collating element may also be specified via its <A href="collating_names.html">
|
||||
symbolic name</A>, for example:</P>
|
||||
<P>[[.NUL.]]</P>
|
||||
<P>matches a NUL character.</P>
|
||||
<H5>Equivalence classes:</H5>
|
||||
<P>
|
||||
An expression of the form [[=col=]], matches any character or collating element
|
||||
whose primary sort key is the same as that for collating element <EM>col</EM>,
|
||||
as with collating elements the name <EM>col</EM> may be a <A href="collating_names.html">
|
||||
symbolic name</A>. A primary sort key is one that ignores case,
|
||||
accentation, or locale-specific tailorings; so for example [[=a=]] matches any
|
||||
of the characters: a, &Agrave;, &Aacute;, &Acirc;, &Atilde;, &Auml;, &Aring;, A, &agrave;, &aacute;, &acirc;, &atilde;, &auml; and &aring;.
|
||||
Unfortunately implementation of this is reliant on the platform's collation and
|
||||
localisation support; this feature can not be relied upon to work portably
|
||||
across all platforms, or even all locales on one platform.</P>
|
||||
<H5>Escapes:</H5>
|
||||
<P>All the escape sequences that match a single character, or a single character
|
||||
class are permitted within a character class definition, <EM>except</EM> the
|
||||
negated character classes (\D \W etc).</P>
|
||||
</BLOCKQUOTE>
|
||||
<H5>Combinations:</H5>
|
||||
<P>All of the above can be combined in one character set declaration, for example:
|
||||
[[:digit:]a-c[.NUL.]].</P>
|
||||
<H4>Escapes</H4>
|
||||
<P>Any special character preceded by an escape shall match itself.
|
||||
</P>
|
||||
<P>The following escape sequences are also supported:</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<H5>Escapes matching a specific character</H5>
|
||||
<P>The following escape sequences are all synonyms for single characters:</P>
|
||||
<P>
|
||||
<TABLE id="Table7" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Escape</STRONG></TD>
|
||||
<TD><STRONG>Character</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\a</TD>
|
||||
<TD>'\a'</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\e</TD>
|
||||
<TD>0x1B</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\f</TD>
|
||||
<TD>\f</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\n</TD>
|
||||
<TD>\n</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\r</TD>
|
||||
<TD>\r</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\t</TD>
|
||||
<TD>\t</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\v</TD>
|
||||
<TD>\v</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\b</TD>
|
||||
<TD>\b (but only inside a character class declaration).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\cX</TD>
|
||||
<TD>An ASCII escape sequence - the character whose code point is X % 32</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\xdd</TD>
|
||||
<TD>A hexadecimal escape sequence - matches the single character whose code point
|
||||
is 0xdd.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\x{dddd}</TD>
|
||||
<TD>A hexadecimal escape sequence - matches the single character whose code point
|
||||
is 0xdddd.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\0ddd</TD>
|
||||
<TD>An octal escape sequence - matches the single character whose code point is
|
||||
0ddd.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\N{name}</TD>
|
||||
<TD>Matches the single character which has the <A href="collating_names.html">symbolic
|
||||
name</A> <EM>name. </EM>For example \N{newline} matches the single
|
||||
character \n.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>"Single character" character classes:</H5>
|
||||
<P>Any escaped character <EM>x</EM>, if <EM>x</EM> is the name of a character
|
||||
class shall match any character that is a member of that class, and any escaped
|
||||
character <EM>X</EM>, if <EM>x</EM> is the name of a character class, shall
|
||||
match any character not in that class.</P>
|
||||
<P>The following are supported by default:</P>
|
||||
<P>
|
||||
<TABLE id="Table3" cellSpacing="1" cellPadding="1" width="300" border="1">
|
||||
<TR>
|
||||
<TD><STRONG>Escape sequence</STRONG></TD>
|
||||
<TD><STRONG>Equivalent to</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\d</TD>
|
||||
<TD>[[:digit:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\l</TD>
|
||||
<TD>[[:lower:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\s</TD>
|
||||
<TD>[[:space:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\u</TD>
|
||||
<TD>[[:upper:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\w</TD>
|
||||
<TD>[[:word:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\D</TD>
|
||||
<TD>[^[:digit:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\L</TD>
|
||||
<TD>[^[:lower:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\S</TD>
|
||||
<TD>[^[:space:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\U</TD>
|
||||
<TD>[^[:upper:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\W</TD>
|
||||
<TD>[^[:word:]]</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Character Properties</H5>
|
||||
<P>The character property names in the following table are all equivalent to the <A href="character_class_names.html">
|
||||
names used in character classes</A>.</P>
|
||||
<P>
|
||||
<TABLE id="Table9" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<TD><STRONG>Form</STRONG></TD>
|
||||
<TD><STRONG>Description</STRONG></TD>
|
||||
<TD><STRONG>Equivalent character set form</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\pX</TD>
|
||||
<TD>Matches any character that has the property X.</TD>
|
||||
<TD>[[:X:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\p{Name}</TD>
|
||||
<TD>Matches any character that has the property <EM>Name</EM>.</TD>
|
||||
<TD>[[:Name:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\PX</TD>
|
||||
<TD>Matches any character that does not have the property X.</TD>
|
||||
<TD>[^[:X:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\P{Name}</TD>
|
||||
<TD>Matches any character that does not have the property <EM>Name</EM>.</TD>
|
||||
<TD>[^[:Name:]]</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Word Boundaries</H5>
|
||||
<P>The following escape sequences match the boundaries of words:</P>
|
||||
<P>
|
||||
<TABLE id="Table4" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>\<</TD>
|
||||
<TD>Matches the start of a word.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\></TD>
|
||||
<TD>Matches the end of a word.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\b</TD>
|
||||
<TD>Matches a word boundary (the start or end of a word).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\B</TD>
|
||||
<TD>Matches only when not at a word boundary.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Buffer boundaries</H5>
|
||||
<P>The following match only at buffer boundaries: a "buffer" in this context is
|
||||
the whole of the input text that is being matched against (note that ^ and
|
||||
$ may match embedded newlines within the text).</P>
|
||||
<P>
|
||||
<TABLE id="Table5" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>\`</TD>
|
||||
<TD>Matches at the start of a buffer only.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\'</TD>
|
||||
<TD>Matches at the end of a buffer only.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\A</TD>
|
||||
<TD>Matches at the start of a buffer only (the same as \`).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\z</TD>
|
||||
<TD>Matches at the end of a buffer only (the same as \').</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\Z</TD>
|
||||
<TD>Matches an optional sequence of newlines at the end of a buffer: equivalent to
|
||||
the regular expression \n*\z</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Continuation Escape</H5>
|
||||
<P>The sequence \G matches only at the end of the last match found, or at the
|
||||
start of the text being matched if no previous match was found. This
|
||||
escape is useful if you're iterating over the matches contained within a text, and
|
||||
you want each subsequent match to start where the last one ended.</P>
|
||||
<H5>Quoting escape</H5>
|
||||
<P>The escape sequence \Q begins a "quoted sequence": all the subsequent
|
||||
characters are treated as literals, until either the end of the regular
|
||||
expression or \E is found. For example the expression: \Q\*+\Ea+ would
|
||||
match either of:</P>
|
||||
<PRE>\*+a<BR>\*+aaa</PRE>
|
||||
<H5>Unicode escapes</H5>
|
||||
<P>
|
||||
<TABLE id="Table6" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>\C</TD>
|
||||
<TD>Matches a single code point: in Boost regex this has exactly the same effect
|
||||
as a "." operator.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\X</TD>
|
||||
<TD>Matches a combining character sequence: that is any non-combining character
|
||||
followed by a sequence of zero or more combining characters.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Any other escape</H5>
|
||||
<P>Any other escape sequence matches the character that is escaped, for example \@
|
||||
matches a literal <A href="mailto:'@'">'@'</A>.</P>
|
||||
</BLOCKQUOTE>
|
||||
<H4 dir="ltr">Perl Extended Patterns</H4>
|
||||
<P dir="ltr">Perl-specific extensions to the regular expression syntax all start
|
||||
with (?.</P>
|
||||
<BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
|
||||
<H5 dir="ltr">Comments</H5>
|
||||
<P dir="ltr">(?# ... ) is treated as a comment, its contents are ignored.</P>
|
||||
<H5 dir="ltr">Modifiers</H5>
|
||||
<P dir="ltr">(?imsx-imsx ... ) alters which of the perl modifiers are in effect
|
||||
within the pattern, changes take effect from the point that the block is first
|
||||
seen and extend to any enclosing ). Letters before a '-' turn that perl
|
||||
modifier on, letters afterward, turn it off.</P>
|
||||
<P dir="ltr">(?imsx-imsx:pattern) applies the specified modifiers to <EM>pattern</EM>
|
||||
only.</P>
|
||||
<H5 dir="ltr">Non-marking grouping</H5>
|
||||
<P dir="ltr">(?:pattern) lexically groups <EM>pattern</EM>, without generating an
|
||||
additional sub-expression.</P>
|
||||
<H5 dir="ltr">Lookahead</H5>
|
||||
<P dir="ltr">(?=pattern) consumes zero characters, only if <EM>pattern</EM> matches.</P>
|
||||
<P dir="ltr">(?!pattern) consumes zero characters, only if <EM>pattern</EM> does
|
||||
not match.</P>
|
||||
<P dir="ltr">Lookahead is typically used to create the logical AND of two regular
|
||||
expressions, for example if a password must contain a lower case letter, an
|
||||
upper case letter, a punctuation symbol, and be at least 6 characters long,
|
||||
then the expression:</P>
|
||||
<PRE dir="ltr">(?=.*[[:lower:]])(?=.*[[:upper:]])(?=.*[[:punct:]]).{6,}</PRE>
|
||||
<P dir="ltr">could be used to validate the password.</P>
|
||||
<H5 dir="ltr">Lookbehind</H5>
|
||||
<P dir="ltr">(?<=pattern) consumes zero characters, only if <EM>pattern</EM> could
|
||||
be matched against the characters preceding the current position (<EM>pattern</EM>
|
||||
must be of fixed length).</P>
|
||||
<P dir="ltr">(?<!pattern) consumes zero characters, only if <EM>pattern</EM> could
|
||||
not be matched against the characters preceding the current position (<EM>pattern</EM>
|
||||
must be of fixed length).</P>
|
||||
<H5 dir="ltr">Independent sub-expressions</H5>
|
||||
<P dir="ltr">(?>pattern) <EM>pattern</EM> is matched independently of the
|
||||
surrounding patterns, the expression will never backtrack into <EM>pattern</EM>.
|
||||
Independent sub-expressions are typically used to improve performance; only the
|
||||
best possible match for <EM>pattern</EM> will be considered, if this doesn't
|
||||
allow the expression as a whole to match then no match is found at all.</P>
|
||||
<H5 dir="ltr">Conditional Expressions</H5>
|
||||
<P dir="ltr">(?(condition)yes-pattern|no-pattern) attempts to match <EM>yes-pattern</EM>
|
||||
if the <EM>condition </EM>is true, otherwise attempts to match <EM>no-pattern</EM>.</P>
|
||||
<P dir="ltr">(?(condition)yes-pattern) attempts to match <EM>yes-pattern</EM> if
|
||||
the <EM>condition </EM>is true, otherwise fails.</P>
|
||||
<P dir="ltr"><EM>Condition</EM> may be either a forward lookahead assert, or the
|
||||
index of a marked sub-expression (the condition becomes true if the
|
||||
sub-expression has been matched).</P>
|
||||
</BLOCKQUOTE><A name="what">
|
||||
<H4>Operator precedence</H4>
|
||||
<P> The order of precedence of operators is as shown in the following
|
||||
table:</P>
|
||||
<P>
|
||||
<TABLE id="Table2" cellSpacing="1" cellPadding="1" width="100%" border="1">
|
||||
<TR>
|
||||
<TD>Collation-related bracket symbols</TD>
|
||||
<TD>[==] [::] [..]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Escaped characters
|
||||
</TD>
|
||||
<TD>\</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Character set (bracket expression)
|
||||
</TD>
|
||||
<TD>[]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Grouping</TD>
|
||||
<TD>()</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Single-character-ERE duplication
|
||||
</TD>
|
||||
<TD>* + ? {m,n}</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Concatenation</TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Anchoring</TD>
|
||||
<TD>^$</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Alternation</TD>
|
||||
<TD>|</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
</A>
|
||||
<H3>What gets matched</H3>
|
||||
<P>If you view the regular expression as a directed (possibly cyclic) graph, then
|
||||
the best match found is the first match found by a depth-first-search performed
|
||||
on that graph, while matching the input text.</P>
|
||||
<P>Alternatively:</P>
|
||||
<P>the best match found is the leftmost match, with individual elements matched as
|
||||
follows;</P>
|
||||
<P>
|
||||
<TABLE id="Table8" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<TD><STRONG>Construct</STRONG></TD>
|
||||
<TD><STRONG>What gets matched</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>AtomA AtomB</TD>
|
||||
<TD>Locates the best match for AtomA that has a following match for AtomB.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>Expression1 | Expression2</TD>
|
||||
<TD>If Expression1 can be matched then returns that match, otherwise attempts to
|
||||
match Expression2.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>S{N}</TD>
|
||||
<TD>Matches S repeated exactly N times.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>S{N,M}</TD>
|
||||
<TD>Matches S repeated between N and M times, and as many times as possible.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>S{N,M}?</TD>
|
||||
<TD>Matches S repeated between N and M times, and as few times as possible.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD><!--StartFragment --> S?, S*, S+</TD>
|
||||
<TD><!--StartFragment --> The same as <CODE>S{0,1}</CODE>, <CODE>S{0,UINT_MAX}</CODE>,
|
||||
<CODE>S{1,UINT_MAX}</CODE> respectively.
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>S??, S*?, S+?</TD>
|
||||
<TD>The same as <CODE>S{0,1}?</CODE>, <CODE>S{0,UINT_MAX}?</CODE>, <CODE>S{1,UINT_MAX}?</CODE>
|
||||
respectively.
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD><!--StartFragment --> (?>S)
|
||||
</TD>
|
||||
<TD>Matches the best match for S, and only that.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>
|
||||
(?=S), (?<=S)
|
||||
</TD>
|
||||
<TD>Matches only the best match for S (this is only visible if there are capturing
|
||||
parentheses within S).</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD><!--StartFragment --> (?!S), (?<!S)</TD>
|
||||
<TD>Considers only whether a match for S exists or not.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD><!--StartFragment --> (?(condition)yes-pattern | no-pattern)</TD>
|
||||
<TD>If condition is <EM>true</EM>, then only <EM>yes-pattern</EM> is considered,
|
||||
otherwise only <EM>no-pattern</EM> is considered.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H3><A name="variations"></A>Variations</H3>
|
||||
<P>The options <A href="syntax_option_type.html#perl"><EM>normal, ECMAScript, JavaScript</EM>
|
||||
and <EM>JScript</EM></A> are all synonyms for <EM>Perl</EM>.</P>
|
||||
<H3><A name="options"></A>Options</H3>
|
||||
<P>There are a <A href="syntax_option_type.html#Perl">variety of flags</A> that
|
||||
may be combined with the <EM>Perl</EM> option when constructing the regular
|
||||
expression, in particular note that the <A href="syntax_option_type.html#Perl">newline_alt</A>
|
||||
option alters the syntax, while the <A href="syntax_option_type.html#Perl">collate,
|
||||
nosubs and icase</A> options modify how the case and locale sensitivity
|
||||
are to be applied.</P>
|
||||
<H3><A name="mods"></A>Modifiers</H3>
|
||||
<P>The perl <EM>smix</EM> modifiers can either be applied using a (?smix-smix)
|
||||
prefix to the regular expression, or with one of the regex-compile time flags <EM><A href="syntax_option_type.html#Perl">
|
||||
no_mod_m, mod_x, mod_s, and no_mod_s</A></EM>.
|
||||
</P>
|
||||
<H3><A name="refs"></A>References</H3>
|
||||
<P><A href="http://perldoc.perl.org/perlre.html"> Perl 5.8.</A></P>
|
||||
<HR>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
21 Aug 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<P><I>&copy; Copyright <a href="mailto:jm@regex.fsnet.co.uk">John Maddock</a> 2004</I></P>
|
||||
<I>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>).</I></P>
|
||||
</I>
|
||||
</body>
|
||||
</html>
|
@ -1,70 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Thread Safety</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Thread Safety</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<P>The regex library is thread safe when Boost is: you can verify that Boost is in
|
||||
thread safe mode by checking to see if BOOST_HAS_THREADS is defined: this macro
|
||||
is set automatically by the config system when threading support is turned on
|
||||
in your compiler.
|
||||
</P>
|
||||
<P>Class <A href="basic_regex.html">basic_regex</A><> and its typedefs regex
|
||||
and wregex are thread safe, in that compiled regular expressions can safely be
|
||||
shared between threads. The matching algorithms <A href="regex_match.html">regex_match</A>,
|
||||
<A href="regex_search.html">regex_search</A>, <A href="regex_grep.html">regex_grep</A>,
|
||||
<A href="regex_format.html">regex_format</A> and <A href="regex_merge.html">regex_merge</A>
|
||||
are all re-entrant and thread safe. Class <A href="match_results.html">match_results</A>
|
||||
is now thread safe, in that the results of a match can be safely copied from
|
||||
one thread to another (for example one thread may find matches and push
|
||||
match_results instances onto a queue, while another thread pops them off the
|
||||
other end), otherwise use a separate instance of <A href="match_results.html">match_results</A>
|
||||
per thread.
|
||||
</P>
|
||||
<P>The <A href="posix_api.html">POSIX API functions</A> are all re-entrant and
|
||||
thread safe, regular expressions compiled with <I>regcomp</I> can also be
|
||||
shared between threads.
|
||||
</P>
|
||||
<P>The class<A href="regex.html"> RegEx</A> is only thread safe if each thread
|
||||
gets its own RegEx instance (apartment threading) - this is a consequence of
|
||||
RegEx handling both compiling and matching regular expressions.
|
||||
</P>
|
||||
<P>Finally note that changing the global locale invalidates all compiled regular
|
||||
expressions, therefore calling <I>set_locale</I> from one thread while another
|
||||
uses regular expressions <I>will</I> produce unpredictable results.
|
||||
</P>
|
||||
<P>
|
||||
There is also a requirement that there is only one thread executing prior to
|
||||
the start of main().</P>
|
||||
<HR>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
BIN
doc/uarrow.gif
BIN
doc/uarrow.gif
Binary file not shown.
Before Width: | Height: | Size: 1.6 KiB |
@ -1,66 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Boost.Regex: Unicode Regular Expressions</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<P>
|
||||
<TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3>
|
||||
</td>
|
||||
<TD width="353">
|
||||
<H1 align="center">Boost.Regex</H1>
|
||||
<H2 align="center">Unicode Regular Expressions.</H2>
|
||||
</TD>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>There are two ways to use Boost.Regex with Unicode strings:</P>
|
||||
<H3>Rely on wchar_t</H3>
|
||||
<P>If your platform's wchar_t type can hold Unicode strings, <EM>and</EM> your
|
||||
platform's C/C++ runtime correctly handles wide character constants (when
|
||||
passed to std::iswspace std::iswlower etc), then you can use boost::wregex to
|
||||
process Unicode. However, there are several disadvantages to this
|
||||
approach:</P>
|
||||
<UL>
|
||||
<LI>
|
||||
It's not portable: there's no guarantee on the width of wchar_t, or even
|
||||
whether the runtime treats wide characters as Unicode at all, most Windows
|
||||
compilers do so, but many Unix systems do not.</LI>
|
||||
<LI>
|
||||
There's no support for Unicode-specific character classes: [[:Nd:]], [[:Po:]]
|
||||
etc.</LI>
|
||||
<LI>
|
||||
You can only search strings that are encoded as sequences of wide characters,
|
||||
it is not possible to search UTF-8, or even UTF-16 on many platforms.</LI></UL>
|
||||
<H3>Use a Unicode Aware Regular Expression Type.</H3>
|
||||
<P>If you have the <A href="http://www.ibm.com/software/globalization/icu/">ICU
|
||||
library</A>, then Boost.Regex can be <A href="install.html#unicode">configured
|
||||
to make use of it</A>, and provide a distinct regular expression type
|
||||
(boost::u32regex), that supports both Unicode specific character properties,
|
||||
and the searching of text that is encoded in either UTF-8, UTF-16, or
|
||||
UTF-32. See: <A href="icu_strings.html">ICU string class support</A>.</P>
|
||||
<P>
|
||||
<HR>
|
||||
</P>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
04 Jan 2005
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 2005</i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
Reference in New Issue
Block a user