mirror of
https://github.com/boostorg/regex.git
synced 2025-07-29 12:07:28 +02:00
Completed perl feature set.
Merged in changes to Docs from main branch. [SVN r22785]
This commit is contained in:
@ -24,6 +24,11 @@
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>Captures are the iterator ranges that are "captured" by marked sub-expressions
|
||||
as a regular expression gets matched. Each marked sub-expression can
|
||||
result in more than one capture, if it is matched more than once. This
|
||||
document explains how captures and marked sub-expressions in Boost.Regex are
|
||||
represented and accessed.</P>
|
||||
<H2>Marked sub-expressions</H2>
|
||||
<P>Every time a Perl regular expression contains a parenthesis group (), it spits
|
||||
out an extra field, known as a marked sub-expression, for example the
|
||||
@ -247,4 +252,3 @@ Text: "now is the time for all good men to come to the aid of the party"
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
@ -25,25 +25,32 @@
|
||||
<BR>
|
||||
<BR>
|
||||
<HR>
|
||||
<P>The author can be contacted at john@johnmaddock.co.uk; the
|
||||
home page for this library is at <A href="http://www.boost.org">www.boost.org</A>.</P>
|
||||
<P>I am indebted to Robert Sedgewick's "Algorithms in C++" for forcing me to think
|
||||
about algorithms and their performance, and to the folks at boost for forcing
|
||||
me to <I>think</I>, period. The following people have all contributed useful
|
||||
comments or fixes: Dave Abrahams, Mike Allison, Edan Ayal, Jayashree
|
||||
Balasubramanian, Jan Bölsche, Beman Dawes, Paul Baxter, David Bergman, David
|
||||
Dennerline, Edward Diener, Peter Dimov, Robert Dunn, Fabio Forno, Tobias
|
||||
Gabrielsson, Rob Gillen, Marc Gregoire, Chris Hecker, Nick Hodapp, Jesse Jones,
|
||||
Martin Jost, Boris Krasnovskiy, Jan Hermelink, Max Leung, Wei-hao Lin, Jens
|
||||
Maurer, Richard Peters, Heiko Schmidt, Jason Shirk, Gerald Slacik, Scobie
|
||||
Smith, Mike Smyth, Alexander Sokolovsky, Hervé Poirier, Michael Raykh, Marc
|
||||
Recht, Scott VanCamp, Bruno Voigt, Alexey Voinov, Jerry Waldorf, Rob Ward,
|
||||
Lealon Watts, Thomas Witt and Yuval Yosef. I am also grateful to the manuals
|
||||
supplied with the Henry Spencer, Perl and GNU regular expression libraries -
|
||||
wherever possible I have tried to maintain compatibility with these libraries
|
||||
and with the POSIX standard - the code however is entirely my own, including
|
||||
any bugs! I can absolutely guarantee that I will not fix any bugs I don't know
|
||||
about, so if you have any comments or spot any bugs, please get in touch.</P>
|
||||
<P>The author can be contacted at john@johnmaddock.co.uk; the home page for
|
||||
this library is at <A href="http://www.boost.org">www.boost.org</A>.</P>
|
||||
<P>I am indebted to <A href="http://www.cs.princeton.edu/~rs/">Robert Sedgewick's
|
||||
"Algorithms in C++" </A>for forcing me to think about algorithms and their
|
||||
performance, and to the folks at <A href="http://www.boost.org">boost</A> for
|
||||
forcing me to <I>think</I>, period.</P>
|
||||
<P><A href="http://www.boost-consulting.com">Eric Niebler</A>, author of the <A href="http://research.microsoft.com/projects/greta">
|
||||
GRETA regular expression component</A>, has shared several important ideas,
|
||||
in a series of long discussions.</P>
|
||||
<P>Pete Becker, of <A href="http://www.dinkumware.com/">Dinkumware Ltd</A>, has
|
||||
helped enormously with the standardisation proposal language.</P>
|
||||
<P>The following people have all contributed useful comments or fixes: Dave
|
||||
Abrahams, Mike Allison, Edan Ayal, Jayashree Balasubramanian, Jan Bölsche,
|
||||
Beman Dawes, Paul Baxter, David Bergman, David Dennerline, Edward Diener, Peter
|
||||
Dimov, Robert Dunn, Fabio Forno, Tobias Gabrielsson, Rob Gillen, Marc Gregoire,
|
||||
Chris Hecker, Nick Hodapp, Jesse Jones, Martin Jost, Boris Krasnovskiy, Jan
|
||||
Hermelink, Max Leung, Wei-hao Lin, Jens Maurer, Richard Peters, Heiko Schmidt,
|
||||
Jason Shirk, Gerald Slacik, Scobie Smith, Mike Smyth, Alexander Sokolovsky,
|
||||
Hervé Poirier, Michael Raykh, Marc Recht, Scott VanCamp, Bruno Voigt, Alexey
|
||||
Voinov, Jerry Waldorf, Rob Ward, Lealon Watts, John Wismar, Thomas Witt and
|
||||
Yuval Yosef. I am also grateful to the manuals supplied with the Henry Spencer,
|
||||
Perl and GNU regular expression libraries - wherever possible I have tried to
|
||||
maintain compatibility with these libraries and with the POSIX standard - the
|
||||
code however is entirely my own, including any bugs! I can absolutely guarantee
|
||||
that I will not fix any bugs I don't know about, so if you have any comments or
|
||||
spot any bugs, please get in touch.</P>
|
||||
<P>Useful further information can be found at:</P>
|
||||
<P>Short tutorials on regular expressions can be <A href="http://etext.lib.virginia.edu/helpsheets/regex.html">
|
||||
found here</A> and <A href="http://www.devshed.com/Server_Side/Administration/RegExp/page1.html">here</A>.</P>
|
||||
@ -72,8 +79,7 @@
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>© Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
|
@ -1,153 +1,114 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<meta name="generator" content="HTML Tidy, see www.w3.org">
|
||||
<title>Boost.Regex: FAQ</title>
|
||||
<meta http-equiv="Content-Type" content=
|
||||
"text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<p></p>
|
||||
|
||||
<table id="Table1" cellspacing="1" cellpadding="1" width="100%"
|
||||
border="0">
|
||||
<tr>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt=
|
||||
"C++ Boost" src="../../../c++boost.gif" border="0"></a></h3>
|
||||
</td>
|
||||
<td width="353">
|
||||
<h1 align="center">Boost.Regex</h1>
|
||||
|
||||
<h2 align="center">FAQ</h2>
|
||||
</td>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt=
|
||||
"Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<br>
|
||||
<br>
|
||||
|
||||
|
||||
<hr>
|
||||
<font color="#ff0000"><font color="#ff0000"></font></font>
|
||||
<p><font color="#ff0000"><font color="#ff0000"><font color=
|
||||
"#ff0000"> Q. Why can't I use the "convenience" versions of
|
||||
regex_match / regex_search / regex_grep / regex_format /
|
||||
regex_merge?</font></font></font></p>
|
||||
|
||||
<p>A. These versions may or may not be available depending upon the
|
||||
capabilities of your compiler, the rules determining the format of
|
||||
these functions are quite complex - and only the versions visible
|
||||
to a standard compliant compiler are given in the help. To find out
|
||||
what your compiler supports, run <boost/regex.hpp> through
|
||||
your C++ pre-processor, and search the output file for the function
|
||||
that you are interested in.<font color="#ff0000"><font color=
|
||||
"#ff0000"></font></font></p>
|
||||
|
||||
<p><font color="#ff0000"><font color="#ff0000">Q. I can't get
|
||||
regex++ to work with escape characters, what's going
|
||||
on?</font></font></p>
|
||||
|
||||
<p>A. If you embed regular expressions in C++ code, then remember
|
||||
that escape characters are processed twice: once by the C++
|
||||
compiler, and once by the regex++ expression compiler, so to pass
|
||||
the regular expression \d+ to regex++, you need to embed "\\d+" in
|
||||
your code. Likewise to match a literal backslash you will need to
|
||||
embed "\\\\" in your code. <font color="#ff0000"></font></p>
|
||||
|
||||
<p><font color="#ff0000">Q. Why does using parentheses in a POSIX
|
||||
regular expression change the result of a match?</font></p>
|
||||
|
||||
<p>For POSIX (extended and basic) regular expressions, but not for
|
||||
perl regexes, parentheses don't only mark; they determine what the
|
||||
best match is as well. When the expression is compiled as a POSIX
|
||||
basic or extended regex then Boost.regex follows the POSIX standard
|
||||
leftmost longest rule for determining what matched. So if there is
|
||||
more than one possible match after considering the whole
|
||||
expression, it looks next at the first sub-expression and then the
|
||||
second sub-expression and so on. So...</p>
|
||||
|
||||
<pre>
|
||||
<head>
|
||||
<title>Boost.Regex: FAQ</title>
|
||||
<meta name="generator" content="HTML Tidy, see www.w3.org">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<p></p>
|
||||
<table id="Table1" cellspacing="1" cellpadding="1" width="100%" border="0">
|
||||
<tr>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../c++boost.gif" border="0"></a></h3>
|
||||
</td>
|
||||
<td width="353">
|
||||
<h1 align="center">Boost.Regex</h1>
|
||||
<h2 align="center">FAQ</h2>
|
||||
</td>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<hr>
|
||||
<font color="#ff0000"><font color="#ff0000"></font></font>
|
||||
<p><font color="#ff0000"><font color="#ff0000"><font color="#ff0000"> Q. Why can't I
|
||||
use the "convenience" versions of regex_match / regex_search / regex_grep /
|
||||
regex_format / regex_merge?</font></font></font></p>
|
||||
<p>A. These versions may or may not be available depending upon the capabilities
|
||||
of your compiler, the rules determining the format of these functions are quite
|
||||
complex - and only the versions visible to a standard compliant compiler are
|
||||
given in the help. To find out what your compiler supports, run
|
||||
<boost/regex.hpp> through your C++ pre-processor, and search the output
|
||||
file for the function that you are interested in.<font color="#ff0000"><font color="#ff0000"></font></font></p>
|
||||
<p><font color="#ff0000"><font color="#ff0000">Q. I can't get regex++ to work with
|
||||
escape characters, what's going on?</font></font></p>
|
||||
<p>A. If you embed regular expressions in C++ code, then remember that escape
|
||||
characters are processed twice: once by the C++ compiler, and once by the
|
||||
regex++ expression compiler, so to pass the regular expression \d+ to regex++,
|
||||
you need to embed "\\d+" in your code. Likewise to match a literal backslash
|
||||
you will need to embed "\\\\" in your code. <font color="#ff0000"></font>
|
||||
</p>
|
||||
<p><font color="#ff0000">Q. Why does using parentheses in a POSIX regular expression
|
||||
change the result of a match?</font></p>
|
||||
<p>For POSIX (extended and basic) regular expressions, but not for perl regexes,
|
||||
parentheses don't only mark; they determine what the best match is as well.
|
||||
When the expression is compiled as a POSIX basic or extended regex then
|
||||
Boost.regex follows the POSIX standard leftmost longest rule for determining
|
||||
what matched. So if there is more than one possible match after considering the
|
||||
whole expression, it looks next at the first sub-expression and then the second
|
||||
sub-expression and so on. So...</p>
|
||||
<pre>
|
||||
"(0*)([0-9]*)" against "00123" would produce
|
||||
$1 = "00"
|
||||
$2 = "123"
|
||||
</pre>
|
||||
|
||||
<p>whereas</p>
|
||||
|
||||
<pre>
|
||||
"0*([0-9)*" against "00123" would produce
|
||||
<p>whereas</p>
|
||||
<pre>
|
||||
"0*([0-9])*" against "00123" would produce
|
||||
$1 = "00123"
|
||||
</pre>
|
||||
|
||||
<p>If you think about it, had $1 only matched the "123", this would
|
||||
be "less good" than the match "00123" which is both further to the
|
||||
left and longer. If you want $1 to match only the "123" part, then
|
||||
you need to use something like:</p>
|
||||
|
||||
<pre>
|
||||
<p>If you think about it, had $1 only matched the "123", this would be "less good"
|
||||
than the match "00123" which is both further to the left and longer. If you
|
||||
want $1 to match only the "123" part, then you need to use something like:</p>
|
||||
<pre>
|
||||
"0*([1-9][0-9]*)"
|
||||
</pre>
|
||||
|
||||
<p>as the expression.</p>
|
||||
|
||||
<p><font color="#ff0000">Q. Why don't character ranges work
|
||||
properly (POSIX mode only)?</font><br>
|
||||
A. The POSIX standard specifies that character range expressions
|
||||
are locale sensitive - so for example the expression [A-Z] will
|
||||
match any collating element that collates between 'A' and 'Z'. That
|
||||
means that for most locales other than "C" or "POSIX", [A-Z] would
|
||||
match the single character 't' for example, which is not what most
|
||||
people expect - or at least not what most people have come to
|
||||
expect from regular expression engines. For this reason, the
|
||||
default behaviour of boost.regex (perl mode) is to turn locale
|
||||
sensitive collation off by not setting the regex_constants::collate
|
||||
compile time flag. However if you set a non-default compile time
|
||||
flag - for example regex_constants::extended or
|
||||
regex_constants::basic, then locale dependent collation will be
|
||||
enabled, this also applies to the POSIX API functions which use
|
||||
either regex_constants::extended or regex_constants::basic
|
||||
internally. <i>[Note - when regex_constants::nocollate is in effect,
|
||||
the library behaves "as if" the LC_COLLATE locale category were
|
||||
always "C", regardless of what it's actually set to - end
|
||||
note</i>].</p>
|
||||
|
||||
<p><font color="#ff0000">Q. Why are there no throw specifications
|
||||
on any of the functions? What exceptions can the library
|
||||
throw?</font></p>
|
||||
|
||||
<p>A. Not all compilers support (or honor) throw specifications,
|
||||
others support them but with reduced efficiency. Throw
|
||||
specifications may be added at a later date as compilers begin to
|
||||
handle this better. The library should throw only three types of
|
||||
exception: boost::bad_expression can be thrown by basic_regex when
|
||||
compiling a regular expression, std::runtime_error can be thrown
|
||||
when a call to basic_regex::imbue tries to open a message catalogue
|
||||
that doesn't exist, or when a call to regex_search or regex_match
|
||||
results in an "everlasting" search, or when a call to
|
||||
RegEx::GrepFiles or RegEx::FindFiles tries to open a file that
|
||||
cannot be opened, finally std::bad_alloc can be thrown by just
|
||||
about any of the functions in this library.</p>
|
||||
|
||||
<p></p>
|
||||
|
||||
<hr>
|
||||
<p>as the expression.</p>
|
||||
<p><font color="#ff0000">Q. Why don't character ranges work properly (POSIX mode
|
||||
only)?</font><br>
|
||||
A. The POSIX standard specifies that character range expressions are locale
|
||||
sensitive - so for example the expression [A-Z] will match any collating
|
||||
element that collates between 'A' and 'Z'. That means that for most locales
|
||||
other than "C" or "POSIX", [A-Z] would match the single character 't' for
|
||||
example, which is not what most people expect - or at least not what most
|
||||
people have come to expect from regular expression engines. For this reason,
|
||||
the default behaviour of boost.regex (perl mode) is to turn locale sensitive
|
||||
collation off by not setting the regex_constants::collate compile time flag.
|
||||
However if you set a non-default compile time flag - for example
|
||||
regex_constants::extended or regex_constants::basic, then locale dependent
|
||||
collation will be enabled, this also applies to the POSIX API functions which
|
||||
use either regex_constants::extended or regex_constants::basic internally. <i>[Note
|
||||
- when regex_constants::nocollate is in effect, the library behaves "as if" the
|
||||
LC_COLLATE locale category were always "C", regardless of what it's actually set
|
||||
to - end note</i>].</p>
|
||||
<p><font color="#ff0000">Q. Why are there no throw specifications on any of the
|
||||
functions? What exceptions can the library throw?</font></p>
|
||||
<p>A. Not all compilers support (or honor) throw specifications, others support
|
||||
them but with reduced efficiency. Throw specifications may be added at a later
|
||||
date as compilers begin to handle this better. The library should throw only
|
||||
three types of exception: boost::bad_expression can be thrown by basic_regex
|
||||
when compiling a regular expression, std::runtime_error can be thrown when a
|
||||
call to basic_regex::imbue tries to open a message catalogue that doesn't
|
||||
exist, or when a call to regex_search or regex_match results in an
|
||||
"everlasting" search, or when a call to RegEx::GrepFiles or
|
||||
RegEx::FindFiles tries to open a file that cannot be opened, finally
|
||||
std::bad_alloc can be thrown by just about any of the functions in this
|
||||
library.</p>
|
||||
<p></p>
|
||||
<hr>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>© Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
||||
|
@ -26,14 +26,14 @@
|
||||
<br>
|
||||
<hr>
|
||||
<h3>Synopsis</h3>
|
||||
<p>The type <code>match_flag_type</code> is an implementation defined bitmask type
|
||||
(17.3.2.1.2) that controls how a regular expression is matched against a
|
||||
<p>The type <code>match_flag_type</code> is an implementation specific bitmask
|
||||
type (17.3.2.1.2) that controls how a regular expression is matched against a
|
||||
character sequence. The behavior of the format flags is described in more
|
||||
detail in the <A href="format_syntax.html">format syntax guide</A>.</p>
|
||||
<pre>
|
||||
namespace std{ namespace regex_constants{
|
||||
namespace boost{ namespace regex_constants{
|
||||
|
||||
typedef bitmask_type match_flag_type;
|
||||
typedef <EM>implementation-specific-bitmask-type</EM> match_flag_type;
|
||||
|
||||
static const match_flag_type match_default = 0;
|
||||
static const match_flag_type match_not_bob;
|
||||
@ -59,11 +59,11 @@ static const match_flag_type format_first_only;
|
||||
static const match_flag_type format_all;
|
||||
|
||||
} // namespace regex_constants
|
||||
} // namespace std
|
||||
} // namespace boost
|
||||
</pre>
|
||||
<h3>Description</h3>
|
||||
<p>The type <code>match_flag_type</code> is an implementation defined bitmask type
|
||||
(17.3.2.1.2). When matching a regular expression against a sequence of
|
||||
<p>The type <code>match_flag_type</code> is an implementation specific bitmask
|
||||
type (17.3.2.1.2). When matching a regular expression against a sequence of
|
||||
characters [first, last) then setting its elements has the effects listed in
|
||||
the table below:</p>
|
||||
<p></p>
|
||||
@ -271,10 +271,10 @@ static const match_flag_type format_all;
|
||||
<br>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
04 Feb 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>© Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
|
@ -344,7 +344,7 @@ const_iterator end()const;
|
||||
<p><b>Effects:</b> Returns a terminating iterator that enumerates over all the
|
||||
marked sub-expression matches stored in *this.</p>
|
||||
<h4><A name="format"></A>match_results reformatting</h4>
|
||||
<pre>template <class OutputIterator>
|
||||
<pre><A name=m12></A>template <class OutputIterator>
|
||||
OutputIterator format(OutputIterator out,
|
||||
const string_type& fmt,
|
||||
<A href="match_flag_type.html" >match_flag_type</A> flags = format_default);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -42,7 +42,7 @@
|
||||
iterator first,
|
||||
iterator last,
|
||||
<b>const</b> basic_regex<charT, traits, Allocator>& e,
|
||||
<b>unsigned</b> flags = match_default)
|
||||
boost::match_flag_type flags = match_default)
|
||||
</pre>
|
||||
<p>The library also defines the following convenience versions, which take either
|
||||
a const charT*, or a const std::basic_string<>& in place of a pair of
|
||||
@ -53,13 +53,13 @@
|
||||
<b>unsigned</b> <b>int</b> regex_grep(Predicate foo,
|
||||
<b>const</b> charT* str,
|
||||
<b>const</b> basic_regex<charT, traits, Allocator>& e,
|
||||
<b>unsigned</b> flags = match_default);
|
||||
boost::match_flag_type flags = match_default);
|
||||
|
||||
<b>template</b> <<b>class</b> Predicate, <b>class</b> ST, <b>class</b> SA, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits>
|
||||
<b>unsigned</b> <b>int</b> regex_grep(Predicate foo,
|
||||
<b>const</b> std::basic_string<charT, ST, SA>& s,
|
||||
<b>const</b> basic_regex<charT, traits, Allocator>& e,
|
||||
<b>unsigned</b> flags = match_default);
|
||||
boost::match_flag_type flags = match_default);
|
||||
</pre>
|
||||
<p>The parameters for the primary version of regex_grep have the following
|
||||
meanings: </p>
|
||||
@ -370,11 +370,10 @@ index[std::string(what[5].first, what[5].second) + std::string(what[6].first, wh
|
||||
<hr>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
04 Feb 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>© Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
|
@ -294,7 +294,7 @@ void</B> IndexClasses(map_type& m, <B>const</B> std::string& file)
|
||||
start = file.begin();
|
||||
end = file.end();
|
||||
boost::<a href="match_results.html">match_results</a><std::string::const_iterator> what;
|
||||
<B>unsigned</B> <B>int</B> flags = boost::match_default;
|
||||
boost::match_flag_type flags = boost::match_default;
|
||||
<B>while</B>(regex_search(start, end, what, expression, flags))
|
||||
{
|
||||
<FONT color=#000080> <I>// what[0] contains the whole string
|
||||
@ -314,11 +314,10 @@ void</B> IndexClasses(map_type& m, <B>const</B> std::string& file)
|
||||
<HR>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
04 Feb 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>© Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
|
@ -38,15 +38,15 @@
|
||||
<PRE><B>template</B> <<B>class</B> OutputIterator, <B>class</B> charT, <B>class</B> Traits1, <B>class</B> Alloc1, <B>class</B> Traits2, <B>class</B> Alloc2>
|
||||
std::size_t regex_split(OutputIterator out,
|
||||
std::basic_string<charT, Traits1, Alloc1>& s,
|
||||
<B> const</B> basic_regex<charT, Traits2, Alloc2>& e,
|
||||
<B> unsigned</B> flags,
|
||||
<B> const</B> basic_regex<charT, Traits2, Alloc2>& e,
|
||||
<STRONG> </STRONG>boost::match_flag_type flags,
|
||||
std::size_t max_split);
|
||||
|
||||
<B>template</B> <<B>class</B> OutputIterator, <B>class</B> charT, <B>class</B> Traits1, <B>class</B> Alloc1, <B>class</B> Traits2, <B>class</B> Alloc2>
|
||||
std::size_t regex_split(OutputIterator out,
|
||||
std::basic_string<charT, Traits1, Alloc1>& s,
|
||||
<B> const</B> basic_regex<charT, Traits2, Alloc2>& e,
|
||||
<B>unsigned</B> flags = match_default);
|
||||
<B> const</B> basic_regex<charT, Traits2, Alloc2>& e,
|
||||
boost::match_flag_type flags = match_default);
|
||||
|
||||
<B>template</B> <<B>class</B> OutputIterator, <B>class</B> charT, <B>class</B> Traits1, <B>class</B> Alloc1>
|
||||
std::size_t regex_split(OutputIterator out,
|
||||
@ -134,11 +134,10 @@ boost::regex e(<FONT color=#000080>"<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)
|
||||
<HR>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
04 Feb 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>© Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
|
@ -76,7 +76,7 @@ typedef regex_token_iterator<const char*> cregex_token_i
|
||||
typedef regex_token_iterator<std::string::const_iterator> sregex_token_iterator;
|
||||
#ifndef BOOST_NO_WREGEX
|
||||
typedef regex_token_iterator<const wchar_t*> wcregex_token_iterator;
|
||||
typedef regex_token_iterator<std::wstring::const_iterator> wsregex_token_iterator;
|
||||
typedef regex_token_iterator<std::wstring::const_iterator>   wsregex_token_iterator;
|
||||
#endif
|
||||
</PRE>
|
||||
<H3><A name="description"></A>Description</H3>
|
||||
@ -84,7 +84,8 @@ typedef regex_token_iterator<<std::wstring::const_iterator> wsregex_token_
|
||||
<P><B> Effects:</B> constructs an end of sequence iterator.</P>
|
||||
<PRE><A name=c2></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||||
int submatch = 0, match_flag_type m = match_default);</PRE>
|
||||
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>.</P>
|
||||
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>. Object re shall exist
|
||||
for the lifetime of the iterator constructed from it.</P>
|
||||
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate one
|
||||
string for each regular expression match of the expression <EM>re</EM> found
|
||||
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>. The
|
||||
@ -99,7 +100,8 @@ typedef regex_token_iterator<<std::wstring::const_iterator> wsregex_token_
|
||||
configured</A> in non-recursive mode).</P>
|
||||
<PRE><A name=c3></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||||
const std::vector<int>& submatches, match_flag_type m = match_default);</PRE>
|
||||
<P><B> Preconditions:</B> <CODE>submatches.size() && !re.empty()</CODE>.</P>
|
||||
<P><B> Preconditions:</B> <CODE>submatches.size() && !re.empty()</CODE>.
|
||||
Object re shall exist for the lifetime of the iterator constructed from it.</P>
|
||||
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate <EM>submatches.size()</EM>
|
||||
strings for each regular expression match of the expression <EM>re</EM> found
|
||||
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>. For
|
||||
@ -118,7 +120,8 @@ typedef regex_token_iterator<<std::wstring::const_iterator> wsregex_token_
|
||||
<PRE><A name=c4></A>template <std::size_t R>
|
||||
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||||
const int (&submatches)[R], match_flag_type m = match_default);</PRE>
|
||||
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>.</P>
|
||||
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>. Object re shall exist
|
||||
for the lifetime of the iterator constructed from it.</P>
|
||||
<P><STRONG>Effects:</STRONG> constructs a regex_token_iterator that will
|
||||
enumerate <EM>R</EM> strings for each regular expression match of the
|
||||
expression <EM>re</EM> found within the sequence <EM>[a,b)</EM>, using match
|
||||
|
@ -24,10 +24,12 @@
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>Under construction.</P>
|
||||
<P>The current boost.regex traits class design will be migrated to that specified
|
||||
in the <A href="http://anubis.dkuug.dk/jtc1/sc22/wg21/docs/papers/2003/n1429.htm">regular
|
||||
expression standardization proposal</A>. </P>
|
||||
<P>
|
||||
Under construction: the current design will be replaced by that specified in
|
||||
the <A href="http://anubis.dkuug.dk/jtc1/sc22/wg21/docs/papers/2003/n1429.htm">regular
|
||||
expression standardization proposal</A>, the current (obsolete) design has
|
||||
its <A href="http://cvs.sourceforge.net/viewcvs.py/*checkout*/boost/boost/libs/regex/Attic/traits_class_ref.htm?rev=1.11">
|
||||
documentation archived online</A>.</P>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
@ -36,11 +38,9 @@
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
      <p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
@ -91,18 +91,18 @@
|
||||
<P>Parentheses serve two purposes, to group items together into a sub-expression,
|
||||
and to mark what generated the match. For example the expression "(ab)*" would
|
||||
match all of the string "ababab". The matching algorithms <A href="regex_match.html">
|
||||
regex_match</A> and <A href="regex_search.html">regex_search</A>
|
||||
each take an instance of <A href="match_results.html">match_results</A>
|
||||
that reports what caused the match, on exit from these functions the <A href="match_results.html">
|
||||
match_results</A> contains information both on what the whole expression
|
||||
matched and on what each sub-expression matched. In the example above
|
||||
match_results[1] would contain a pair of iterators denoting the final "ab" of
|
||||
the matching string. It is permissible for sub-expressions to match null
|
||||
strings. If a sub-expression takes no part in a match - for example if it is
|
||||
part of an alternative that is not taken - then both of the iterators that are
|
||||
returned for that sub-expression point to the end of the input string, and the <I>matched</I>
|
||||
parameter for that sub-expression is <I>false</I>. Sub-expressions are indexed
|
||||
from left to right starting from 1, sub-expression 0 is the whole expression.
|
||||
regex_match</A> and <A href="regex_search.html">regex_search</A> each take
|
||||
an instance of <A href="match_results.html">match_results</A> that reports what
|
||||
caused the match, on exit from these functions the <A href="match_results.html">match_results</A>
|
||||
contains information both on what the whole expression matched and on what each
|
||||
sub-expression matched. In the example above match_results[1] would contain a
|
||||
pair of iterators denoting the final "ab" of the matching string. It is
|
||||
permissible for sub-expressions to match null strings. If a sub-expression
|
||||
takes no part in a match - for example if it is part of an alternative that is
|
||||
not taken - then both of the iterators that are returned for that
|
||||
sub-expression point to the end of the input string, and the <I>matched</I> parameter
|
||||
for that sub-expression is <I>false</I>. Sub-expressions are indexed from left
|
||||
to right starting from 1, sub-expression 0 is the whole expression.
|
||||
</P>
|
||||
<H3>Non-Marking Parenthesis
|
||||
</H3>
|
||||
@ -143,7 +143,7 @@
|
||||
<P>A set is a set of characters that can match any single character that is a
|
||||
member of the set. Sets are delimited by "[" and "]" and can contain literals,
|
||||
character ranges, character classes, collating elements and equivalence
|
||||
classes. Set declarations that start with "^" contain the compliment of the
|
||||
classes. Set declarations that start with "^" contain the complement of the
|
||||
elements that follow.
|
||||
</P>
|
||||
<P>Examples:
|
||||
@ -293,7 +293,7 @@
|
||||
[^[.ae.]] would only match one character.
|
||||
</P>
|
||||
<P>
|
||||
Equivalence classes take the general form[=tagname=] inside a set declaration,
|
||||
         Equivalence classes take the general form [=tagname=] inside a set declaration,
|
||||
where <I>tagname</I> is either a single character, or a name of a collating
|
||||
element, and matches any character that is a member of the same primary
|
||||
equivalence class as the collating element [.tagname.]. An equivalence class is
|
||||
@ -302,7 +302,7 @@
|
||||
typically collated by character, then by accent, and then by case; the primary
|
||||
sort key then relates to the character, the secondary to the accentation, and
|
||||
the tertiary to the case). If there is no equivalence class corresponding to <I>tagname</I>
|
||||
, then[=tagname=] is exactly the same as [.tagname.]. Unfortunately there is no
|
||||
         , then [=tagname=] is exactly the same as [.tagname.]. Unfortunately there is no
|
||||
locale independent method of obtaining the primary sort key for a character,
|
||||
except under Win32. For other operating systems the library will "guess" the
|
||||
primary sort key from the full sort key (obtained from <I>strxfrm</I>), so
|
||||
@ -666,106 +666,103 @@
|
||||
<H3>What gets matched?
|
||||
</H3>
|
||||
<P>
|
||||
When the expression is compiled as a Perl-compatible regex then the matching
|
||||
algorithms will perform a depth first search on the state machine and report
|
||||
the first match found.</P>
|
||||
When the expression is compiled as a Perl-compatible regex then the matching
|
||||
algorithms will perform a depth first search on the state machine and report
|
||||
the first match found.</P>
|
||||
<P>
|
||||
When the expression is compiled as a POSIX-compatible regex then the matching
|
||||
algorithms will match the first possible matching string, if more than one
|
||||
string starting at a given location can match then it matches the longest
|
||||
possible string, unless the flag match_any is set, in which case the first
|
||||
match encountered is returned. Use of the match_any option can reduce the time
|
||||
taken to find the match - but is only useful if the user is less concerned
|
||||
about what matched - for example it would not be suitable for search and
|
||||
  replace operations. In cases where there are multiple possible matches all
|
||||
starting at the same location, and all of the same length, then the match
|
||||
chosen is the one with the longest first sub-expression, if that is the same
|
||||
for two or more matches, then the second sub-expression will be examined and so
|
||||
on.
|
||||
</P><P>
|
||||
The following table examples illustrate the main differences between Perl and
|
||||
POSIX regular expression matching rules:
|
||||
When the expression is compiled as a POSIX-compatible regex then the matching
|
||||
algorithms will match the first possible matching string, if more than one
|
||||
string starting at a given location can match then it matches the longest
|
||||
possible string, unless the flag match_any is set, in which case the first
|
||||
match encountered is returned. Use of the match_any option can reduce the time
|
||||
taken to find the match - but is only useful if the user is less concerned
|
||||
about what matched - for example it would not be suitable for search and
|
||||
         replace operations. In cases where there are multiple possible matches all
|
||||
starting at the same location, and all of the same length, then the match
|
||||
chosen is the one with the longest first sub-expression, if that is the same
|
||||
for two or more matches, then the second sub-expression will be examined and so
|
||||
on.
|
||||
</P>
|
||||
<P>
|
||||
<TABLE id="Table5" cellSpacing="1" cellPadding="7" width="624" border="1">
|
||||
<TBODY>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>Expression</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>Text</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>POSIX leftmost longest match</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>ECMAScript depth first search match</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>a|ab</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>
|
||||
xaby</CODE>
|
||||
         The following table of examples illustrates the main differences between Perl and
|
||||
POSIX regular expression matching rules:
|
||||
</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>
|
||||
"ab"</CODE></P></TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>
|
||||
"a"</CODE></P></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>
|
||||
.*([[:alnum:]]+).*</CODE></P></TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>
|
||||
" abc def xyz "</CODE></P></TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>$0 = " abc def xyz "<BR>
|
||||
$1 = "abc"</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>$0 = " abc def xyz "<BR>
|
||||
$1 = "z"</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>
|
||||
.*(a|xayy)</CODE></P></TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>
|
||||
zzxayyzz</CODE></P></TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>
|
||||
"zzxayy"</CODE></P></TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>"zzxa"</CODE></P>
|
||||
</TD>
|
||||
</TR>
|
||||
</TBODY></CODE></TD></TR></TABLE>
|
||||
<P>
|
||||
<TABLE id="Table5" cellSpacing="1" cellPadding="7" width="624" border="1">
|
||||
<TBODY>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>Expression</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>Text</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>POSIX leftmost longest match</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>ECMAScript depth first search match</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>a|ab</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> xaby</CODE>
|
||||
</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> "ab"</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> "a"</CODE></P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> .*([[:alnum:]]+).*</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> " abc def xyz "</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>$0 = " abc def xyz "<BR>
|
||||
$1 = "abc"</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>$0 = " abc def xyz "<BR>
|
||||
$1 = "z"</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> .*(a|xayy)</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> zzxayyzz</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> "zzxayy"</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>"zzxa"</CODE></P>
|
||||
</TD>
|
||||
</TR>
|
||||
         </TBODY></TABLE>
|
||||
<P>These differences between Perl matching rules, and POSIX matching rules, mean
|
||||
that these two regular expression syntaxes differ not only in the features
|
||||
offered, but also in the form that the state machine takes and/or the
|
||||
algorithms used to traverse the state machine.</p>
|
||||
<HR>
|
||||
algorithms used to traverse the state machine.</P>
|
||||
<HR>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
      <p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
||||
|
@ -24,13 +24,15 @@
|
||||
</P>
|
||||
<HR>
|
||||
<H3>Synopsis</H3>
|
||||
<P>Type syntax_option type is an implementation defined bitmask type that controls
|
||||
how a regular expression string is to be interpreted. For convenience
|
||||
note that all the constants listed here, are also duplicated within the scope
|
||||
of class template <A href="basic_regex.html">basic_regex</A>.</P>
|
||||
      <P>Type syntax_option_type is an implementation specific bitmask type that
|
||||
controls how a regular expression string is to be interpreted. For
|
||||
         convenience, note that all the constants listed here are also duplicated within
|
||||
the scope of class template <A href="basic_regex.html">basic_regex</A>.</P>
|
||||
<PRE>namespace std{ namespace regex_constants{
|
||||
|
||||
typedef bitmask_type syntax_option_type;
|
||||
typedef <EM>implementation-specific-bitmask-type</EM>
|
||||
|
||||
syntax_option_type;
|
||||
// these flags are standardized:
|
||||
static const syntax_option_type normal;
|
||||
static const syntax_option_type icase;
|
||||
@ -50,7 +52,7 @@ static const syntax_option_type perl;<BR>// these are boost.regex specific:<BR>s
|
||||
} // namespace regex_constants
|
||||
} // namespace std</PRE>
|
||||
<H3>Description</H3>
|
||||
<P>The type <CODE>syntax_option_type</CODE> is an implementation defined bitmask
|
||||
<P>The type <CODE>syntax_option_type</CODE> is an implementation specific bitmask
|
||||
type (17.3.2.1.2). Setting its elements has the effects listed in the table
|
||||
below, a valid value of type <CODE>syntax_option_type</CODE> will always have
|
||||
exactly one of the elements <CODE>normal, basic, extended, awk, grep, egrep, sed
|
||||
@ -314,18 +316,15 @@ static const syntax_option_type perl;<BR>// these are boost.regex specific:<BR>s
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<HR>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
      <p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
||||
|
@ -24,6 +24,11 @@
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>Captures are the iterator ranges that are "captured" by marked sub-expressions
|
||||
as a regular expression gets matched. Each marked sub-expression can
|
||||
result in more than one capture, if it is matched more than once. This
|
||||
document explains how captures and marked sub-expressions in Boost.Regex are
|
||||
represented and accessed.</P>
|
||||
<H2>Marked sub-expressions</H2>
|
||||
<P>Every time a Perl regular expression contains a parenthesis group (), it spits
|
||||
out an extra field, known as a marked sub-expression, for example the
|
||||
@ -247,4 +252,3 @@ Text: "now is the time for all good men to come to the aid of the party"
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
@ -25,25 +25,32 @@
|
||||
<BR>
|
||||
<BR>
|
||||
<HR>
|
||||
<P>The author can be contacted at john@johnmaddock.co.uk; the
|
||||
home page for this library is at <A href="http://www.boost.org">www.boost.org</A>.</P>
|
||||
<P>I am indebted to Robert Sedgewick's "Algorithms in C++" for forcing me to think
|
||||
about algorithms and their performance, and to the folks at boost for forcing
|
||||
me to <I>think</I>, period. The following people have all contributed useful
|
||||
comments or fixes: Dave Abrahams, Mike Allison, Edan Ayal, Jayashree
|
||||
  Balasubramanian, Jan B&ouml;lsche, Beman Dawes, Paul Baxter, David Bergman, David
|
||||
Dennerline, Edward Diener, Peter Dimov, Robert Dunn, Fabio Forno, Tobias
|
||||
Gabrielsson, Rob Gillen, Marc Gregoire, Chris Hecker, Nick Hodapp, Jesse Jones,
|
||||
Martin Jost, Boris Krasnovskiy, Jan Hermelink, Max Leung, Wei-hao Lin, Jens
|
||||
Maurer, Richard Peters, Heiko Schmidt, Jason Shirk, Gerald Slacik, Scobie
|
||||
  Smith, Mike Smyth, Alexander Sokolovsky, Herv&eacute; Poirier, Michael Raykh, Marc
|
||||
Recht, Scott VanCamp, Bruno Voigt, Alexey Voinov, Jerry Waldorf, Rob Ward,
|
||||
Lealon Watts, Thomas Witt and Yuval Yosef. I am also grateful to the manuals
|
||||
supplied with the Henry Spencer, Perl and GNU regular expression libraries -
|
||||
wherever possible I have tried to maintain compatibility with these libraries
|
||||
and with the POSIX standard - the code however is entirely my own, including
|
||||
any bugs! I can absolutely guarantee that I will not fix any bugs I don't know
|
||||
about, so if you have any comments or spot any bugs, please get in touch.</P>
|
||||
<P>The author can be contacted at john@johnmaddock.co.uk; the home page for
|
||||
this library is at <A href="http://www.boost.org">www.boost.org</A>.</P>
|
||||
<P>I am indebted to <A href="http://www.cs.princeton.edu/~rs/">Robert Sedgewick's
|
||||
"Algorithms in C++" </A>for forcing me to think about algorithms and their
|
||||
performance, and to the folks at <A href="http://www.boost.org">boost</A> for
|
||||
forcing me to <I>think</I>, period.</P>
|
||||
<P><A href="http://www.boost-consulting.com">Eric Niebler</A>, author of the <A href="http://research.microsoft.com/projects/greta">
|
||||
GRETA regular expression component</A>, has shared several important ideas,
|
||||
in a series of long discussions.</P>
|
||||
<P>Pete Becker, of <A href="http://www.dinkumware.com/">Dinkumware Ltd</A>, has
|
||||
helped enormously with the standardisation proposal language.</P>
|
||||
<P>The following people have all contributed useful comments or fixes: Dave
|
||||
         Abrahams, Mike Allison, Edan Ayal, Jayashree Balasubramanian, Jan B&ouml;lsche,
|
||||
Beman Dawes, Paul Baxter, David Bergman, David Dennerline, Edward Diener, Peter
|
||||
Dimov, Robert Dunn, Fabio Forno, Tobias Gabrielsson, Rob Gillen, Marc Gregoire,
|
||||
Chris Hecker, Nick Hodapp, Jesse Jones, Martin Jost, Boris Krasnovskiy, Jan
|
||||
Hermelink, Max Leung, Wei-hao Lin, Jens Maurer, Richard Peters, Heiko Schmidt,
|
||||
Jason Shirk, Gerald Slacik, Scobie Smith, Mike Smyth, Alexander Sokolovsky,
|
||||
         Herv&eacute; Poirier, Michael Raykh, Marc Recht, Scott VanCamp, Bruno Voigt, Alexey
|
||||
Voinov, Jerry Waldorf, Rob Ward, Lealon Watts, John Wismar, Thomas Witt and
|
||||
Yuval Yosef. I am also grateful to the manuals supplied with the Henry Spencer,
|
||||
Perl and GNU regular expression libraries - wherever possible I have tried to
|
||||
maintain compatibility with these libraries and with the POSIX standard - the
|
||||
code however is entirely my own, including any bugs! I can absolutely guarantee
|
||||
that I will not fix any bugs I don't know about, so if you have any comments or
|
||||
spot any bugs, please get in touch.</P>
|
||||
<P>Useful further information can be found at:</P>
|
||||
<P>Short tutorials on regular expressions can be <A href="http://etext.lib.virginia.edu/helpsheets/regex.html">
|
||||
found here</A> and <A href="http://www.devshed.com/Server_Side/Administration/RegExp/page1.html">here</A>.</P>
|
||||
@ -72,8 +79,7 @@
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
      <p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
|
229
doc/faq.html
229
doc/faq.html
@ -1,153 +1,114 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<meta name="generator" content="HTML Tidy, see www.w3.org">
|
||||
<title>Boost.Regex: FAQ</title>
|
||||
<meta http-equiv="Content-Type" content=
|
||||
"text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<p></p>
|
||||
|
||||
<table id="Table1" cellspacing="1" cellpadding="1" width="100%"
|
||||
border="0">
|
||||
<tr>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt=
|
||||
"C++ Boost" src="../../../c++boost.gif" border="0"></a></h3>
|
||||
</td>
|
||||
<td width="353">
|
||||
<h1 align="center">Boost.Regex</h1>
|
||||
|
||||
<h2 align="center">FAQ</h2>
|
||||
</td>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt=
|
||||
"Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<br>
|
||||
<br>
|
||||
|
||||
|
||||
<hr>
|
||||
<font color="#ff0000"><font color="#ff0000"></font></font>
|
||||
<p><font color="#ff0000"><font color="#ff0000"><font color=
|
||||
"#ff0000"> Q. Why can't I use the "convenience" versions of
|
||||
regex_match / regex_search / regex_grep / regex_format /
|
||||
regex_merge?</font></font></font></p>
|
||||
|
||||
<p>A. These versions may or may not be available depending upon the
|
||||
capabilities of your compiler, the rules determining the format of
|
||||
these functions are quite complex - and only the versions visible
|
||||
to a standard compliant compiler are given in the help. To find out
|
||||
what your compiler supports, run <boost/regex.hpp> through
|
||||
your C++ pre-processor, and search the output file for the function
|
||||
that you are interested in.<font color="#ff0000"><font color=
|
||||
"#ff0000"></font></font></p>
|
||||
|
||||
<p><font color="#ff0000"><font color="#ff0000">Q. I can't get
|
||||
regex++ to work with escape characters, what's going
|
||||
on?</font></font></p>
|
||||
|
||||
<p>A. If you embed regular expressions in C++ code, then remember
|
||||
that escape characters are processed twice: once by the C++
|
||||
compiler, and once by the regex++ expression compiler, so to pass
|
||||
the regular expression \d+ to regex++, you need to embed "\\d+" in
|
||||
your code. Likewise to match a literal backslash you will need to
|
||||
embed "\\\\" in your code. <font color="#ff0000"></font></p>
|
||||
|
||||
<p><font color="#ff0000">Q. Why does using parenthesis in a POSIX
|
||||
regular expression change the result of a match?</font></p>
|
||||
|
||||
<p>For POSIX (extended and basic) regular expressions, but not for
|
||||
perl regexes, parentheses don't only mark; they determine what the
|
||||
best match is as well. When the expression is compiled as a POSIX
|
||||
basic or extended regex then Boost.regex follows the POSIX standard
|
||||
leftmost longest rule for determining what matched. So if there is
|
||||
more than one possible match after considering the whole
|
||||
expression, it looks next at the first sub-expression and then the
|
||||
second sub-expression and so on. So...</p>
|
||||
|
||||
<pre>
|
||||
<head>
|
||||
<title>Boost.Regex: FAQ</title>
|
||||
<meta name="generator" content="HTML Tidy, see www.w3.org">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" type="text/css" href="../../../boost.css">
|
||||
</head>
|
||||
<body>
|
||||
<p></p>
|
||||
<table id="Table1" cellspacing="1" cellpadding="1" width="100%" border="0">
|
||||
<tr>
|
||||
<td valign="top" width="300">
|
||||
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../c++boost.gif" border="0"></a></h3>
|
||||
</td>
|
||||
<td width="353">
|
||||
<h1 align="center">Boost.Regex</h1>
|
||||
<h2 align="center">FAQ</h2>
|
||||
</td>
|
||||
<td width="50">
|
||||
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
<hr>
|
||||
<font color="#ff0000"><font color="#ff0000"></font></font>
|
||||
<p><font color="#ff0000"><font color="#ff0000"><font color="#ff0000"> Q. Why can't I
|
||||
use the "convenience" versions of regex_match / regex_search / regex_grep /
|
||||
regex_format / regex_merge?</font></font></font></p>
|
||||
<p>A. These versions may or may not be available depending upon the capabilities
|
||||
of your compiler, the rules determining the format of these functions are quite
|
||||
complex - and only the versions visible to a standard compliant compiler are
|
||||
given in the help. To find out what your compiler supports, run
|
||||
<boost/regex.hpp> through your C++ pre-processor, and search the output
|
||||
file for the function that you are interested in.<font color="#ff0000"><font color="#ff0000"></font></font></p>
|
||||
<p><font color="#ff0000"><font color="#ff0000">Q. I can't get regex++ to work with
|
||||
escape characters, what's going on?</font></font></p>
|
||||
<p>A. If you embed regular expressions in C++ code, then remember that escape
|
||||
characters are processed twice: once by the C++ compiler, and once by the
|
||||
regex++ expression compiler, so to pass the regular expression \d+ to regex++,
|
||||
you need to embed "\\d+" in your code. Likewise to match a literal backslash
|
||||
you will need to embed "\\\\" in your code. <font color="#ff0000"></font>
|
||||
</p>
|
||||
      <p><font color="#ff0000">Q. Why does using parentheses in a POSIX regular expression
|
||||
change the result of a match?</font></p>
|
||||
<p>For POSIX (extended and basic) regular expressions, but not for perl regexes,
|
||||
parentheses don't only mark; they determine what the best match is as well.
|
||||
When the expression is compiled as a POSIX basic or extended regex then
|
||||
Boost.regex follows the POSIX standard leftmost longest rule for determining
|
||||
what matched. So if there is more than one possible match after considering the
|
||||
whole expression, it looks next at the first sub-expression and then the second
|
||||
sub-expression and so on. So...</p>
|
||||
<pre>
|
||||
"(0*)([0-9]*)" against "00123" would produce
|
||||
$1 = "00"
|
||||
$2 = "123"
|
||||
</pre>
|
||||
|
||||
<p>whereas</p>
|
||||
|
||||
<pre>
|
||||
"0*([0-9)*" against "00123" would produce
|
||||
      <p>whereas</p>
|
||||
<pre>
|
||||
"0*([0-9])*" against "00123" would produce
|
||||
$1 = "00123"
|
||||
</pre>
|
||||
|
||||
<p>If you think about it, had $1 only matched the "123", this would
|
||||
be "less good" than the match "00123" which is both further to the
|
||||
left and longer. If you want $1 to match only the "123" part, then
|
||||
you need to use something like:</p>
|
||||
|
||||
<pre>
|
||||
<p>If you think about it, had $1 only matched the "123", this would be "less good"
|
||||
than the match "00123" which is both further to the left and longer. If you
|
||||
want $1 to match only the "123" part, then you need to use something like:</p>
|
||||
<pre>
|
||||
"0*([1-9][0-9]*)"
|
||||
</pre>
|
||||
|
||||
<p>as the expression.</p>
|
||||
|
||||
<p><font color="#ff0000">Q. Why don't character ranges work
|
||||
properly (POSIX mode only)?</font><br>
|
||||
A. The POSIX standard specifies that character range expressions
|
||||
are locale sensitive - so for example the expression [A-Z] will
|
||||
match any collating element that collates between 'A' and 'Z'. That
|
||||
means that for most locales other than "C" or "POSIX", [A-Z] would
|
||||
match the single character 't' for example, which is not what most
|
||||
people expect - or at least not what most people have come to
|
||||
expect from regular expression engines. For this reason, the
|
||||
default behaviour of boost.regex (perl mode) is to turn locale
|
||||
sensitive collation off by not setting the regex_constants::collate
|
||||
compile time flag. However if you set a non-default compile time
|
||||
flag - for example regex_constants::extended or
|
||||
regex_constants::basic, then locale dependent collation will be
|
||||
enabled, this also applies to the POSIX API functions which use
|
||||
either regex_constants::extended or regex_constants::basic
|
||||
internally. <i>[Note - when regex_constants::nocollate in effect,
|
||||
the library behaves "as if" the LC_COLLATE locale category were
|
||||
always "C", regardless of what its actually set to - end
|
||||
note</i>].</p>
|
||||
|
||||
<p><font color="#ff0000">Q. Why are there no throw specifications
|
||||
on any of the functions? What exceptions can the library
|
||||
throw?</font></p>
|
||||
|
||||
<p>A. Not all compilers support (or honor) throw specifications,
|
||||
others support them but with reduced efficiency. Throw
|
||||
specifications may be added at a later date as compilers begin to
|
||||
handle this better. The library should throw only three types of
|
||||
exception: boost::bad_expression can be thrown by basic_regex when
|
||||
compiling a regular expression, std::runtime_error can be thrown
|
||||
when a call to basic_regex::imbue tries to open a message catalogue
|
||||
that doesn't exist, or when a call to regex_search or regex_match
|
||||
results in an "everlasting" search, or when a call to
|
||||
RegEx::GrepFiles or RegEx::FindFiles tries to open a file that
|
||||
cannot be opened, finally std::bad_alloc can be thrown by just
|
||||
about any of the functions in this library.</p>
|
||||
|
||||
<p></p>
|
||||
|
||||
<hr>
|
||||
<p>as the expression.</p>
|
||||
<p><font color="#ff0000">Q. Why don't character ranges work properly (POSIX mode
|
||||
only)?</font><br>
|
||||
A. The POSIX standard specifies that character range expressions are locale
|
||||
sensitive - so for example the expression [A-Z] will match any collating
|
||||
element that collates between 'A' and 'Z'. That means that for most locales
|
||||
other than "C" or "POSIX", [A-Z] would match the single character 't' for
|
||||
example, which is not what most people expect - or at least not what most
|
||||
people have come to expect from regular expression engines. For this reason,
|
||||
the default behaviour of boost.regex (perl mode) is to turn locale sensitive
|
||||
collation off by not setting the regex_constants::collate compile time flag.
|
||||
However if you set a non-default compile time flag - for example
|
||||
regex_constants::extended or regex_constants::basic, then locale dependent
|
||||
collation will be enabled, this also applies to the POSIX API functions which
|
||||
use either regex_constants::extended or regex_constants::basic internally. <i>[Note
|
||||
- when regex_constants::nocollate is in effect, the library behaves "as if" the
|
||||
LC_COLLATE locale category were always "C", regardless of what it's actually set
|
||||
to - end note</i>].</p>
|
||||
<p><font color="#ff0000">Q. Why are there no throw specifications on any of the
|
||||
functions? What exceptions can the library throw?</font></p>
|
||||
<p>A. Not all compilers support (or honor) throw specifications, others support
|
||||
them but with reduced efficiency. Throw specifications may be added at a later
|
||||
date as compilers begin to handle this better. The library should throw only
|
||||
three types of exception: boost::bad_expression can be thrown by basic_regex
|
||||
when compiling a regular expression, std::runtime_error can be thrown when a
|
||||
call to basic_regex::imbue tries to open a message catalogue that doesn't
|
||||
exist, or when a call to regex_search or regex_match results in an
|
||||
"everlasting" search, or when a call to RegEx::GrepFiles or
|
||||
RegEx::FindFiles tries to open a file that cannot be opened, finally
|
||||
std::bad_alloc can be thrown by just about any of the functions in this
|
||||
library.</p>
|
||||
<p></p>
|
||||
<hr>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
||||
|
@ -46,10 +46,10 @@
|
||||
<dl class="index">
|
||||
<dt><a href="syntax_option_type.html">syntax_option_type</a></dt> <dt><a href="match_flag_type.html">
|
||||
match_flag_type</a></dt> <dt><a href="bad_expression.html">class bad_expression</a></dt>
|
||||
<dt><a href="regex_traits.html">class regex_traits</a></dt> <dt><a href="basic_regex.html">
|
||||
class template basic_regex</a></dt> <dt><a href="sub_match.html">class template
|
||||
sub_match</a></dt> <dt><a href="match_results.html">class template
|
||||
match_results</a></dt>
|
||||
<dt><a href="regex_traits.html">class regex_traits</a></dt>
|
||||
<dt><a href="basic_regex.html">class template basic_regex</a></dt>
|
||||
<dt><a href="sub_match.html">class template sub_match</a></dt>
|
||||
<dt><a href="match_results.html">class template match_results</a></dt>
|
||||
</dl>
|
||||
</dd>
|
||||
<dt>Algorithms</dt>
|
||||
@ -66,6 +66,25 @@
|
||||
<dt><a href="regex_token_iterator.html">regex_token_iterator</a></dt>
|
||||
</dl>
|
||||
</dd>
|
||||
<dt>Typedefs</dt>
|
||||
<dd>
|
||||
<dl class="index">
|
||||
<dt><a href="basic_regex.html">regex</a> [ = basic_regex<char> ]</dt>
|
||||
<dt><a href="basic_regex.html">wregex</a> [ = basic_regex<wchar_t> ]</dt>
|
||||
<dt><a href="match_results.html">cmatch</a> [ = match_results<const char*> ]</dt>
|
||||
<dt><a href="match_results.html">wcmatch</a> [ = match_results<const wchar_t*> ]</dt>
|
||||
<dt><a href="match_results.html">smatch</a> [ = match_results<std::string::const_iterator> ]</dt>
|
||||
<dt><a href="match_results.html">wsmatch</a> [ = match_results<std::wstring::const_iterator> ]</dt>
|
||||
<dt><a href="regex_iterator.html">cregex_iterator</a> [ = regex_iterator<const char*>]</dt>
|
||||
<dt><a href="regex_iterator.html">wcregex_iterator</a> [ = regex_iterator<const wchar_t*>]</dt>
|
||||
<dt><a href="regex_iterator.html">sregex_iterator</a> [ = regex_iterator<std::string::const_iterator>]</dt>
|
||||
<dt><a href="regex_iterator.html">wsregex_iterator</a> [ = regex_iterator<std::wstring::const_iterator>]</dt>
|
||||
<dt><a href="regex_token_iterator.html">cregex_token_iterator</a> [ = regex_token_iterator<const char*>]</dt>
|
||||
<dt><a href="regex_token_iterator.html">wcregex_token_iterator</a> [ = regex_token_iterator<const wchar_t*>]</dt>
|
||||
<dt><a href="regex_token_iterator.html">sregex_token_iterator</a> [ = regex_token_iterator<std::string::const_iterator>]</dt>
|
||||
<dt><a href="regex_token_iterator.html">wsregex_token_iterator</a> [ = regex_token_iterator<std::wstring::const_iterator>]</dt>
|
||||
</dl>
|
||||
</dd>
|
||||
<dt>Misc.</dt>
|
||||
<dd>
|
||||
<dl class="index">
|
||||
|
@ -26,14 +26,14 @@
|
||||
<br>
|
||||
<hr>
|
||||
<h3>Synopsis</h3>
|
||||
<p>The type <code>match_flag_type</code> is an implementation defined bitmask type
|
||||
(17.3.2.1.2) that controls how a regular expression is matched against a
|
||||
<p>The type <code>match_flag_type</code> is an implementation specific bitmask
|
||||
type (17.3.2.1.2) that controls how a regular expression is matched against a
|
||||
character sequence. The behavior of the format flags is described in more
|
||||
detail in the <A href="format_syntax.html">format syntax guide</A>.</p>
|
||||
<pre>
|
||||
namespace std{ namespace regex_constants{
|
||||
namespace boost{ namespace regex_constants{
|
||||
|
||||
typedef bitmask_type match_flag_type;
|
||||
typedef <EM>implementation-specific-bitmask-type</EM> match_flag_type;
|
||||
|
||||
static const match_flag_type match_default = 0;
|
||||
static const match_flag_type match_not_bob;
|
||||
@ -59,11 +59,11 @@ static const match_flag_type format_first_only;
|
||||
static const match_flag_type format_all;
|
||||
|
||||
} // namespace regex_constants
|
||||
} // namespace std
|
||||
} // namespace boost
|
||||
</pre>
|
||||
<h3>Description</h3>
|
||||
<p>The type <code>match_flag_type</code> is an implementation defined bitmask type
|
||||
(17.3.2.1.2). When matching a regular expression against a sequence of
|
||||
<p>The type <code>match_flag_type</code> is an implementation specific bitmask
|
||||
type (17.3.2.1.2). When matching a regular expression against a sequence of
|
||||
characters [first, last) then setting its elements has the effects listed in
|
||||
the table below:</p>
|
||||
<p></p>
|
||||
@ -271,10 +271,10 @@ static const match_flag_type format_all;
|
||||
<br>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
04 Feb 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
|
@ -344,7 +344,7 @@ const_iterator end()const;
|
||||
<p><b>Effects:</b> Returns a terminating iterator that enumerates over all the
|
||||
marked sub-expression matches stored in *this.</p>
|
||||
<h4><A name="format"></A>match_results reformatting</h4>
|
||||
<pre>template <class OutputIterator>
|
||||
<pre><A name=m12></A>template <class OutputIterator>
|
||||
OutputIterator format(OutputIterator out,
|
||||
const string_type& fmt,
|
||||
<A href="match_flag_type.html" >match_flag_type</A> flags = format_default);
|
||||
|
998
doc/regex.html
998
doc/regex.html
File diff suppressed because it is too large
Load Diff
@ -42,7 +42,7 @@
|
||||
iterator first,
|
||||
iterator last,
|
||||
<b>const</b> basic_regex<charT, traits, Allocator>& e,
|
||||
<b>unsigned</b> flags = match_default)
|
||||
boost::match_flag_type flags = match_default)
|
||||
</pre>
|
||||
<p>The library also defines the following convenience versions, which take either
|
||||
a const charT*, or a const std::basic_string<>& in place of a pair of
|
||||
@ -53,13 +53,13 @@
|
||||
<b>unsigned</b> <b>int</b> regex_grep(Predicate foo,
|
||||
<b>const</b> charT* str,
|
||||
<b>const</b> basic_regex<charT, traits, Allocator>& e,
|
||||
<b>unsigned</b> flags = match_default);
|
||||
boost::match_flag_type flags = match_default);
|
||||
|
||||
<b>template</b> <<b>class</b> Predicate, <b>class</b> ST, <b>class</b> SA, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits>
|
||||
<b>unsigned</b> <b>int</b> regex_grep(Predicate foo,
|
||||
<b>const</b> std::basic_string<charT, ST, SA>& s,
|
||||
<b>const</b> basic_regex<charT, traits, Allocator>& e,
|
||||
<b>unsigned</b> flags = match_default);
|
||||
boost::match_flag_type flags = match_default);
|
||||
</pre>
|
||||
<p>The parameters for the primary version of regex_grep have the following
|
||||
meanings: </p>
|
||||
@ -370,11 +370,10 @@ index[std::string(what[5].first, what[5].second) + std::string(what[6].first, wh
|
||||
<hr>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
04 Feb 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
|
@ -294,7 +294,7 @@ void</B> IndexClasses(map_type& m, <B>const</B> std::string& file)
|
||||
start = file.begin();
|
||||
end = file.end();
|
||||
boost::<a href="match_results.html">match_results</a><std::string::const_iterator> what;
|
||||
<B>unsigned</B> <B>int</B> flags = boost::match_default;
|
||||
boost::match_flag_type flags = boost::match_default;
|
||||
<B>while</B>(regex_search(start, end, what, expression, flags))
|
||||
{
|
||||
<FONT color=#000080> <I>// what[0] contains the whole string
|
||||
@ -314,11 +314,10 @@ void</B> IndexClasses(map_type& m, <B>const</B> std::string& file)
|
||||
<HR>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
04 Feb 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
|
@ -38,15 +38,15 @@
|
||||
<PRE><B>template</B> <<B>class</B> OutputIterator, <B>class</B> charT, <B>class</B> Traits1, <B>class</B> Alloc1, <B>class</B> Traits2, <B>class</B> Alloc2>
|
||||
std::size_t regex_split(OutputIterator out,
|
||||
std::basic_string<charT, Traits1, Alloc1>& s,
|
||||
<B> const</B> basic_regex<charT, Traits2, Alloc2>& e,
|
||||
<B> unsigned</B> flags,
|
||||
<B> const</B> basic_regex<charT, Traits2, Alloc2>& e,
|
||||
<STRONG> </STRONG>boost::match_flag_type flags,
|
||||
std::size_t max_split);
|
||||
|
||||
<B>template</B> <<B>class</B> OutputIterator, <B>class</B> charT, <B>class</B> Traits1, <B>class</B> Alloc1, <B>class</B> Traits2, <B>class</B> Alloc2>
|
||||
std::size_t regex_split(OutputIterator out,
|
||||
std::basic_string<charT, Traits1, Alloc1>& s,
|
||||
<B> const</B> basic_regex<charT, Traits2, Alloc2>& e,
|
||||
<B>unsigned</B> flags = match_default);
|
||||
<B> const</B> basic_regex<charT, Traits2, Alloc2>& e,
|
||||
boost::match_flag_type flags = match_default);
|
||||
|
||||
<B>template</B> <<B>class</B> OutputIterator, <B>class</B> charT, <B>class</B> Traits1, <B>class</B> Alloc1>
|
||||
std::size_t regex_split(OutputIterator out,
|
||||
@ -134,11 +134,10 @@ boost::regex e(<FONT color=#000080>"<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)
|
||||
<HR>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
04 Feb 2004
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
|
@ -76,7 +76,7 @@ typedef regex_token_iterator<const char*> cregex_token_i
|
||||
typedef regex_token_iterator<std::string::const_iterator> sregex_token_iterator;
|
||||
#ifndef BOOST_NO_WREGEX
|
||||
typedef regex_token_iterator<const wchar_t*> wcregex_token_iterator;
|
||||
typedef regex_token_iterator<std::wstring::const_iterator> wsregex_token_iterator;
|
||||
typedef regex_token_iterator<std::wstring::const_iterator> wsregex_token_iterator;
|
||||
#endif
|
||||
</PRE>
|
||||
<H3><A name="description"></A>Description</H3>
|
||||
@ -84,7 +84,8 @@ typedef regex_token_iterator<<std::wstring::const_iterator> wsregex_token_
|
||||
<P><B> Effects:</B> constructs an end of sequence iterator.</P>
|
||||
<PRE><A name=c2></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||||
int submatch = 0, match_flag_type m = match_default);</PRE>
|
||||
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>.</P>
|
||||
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>. Object re shall exist
|
||||
for the lifetime of the iterator constructed from it.</P>
|
||||
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate one
|
||||
string for each regular expression match of the expression <EM>re</EM> found
|
||||
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>. The
|
||||
@ -99,7 +100,8 @@ typedef regex_token_iterator<<std::wstring::const_iterator> wsregex_token_
|
||||
configured</A> in non-recursive mode).</P>
|
||||
<PRE><A name=c3></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||||
const std::vector<int>& submatches, match_flag_type m = match_default);</PRE>
|
||||
<P><B> Preconditions:</B> <CODE>submatches.size() && !re.empty()</CODE>.</P>
|
||||
<P><B> Preconditions:</B> <CODE>submatches.size() && !re.empty()</CODE>.
|
||||
Object re shall exist for the lifetime of the iterator constructed from it.</P>
|
||||
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate <EM>submatches.size()</EM>
|
||||
strings for each regular expression match of the expression <EM>re</EM> found
|
||||
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>. For
|
||||
@ -118,7 +120,8 @@ typedef regex_token_iterator<<std::wstring::const_iterator> wsregex_token_
|
||||
<PRE><A name=c4></A>template <std::size_t R>
|
||||
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||||
const int (&submatches)[R], match_flag_type m = match_default);</PRE>
|
||||
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>.</P>
|
||||
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>. Object re shall exist
|
||||
for the lifetime of the iterator constructed from it.</P>
|
||||
<P><STRONG>Effects:</STRONG> constructs a regex_token_iterator that will
|
||||
enumerate <EM>R</EM> strings for each regular expression match of the
|
||||
expression <EM>re</EM> found within the sequence <EM>[a,b)</EM>, using match
|
||||
|
@ -24,10 +24,12 @@
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>Under construction.</P>
|
||||
<P>The current boost.regex traits class design will be migrated to that specified
|
||||
in the <A href="http://anubis.dkuug.dk/jtc1/sc22/wg21/docs/papers/2003/n1429.htm">regular
|
||||
expression standardization proposal</A>. </P>
|
||||
<P>
|
||||
Under construction: the current design will be replaced by that specified in
|
||||
the <A href="http://anubis.dkuug.dk/jtc1/sc22/wg21/docs/papers/2003/n1429.htm">regular
|
||||
expression standardization proposal</A>, the current (obsolete) design has
|
||||
its <A href="http://cvs.sourceforge.net/viewcvs.py/*checkout*/boost/boost/libs/regex/Attic/traits_class_ref.htm?rev=1.11">
|
||||
documentation archived online</A>.</P>
|
||||
<P>
|
||||
<HR>
|
||||
<P></P>
|
||||
@ -36,11 +38,9 @@
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
199
doc/syntax.html
199
doc/syntax.html
@ -91,18 +91,18 @@
|
||||
<P>Parentheses serve two purposes, to group items together into a sub-expression,
|
||||
and to mark what generated the match. For example the expression "(ab)*" would
|
||||
match all of the string "ababab". The matching algorithms <A href="regex_match.html">
|
||||
regex_match</A> and <A href="regex_search.html">regex_search</A>
|
||||
each take an instance of <A href="match_results.html">match_results</A>
|
||||
that reports what caused the match, on exit from these functions the <A href="match_results.html">
|
||||
match_results</A> contains information both on what the whole expression
|
||||
matched and on what each sub-expression matched. In the example above
|
||||
match_results[1] would contain a pair of iterators denoting the final "ab" of
|
||||
the matching string. It is permissible for sub-expressions to match null
|
||||
strings. If a sub-expression takes no part in a match - for example if it is
|
||||
part of an alternative that is not taken - then both of the iterators that are
|
||||
returned for that sub-expression point to the end of the input string, and the <I>matched</I>
|
||||
parameter for that sub-expression is <I>false</I>. Sub-expressions are indexed
|
||||
from left to right starting from 1, sub-expression 0 is the whole expression.
|
||||
regex_match</A> and <A href="regex_search.html">regex_search</A> each take
|
||||
an instance of <A href="match_results.html">match_results</A> that reports what
|
||||
caused the match, on exit from these functions the <A href="match_results.html">match_results</A>
|
||||
contains information both on what the whole expression matched and on what each
|
||||
sub-expression matched. In the example above match_results[1] would contain a
|
||||
pair of iterators denoting the final "ab" of the matching string. It is
|
||||
permissible for sub-expressions to match null strings. If a sub-expression
|
||||
takes no part in a match - for example if it is part of an alternative that is
|
||||
not taken - then both of the iterators that are returned for that
|
||||
sub-expression point to the end of the input string, and the <I>matched</I> parameter
|
||||
for that sub-expression is <I>false</I>. Sub-expressions are indexed from left
|
||||
to right starting from 1, sub-expression 0 is the whole expression.
|
||||
</P>
|
||||
<H3>Non-Marking Parenthesis
|
||||
</H3>
|
||||
@ -143,7 +143,7 @@
|
||||
<P>A set is a set of characters that can match any single character that is a
|
||||
member of the set. Sets are delimited by "[" and "]" and can contain literals,
|
||||
character ranges, character classes, collating elements and equivalence
|
||||
classes. Set declarations that start with "^" contain the compliment of the
|
||||
classes. Set declarations that start with "^" contain the complement of the
|
||||
elements that follow.
|
||||
</P>
|
||||
<P>Examples:
|
||||
@ -293,7 +293,7 @@
|
||||
[^[.ae.]] would only match one character.
|
||||
</P>
|
||||
<P>
|
||||
Equivalence classes take the general form[=tagname=] inside a set declaration,
|
||||
Equivalence classes take the general form [=tagname=] inside a set declaration,
|
||||
where <I>tagname</I> is either a single character, or a name of a collating
|
||||
element, and matches any character that is a member of the same primary
|
||||
equivalence class as the collating element [.tagname.]. An equivalence class is
|
||||
@ -302,7 +302,7 @@
|
||||
typically collated by character, then by accent, and then by case; the primary
|
||||
sort key then relates to the character, the secondary to the accentation, and
|
||||
the tertiary to the case). If there is no equivalence class corresponding to <I>tagname</I>
|
||||
, then[=tagname=] is exactly the same as [.tagname.]. Unfortunately there is no
|
||||
, then [=tagname=] is exactly the same as [.tagname.]. Unfortunately there is no
|
||||
locale independent method of obtaining the primary sort key for a character,
|
||||
except under Win32. For other operating systems the library will "guess" the
|
||||
primary sort key from the full sort key (obtained from <I>strxfrm</I>), so
|
||||
@ -666,106 +666,103 @@
|
||||
<H3>What gets matched?
|
||||
</H3>
|
||||
<P>
|
||||
When the expression is compiled as a Perl-compatible regex then the matching
|
||||
algorithms will perform a depth first search on the state machine and report
|
||||
the first match found.</P>
|
||||
When the expression is compiled as a Perl-compatible regex then the matching
|
||||
algorithms will perform a depth first search on the state machine and report
|
||||
the first match found.</P>
|
||||
<P>
|
||||
When the expression is compiled as a POSIX-compatible regex then the matching
|
||||
algorithms will match the first possible matching string, if more than one
|
||||
string starting at a given location can match then it matches the longest
|
||||
possible string, unless the flag match_any is set, in which case the first
|
||||
match encountered is returned. Use of the match_any option can reduce the time
|
||||
taken to find the match - but is only useful if the user is less concerned
|
||||
about what matched - for example it would not be suitable for search and
|
||||
replace operations. In cases where there are multiple possible matches all
|
||||
starting at the same location, and all of the same length, then the match
|
||||
chosen is the one with the longest first sub-expression, if that is the same
|
||||
for two or more matches, then the second sub-expression will be examined and so
|
||||
on.
|
||||
</P><P>
|
||||
The following table examples illustrate the main differences between Perl and
|
||||
POSIX regular expression matching rules:
|
||||
When the expression is compiled as a POSIX-compatible regex then the matching
|
||||
algorithms will match the first possible matching string, if more than one
|
||||
string starting at a given location can match then it matches the longest
|
||||
possible string, unless the flag match_any is set, in which case the first
|
||||
match encountered is returned. Use of the match_any option can reduce the time
|
||||
taken to find the match - but is only useful if the user is less concerned
|
||||
about what matched - for example it would not be suitable for search and
|
||||
replace operations. In cases where there are multiple possible matches all
|
||||
starting at the same location, and all of the same length, then the match
|
||||
chosen is the one with the longest first sub-expression, if that is the same
|
||||
for two or more matches, then the second sub-expression will be examined and so
|
||||
on.
|
||||
</P>
|
||||
<P>
|
||||
<TABLE id="Table5" cellSpacing="1" cellPadding="7" width="624" border="1">
|
||||
<TBODY>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>Expression</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>Text</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>POSIX leftmost longest match</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>ECMAScript depth first search match</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>a|ab</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>
|
||||
xaby</CODE>
|
||||
The following table examples illustrate the main differences between Perl and
|
||||
POSIX regular expression matching rules:
|
||||
</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>
|
||||
"ab"</CODE></P></TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>
|
||||
"a"</CODE></P></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>
|
||||
.*([[:alnum:]]+).*</CODE></P></TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>
|
||||
" abc def xyz "</CODE></P></TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>$0 = " abc def xyz "<BR>
|
||||
$1 = "abc"</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>$0 = " abc def xyz "<BR>
|
||||
$1 = "z"</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>
|
||||
.*(a|xayy)</CODE></P></TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>
|
||||
zzxayyzz</CODE></P></TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>
|
||||
"zzxayy"</CODE></P></TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>"zzxa"</CODE></P>
|
||||
</TD>
|
||||
</TR>
|
||||
</TBODY></CODE></TD></TR></TABLE>
|
||||
<P>
|
||||
<TABLE id="Table5" cellSpacing="1" cellPadding="7" width="624" border="1">
|
||||
<TBODY>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>Expression</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>Text</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>POSIX leftmost longest match</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>ECMAScript depth first search match</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>a|ab</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> xaby</CODE>
|
||||
</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> "ab"</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> "a"</CODE></P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> .*([[:alnum:]]+).*</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> " abc def xyz "</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>$0 = " abc def xyz "<BR>
|
||||
$1 = "abc"</P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P>$0 = " abc def xyz "<BR>
|
||||
$1 = "z"</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> .*(a|xayy)</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> zzxayyzz</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE> "zzxayy"</CODE></P>
|
||||
</TD>
|
||||
<TD vAlign="top" width="25%">
|
||||
<P><CODE>"zzxa"</CODE></P>
|
||||
</TD>
|
||||
</TR>
|
||||
</TBODY></TABLE>
|
||||
<P>These differences between Perl matching rules, and POSIX matching rules, mean
|
||||
that these two regular expression syntaxes differ not only in the features
|
||||
offered, but also in the form that the state machine takes and/or the
|
||||
algorithms used to traverse the state machine.</p>
|
||||
<HR>
|
||||
algorithms used to traverse the state machine.</P>
|
||||
<HR>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
||||
|
@ -24,13 +24,15 @@
|
||||
</P>
|
||||
<HR>
|
||||
<H3>Synopsis</H3>
|
||||
<P>Type syntax_option_type is an implementation defined bitmask type that controls
|
||||
how a regular expression string is to be interpreted. For convenience
|
||||
note that all the constants listed here, are also duplicated within the scope
|
||||
of class template <A href="basic_regex.html">basic_regex</A>.</P>
|
||||
<P>Type syntax_option_type is an implementation specific bitmask type that
|
||||
controls how a regular expression string is to be interpreted. For
|
||||
convenience note that all the constants listed here, are also duplicated within
|
||||
the scope of class template <A href="basic_regex.html">basic_regex</A>.</P>
|
||||
<PRE>namespace std{ namespace regex_constants{
|
||||
|
||||
typedef bitmask_type syntax_option_type;
|
||||
typedef <EM>implementation-specific-bitmask-type</EM>
|
||||
|
||||
syntax_option_type;
|
||||
// these flags are standardized:
|
||||
static const syntax_option_type normal;
|
||||
static const syntax_option_type icase;
|
||||
@ -50,7 +52,7 @@ static const syntax_option_type perl;<BR>// these are boost.regex specific:<BR>s
|
||||
} // namespace regex_constants
|
||||
} // namespace std</PRE>
|
||||
<H3>Description</H3>
|
||||
<P>The type <CODE>syntax_option_type</CODE> is an implementation defined bitmask
|
||||
<P>The type <CODE>syntax_option_type</CODE> is an implementation specific bitmask
|
||||
type (17.3.2.1.2). Setting its elements has the effects listed in the table
|
||||
below, a valid value of type <CODE>syntax_option_type</CODE> will always have
|
||||
exactly one of the elements <CODE>normal, basic, extended, awk, grep, egrep, sed
|
||||
@ -314,18 +316,15 @@ static const syntax_option_type perl;<BR>// these are boost.regex specific:<BR>s
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<HR>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
||||
|
@ -185,6 +185,14 @@ public:
|
||||
{
|
||||
return m_pdata->m_flags;
|
||||
}
|
||||
void flags(regbase::flag_type f)
|
||||
{
|
||||
m_pdata->m_flags = f;
|
||||
if(m_icase != static_cast<bool>(f & regbase::icase))
|
||||
{
|
||||
m_icase = static_cast<bool>(f & regbase::icase);
|
||||
}
|
||||
}
|
||||
re_syntax_base* append_state(syntax_element_type t, std::size_t s = sizeof(re_syntax_base));
|
||||
re_syntax_base* insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s = sizeof(re_syntax_base));
|
||||
re_literal* append_literal(charT c);
|
||||
@ -633,10 +641,19 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
|
||||
// recursive implementation:
|
||||
// create the last map in the machine first, so that earlier maps
|
||||
// can make use of the result...
|
||||
|
||||
// start by saving the case setting:
|
||||
bool l_icase = m_icase;
|
||||
|
||||
while(state)
|
||||
{
|
||||
switch(state->type)
|
||||
{
|
||||
case syntax_element_toggle_case:
|
||||
// we need to track case changes here:
|
||||
m_icase = static_cast<re_case*>(state)->icase;
|
||||
state = state->next.p;
|
||||
continue;
|
||||
case syntax_element_alt:
|
||||
case syntax_element_rep:
|
||||
case syntax_element_dot_rep:
|
||||
@ -652,6 +669,8 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
|
||||
create_startmap(static_cast<re_alt*>(state)->alt.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_skip);
|
||||
// adjust the type of the state to allow for faster matching:
|
||||
state->type = this->get_repeat_type(state);
|
||||
// restore case sensitivity:
|
||||
m_icase = l_icase;
|
||||
return;
|
||||
case syntax_element_backstep:
|
||||
// we need to calculate how big the backstep is:
|
||||
@ -662,6 +681,8 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
|
||||
state = state->next.p;
|
||||
}
|
||||
}
|
||||
// restore case sensitivity:
|
||||
m_icase = l_icase;
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
@ -726,10 +747,18 @@ template <class charT, class traits>
|
||||
void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask)
|
||||
{
|
||||
int not_last_jump = 1;
|
||||
|
||||
// track case sensitivity:
|
||||
bool l_icase = m_icase;
|
||||
|
||||
while(state)
|
||||
{
|
||||
switch(state->type)
|
||||
{
|
||||
case syntax_element_toggle_case:
|
||||
l_icase = static_cast<re_case*>(state)->icase;
|
||||
state = state->next.p;
|
||||
break;
|
||||
case syntax_element_literal:
|
||||
{
|
||||
// don't set anything in *pnull, set each element in l_map
|
||||
@ -740,7 +769,7 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
|
||||
charT first_char = *static_cast<charT*>(static_cast<void*>(static_cast<re_literal*>(state) + 1));
|
||||
for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
|
||||
{
|
||||
if(m_traits.translate(static_cast<charT>(i), m_icase) == first_char)
|
||||
if(m_traits.translate(static_cast<charT>(i), l_icase) == first_char)
|
||||
l_map[i] |= mask;
|
||||
}
|
||||
}
|
||||
@ -826,7 +855,7 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
|
||||
for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
|
||||
{
|
||||
charT c = static_cast<charT>(i);
|
||||
if(&c != re_is_set_member(&c, &c + 1, static_cast<re_set_long<mask_type>*>(state), *m_pdata))
|
||||
if(&c != re_is_set_member(&c, &c + 1, static_cast<re_set_long<mask_type>*>(state), *m_pdata, m_icase))
|
||||
l_map[i] |= mask;
|
||||
}
|
||||
}
|
||||
@ -841,7 +870,7 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
|
||||
for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
|
||||
{
|
||||
if(static_cast<re_set*>(state)->_map[
|
||||
static_cast<unsigned char>(m_traits.translate(static_cast<charT>(i), this->m_icase))])
|
||||
static_cast<unsigned char>(m_traits.translate(static_cast<charT>(i), l_icase))])
|
||||
l_map[i] |= mask;
|
||||
}
|
||||
}
|
||||
|
@ -53,6 +53,7 @@ public:
|
||||
bool parse_perl_extension();
|
||||
digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
|
||||
charT unescape_character();
|
||||
regex_constants::syntax_option_type parse_options();
|
||||
|
||||
private:
|
||||
typedef bool (basic_regex_parser::*parser_proc_type)();
|
||||
@ -65,6 +66,7 @@ private:
|
||||
unsigned m_mark_count; // how many sub-expressions we have
|
||||
std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
|
||||
std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
|
||||
bool m_has_case_change; // true if somewhere in the current block the case has changed
|
||||
|
||||
basic_regex_parser& operator=(const basic_regex_parser&);
|
||||
basic_regex_parser(const basic_regex_parser&);
|
||||
@ -72,7 +74,7 @@ private:
|
||||
|
||||
template <class charT, class traits>
|
||||
basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
|
||||
: basic_regex_creator<charT, traits>(data), m_mark_count(0), m_paren_start(0), m_alt_insert_point(0)
|
||||
: basic_regex_creator<charT, traits>(data), m_mark_count(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false)
|
||||
{
|
||||
}
|
||||
|
||||
@ -103,6 +105,8 @@ void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2,
|
||||
|
||||
// parse all our characters:
|
||||
bool result = parse_all();
|
||||
// reset flags as a global scope (?imsx) may have altered them:
|
||||
this->flags(flags);
|
||||
// if we haven't gobbled up all the characters then we must
|
||||
// have had an unexpected ')' :
|
||||
if(!result)
|
||||
@ -184,11 +188,13 @@ bool basic_regex_parser<charT, traits>::parse_extended()
|
||||
return parse_match_any();
|
||||
case regex_constants::syntax_caret:
|
||||
++m_position;
|
||||
this->append_state(syntax_element_start_line);
|
||||
this->append_state(
|
||||
(this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line));
|
||||
break;
|
||||
case regex_constants::syntax_dollar:
|
||||
++m_position;
|
||||
this->append_state(syntax_element_end_line);
|
||||
this->append_state(
|
||||
(this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line));
|
||||
break;
|
||||
case regex_constants::syntax_star:
|
||||
if(m_position == this->m_base)
|
||||
@ -218,6 +224,19 @@ bool basic_regex_parser<charT, traits>::parse_extended()
|
||||
return parse_alt();
|
||||
case regex_constants::syntax_open_set:
|
||||
return parse_set();
|
||||
case regex_constants::syntax_hash:
|
||||
//
|
||||
// If we have a mod_x flag set, then skip until
|
||||
// we get to a newline character:
|
||||
//
|
||||
if((this->flags()
|
||||
& (regbase::no_perl_ex|regbase::mod_x))
|
||||
== regbase::mod_x)
|
||||
{
|
||||
while((m_position != m_end) && !is_separator(*m_position++)){}
|
||||
return true;
|
||||
}
|
||||
// Otherwise fall through:
|
||||
default:
|
||||
result = parse_literal();
|
||||
break;
|
||||
@ -231,7 +250,14 @@ bool basic_regex_parser<charT, traits>::parse_extended()
|
||||
template <class charT, class traits>
|
||||
bool basic_regex_parser<charT, traits>::parse_literal()
|
||||
{
|
||||
this->append_literal(*m_position);
|
||||
// append this as a literal provided it's not a space character
|
||||
// or the perl option regbase::mod_x is not set:
|
||||
if(
|
||||
((this->flags()
|
||||
& (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex))
|
||||
!= regbase::mod_x)
|
||||
|| !this->m_traits.is_class(*m_position, this->m_mask_space))
|
||||
this->append_literal(*m_position);
|
||||
++m_position;
|
||||
return true;
|
||||
}
|
||||
@ -266,11 +292,31 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
|
||||
this->m_pdata->m_data.align();
|
||||
m_alt_insert_point = this->m_pdata->m_data.size();
|
||||
//
|
||||
// back up the current flags in case we have a nested (?imsx) group:
|
||||
//
|
||||
regex_constants::syntax_option_type opts = this->flags();
|
||||
bool old_case_change = m_has_case_change;
|
||||
m_has_case_change = false; // no changes to this scope as yet...
|
||||
//
|
||||
// now recursively add more states, this will terminate when we get to a
|
||||
// matching ')' :
|
||||
//
|
||||
parse_all();
|
||||
//
|
||||
// restore flags:
|
||||
//
|
||||
if(m_has_case_change)
|
||||
{
|
||||
// the case has changed in one or more of the alternatives
|
||||
// within the scoped (...) block: we have to add a state
|
||||
// to reset the case sensitivity:
|
||||
static_cast<re_case*>(
|
||||
this->append_state(syntax_element_toggle_case, sizeof(re_case))
|
||||
)->icase = opts & regbase::icase;
|
||||
}
|
||||
this->flags(opts);
|
||||
m_has_case_change = old_case_change;
|
||||
//
|
||||
// we either have a ')' or we have run out of characters prematurely:
|
||||
//
|
||||
if(m_position == m_end)
|
||||
@ -436,7 +482,12 @@ bool basic_regex_parser<charT, traits>::parse_match_any()
|
||||
// we have a '.' that can match any character:
|
||||
//
|
||||
++m_position;
|
||||
this->append_state(syntax_element_wild);
|
||||
static_cast<re_dot*>(
|
||||
this->append_state(syntax_element_wild, sizeof(re_dot))
|
||||
)->mask = this->flags() & regbase::no_mod_s
|
||||
? re_detail::force_not_newline
|
||||
: this->flags() & regbase::mod_s ?
|
||||
re_detail::force_newline : re_detail::dont_care;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -609,10 +660,13 @@ bool basic_regex_parser<charT, traits>::parse_alt()
|
||||
fail(REG_EMPTY, this->m_position - this->m_base);
|
||||
++m_position;
|
||||
//
|
||||
// we need to append a trailing jump, then insert the alternative:
|
||||
// we need to append a trailing jump:
|
||||
//
|
||||
re_syntax_base* pj = this->append_state(re_detail::syntax_element_jump, sizeof(re_jump));
|
||||
std::ptrdiff_t jump_offset = this->getoffset(pj);
|
||||
//
|
||||
// now insert the alternative:
|
||||
//
|
||||
re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
|
||||
jump_offset += re_alt_size;
|
||||
this->m_pdata->m_data.align();
|
||||
@ -623,6 +677,16 @@ bool basic_regex_parser<charT, traits>::parse_alt()
|
||||
//
|
||||
this->m_alt_insert_point = this->m_pdata->m_data.size();
|
||||
//
|
||||
// the start of this alternative must have a case change state
|
||||
// if the current block has messed around with case changes:
|
||||
//
|
||||
if(m_has_case_change)
|
||||
{
|
||||
static_cast<re_case*>(
|
||||
this->append_state(syntax_element_toggle_case, sizeof(re_case))
|
||||
)->icase = this->m_icase;
|
||||
}
|
||||
//
|
||||
// recursively add states:
|
||||
//
|
||||
bool result = this->parse_all();
|
||||
@ -633,7 +697,7 @@ bool basic_regex_parser<charT, traits>::parse_alt()
|
||||
fail(REG_EMPTY, this->m_position - this->m_base);
|
||||
//
|
||||
// fix up the jump we added to point to the end of the states
|
||||
// that we're just added:
|
||||
// that we've just added:
|
||||
//
|
||||
this->m_pdata->m_data.align();
|
||||
re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
|
||||
@ -1083,7 +1147,13 @@ bool basic_regex_parser<charT, traits>::parse_QE()
|
||||
while((m_position != m_end)
|
||||
&& (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))
|
||||
++m_position;
|
||||
if((m_position == m_end) || (++m_position == m_end)) // skip the escape
|
||||
if(m_position == m_end)
|
||||
{
|
||||
// a \Q...\E sequence may terminate with the end of the expression:
|
||||
end = m_position;
|
||||
break;
|
||||
}
|
||||
if(++m_position == m_end) // skip the escape
|
||||
{
|
||||
fail(REG_EESCAPE, m_position - m_base);
|
||||
return false;
|
||||
@ -1117,6 +1187,18 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
||||
if(++m_position == m_end)
|
||||
fail(REG_BADRPT, m_position - m_base);
|
||||
//
|
||||
// treat comments as a special case, as these
|
||||
// are the only ones that don't start with a leading
|
||||
// startmark state:
|
||||
//
|
||||
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)
|
||||
{
|
||||
while((m_position != m_end)
|
||||
&& (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
|
||||
{}
|
||||
return true;
|
||||
}
|
||||
//
|
||||
// backup some state, and prepare the way:
|
||||
//
|
||||
int markid = 0;
|
||||
@ -1128,6 +1210,10 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
||||
this->m_pdata->m_data.align();
|
||||
m_alt_insert_point = this->m_pdata->m_data.size();
|
||||
std::ptrdiff_t expected_alt_point = m_alt_insert_point;
|
||||
bool restore_flags = true;
|
||||
regex_constants::syntax_option_type old_flags = this->flags();
|
||||
bool old_case_change = m_has_case_change;
|
||||
m_has_case_change = false;
|
||||
//
|
||||
// select the actual extension used:
|
||||
//
|
||||
@ -1140,15 +1226,6 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
||||
pb->index = markid = 0;
|
||||
++m_position;
|
||||
break;
|
||||
case regex_constants::syntax_hash:
|
||||
//
|
||||
// a comment; this actually becomes an empty non-capturing mark:
|
||||
//
|
||||
pb->index = markid = 0;
|
||||
while((m_position != m_end)
|
||||
&& (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
|
||||
++m_position;
|
||||
break;
|
||||
case regex_constants::syntax_equal:
|
||||
pb->index = markid = -1;
|
||||
++m_position;
|
||||
@ -1235,7 +1312,37 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
||||
break;
|
||||
}
|
||||
default:
|
||||
fail(REG_BADRPT, m_position - m_base);
|
||||
//
|
||||
// let's assume that we have a (?imsx) group and try to parse it:
|
||||
//
|
||||
regex_constants::syntax_option_type opts = parse_options();
|
||||
// make a note of whether we have a case change:
|
||||
m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));
|
||||
pb->index = markid = 0;
|
||||
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)
|
||||
{
|
||||
// update flags and carry on as normal:
|
||||
this->flags(opts);
|
||||
restore_flags = false;
|
||||
old_case_change |= m_has_case_change; // defer end of scope by one ')'
|
||||
}
|
||||
else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)
|
||||
{
|
||||
// update flags and carry on until the matching ')' is found:
|
||||
this->flags(opts);
|
||||
++m_position;
|
||||
}
|
||||
else
|
||||
fail(REG_BADRPT, m_position - m_base);
|
||||
|
||||
// finally append a case change state if we need it:
|
||||
if(m_has_case_change)
|
||||
{
|
||||
static_cast<re_case*>(
|
||||
this->append_state(syntax_element_toggle_case, sizeof(re_case))
|
||||
)->icase = opts & regbase::icase;
|
||||
}
|
||||
|
||||
}
|
||||
//
|
||||
// now recursively add more states, this will terminate when we get to a
|
||||
@ -1250,6 +1357,20 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
||||
BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
|
||||
++m_position;
|
||||
//
|
||||
// restore the flags:
|
||||
//
|
||||
if(restore_flags)
|
||||
{
|
||||
// append a case change state if we need it:
|
||||
if(m_has_case_change)
|
||||
{
|
||||
static_cast<re_case*>(
|
||||
this->append_state(syntax_element_toggle_case, sizeof(re_case))
|
||||
)->icase = old_flags & regbase::icase;
|
||||
}
|
||||
this->flags(old_flags);
|
||||
}
|
||||
//
|
||||
// set up the jump pointer if we have one:
|
||||
//
|
||||
if(jump_offset)
|
||||
@ -1275,6 +1396,8 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
||||
re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
|
||||
alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
|
||||
}
|
||||
else if(this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
|
||||
fail(REG_BADPAT, m_position - m_base);
|
||||
}
|
||||
//
|
||||
// append closing parenthesis state:
|
||||
@ -1286,9 +1409,79 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
||||
// restore the alternate insertion point:
|
||||
//
|
||||
this->m_alt_insert_point = last_alt_point;
|
||||
//
|
||||
// and the case change data:
|
||||
//
|
||||
m_has_case_change = old_case_change;
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
|
||||
{
|
||||
// we have a (?imsx-imsx) group, convert it into a set of flags:
|
||||
regex_constants::syntax_option_type f = this->flags();
|
||||
bool breakout = false;
|
||||
do
|
||||
{
|
||||
switch(*m_position)
|
||||
{
|
||||
case 's':
|
||||
f |= regex_constants::mod_s;
|
||||
f &= ~regex_constants::no_mod_s;
|
||||
break;
|
||||
case 'm':
|
||||
f &= ~regex_constants::no_mod_m;
|
||||
break;
|
||||
case 'i':
|
||||
f |= regex_constants::icase;
|
||||
break;
|
||||
case 'x':
|
||||
f |= regex_constants::mod_x;
|
||||
break;
|
||||
default:
|
||||
breakout = true;
|
||||
continue;
|
||||
}
|
||||
if(++m_position == m_end)
|
||||
fail(REG_EPAREN, m_position - m_base);
|
||||
}
|
||||
while(!breakout);
|
||||
|
||||
if(*m_position == '-')
|
||||
{
|
||||
if(++m_position == m_end)
|
||||
fail(REG_EPAREN, m_position - m_base);
|
||||
do
|
||||
{
|
||||
switch(*m_position)
|
||||
{
|
||||
case 's':
|
||||
f &= ~regex_constants::mod_s;
|
||||
f |= regex_constants::no_mod_s;
|
||||
break;
|
||||
case 'm':
|
||||
f |= regex_constants::no_mod_m;
|
||||
break;
|
||||
case 'i':
|
||||
f &= ~regex_constants::icase;
|
||||
break;
|
||||
case 'x':
|
||||
f &= ~regex_constants::mod_x;
|
||||
break;
|
||||
default:
|
||||
breakout = true;
|
||||
continue;
|
||||
}
|
||||
if(++m_position == m_end)
|
||||
fail(REG_EPAREN, m_position - m_base);
|
||||
}
|
||||
while(!breakout);
|
||||
}
|
||||
return f;
|
||||
}
|
||||
|
||||
|
||||
} // namespace re_detail
|
||||
} // namespace boost
|
||||
|
||||
|
@ -91,12 +91,12 @@ template <class iterator, class charT, class traits_type, class char_classT>
|
||||
iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
|
||||
iterator last,
|
||||
const re_set_long<char_classT>* set_,
|
||||
const regex_data<charT, traits_type>& e)
|
||||
const regex_data<charT, traits_type>& e, bool icase)
|
||||
{
|
||||
const charT* p = reinterpret_cast<const charT*>(set_+1);
|
||||
iterator ptr;
|
||||
unsigned int i;
|
||||
bool icase = e.m_flags & regex_constants::icase;
|
||||
//bool icase = e.m_flags & regex_constants::icase;
|
||||
|
||||
if(next == last) return next;
|
||||
|
||||
@ -335,6 +335,7 @@ private:
|
||||
bool match_dot_repeat_slow();
|
||||
bool match_backstep();
|
||||
bool match_assert_backref();
|
||||
bool match_toggle_case();
|
||||
bool backtrack_till_match(unsigned count);
|
||||
|
||||
// find procs stored in s_find_vtable:
|
||||
@ -385,6 +386,8 @@ private:
|
||||
repeater_count<BidiIterator> rep_obj;
|
||||
// the mask to pass when matching word boundaries:
|
||||
typename traits::char_class_type m_word_mask;
|
||||
// the bitmask to use when determining whether a match_any matches a newline or not:
|
||||
unsigned char match_any_mask;
|
||||
|
||||
#ifdef BOOST_REGEX_NON_RECURSIVE
|
||||
//
|
||||
|
@ -73,6 +73,8 @@ perl_matcher<BidiIterator, Allocator, traits>::perl_matcher(BidiIterator first,
|
||||
// find the value to use for matching word boundaries:
|
||||
const char_type w = static_cast<char_type>('w');
|
||||
m_word_mask = traits_inst.lookup_classname(&w, &w+1);
|
||||
// find bitmask to use for matching '.':
|
||||
match_any_mask = (f & match_not_dot_newline) ? re_detail::test_not_newline : re_detail::test_newline;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
@ -395,7 +397,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_wild()
|
||||
{
|
||||
if(position == last)
|
||||
return false;
|
||||
if(is_separator(*position) && (m_match_flags & match_not_dot_newline))
|
||||
if(is_separator(*position) && ((match_any_mask & static_cast<const re_dot*>(pstate)->mask) == 0))
|
||||
return false;
|
||||
if((*position == char_type(0)) && (m_match_flags & match_not_dot_null))
|
||||
return false;
|
||||
@ -590,7 +592,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set()
|
||||
// let the traits class do the work:
|
||||
if(position == last)
|
||||
return false;
|
||||
BidiIterator t = re_is_set_member(position, last, static_cast<const re_set_long<char_class_type>*>(pstate), re.get_data());
|
||||
BidiIterator t = re_is_set_member(position, last, static_cast<const re_set_long<char_class_type>*>(pstate), re.get_data(), icase);
|
||||
if(t != position)
|
||||
{
|
||||
pstate = pstate->next.p;
|
||||
@ -678,6 +680,16 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref()
|
||||
return (*m_presult)[static_cast<const re_brace*>(pstate)->index].matched;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case()
|
||||
{
|
||||
// change our case sensitivity:
|
||||
this->icase = static_cast<const re_case*>(pstate)->icase;
|
||||
pstate = pstate->next.p;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_any()
|
||||
{
|
||||
|
@ -113,7 +113,7 @@ struct saved_single_repeat : public saved_state
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
|
||||
{
|
||||
static matcher_proc_type const s_match_vtable[28] =
|
||||
static matcher_proc_type const s_match_vtable[29] =
|
||||
{
|
||||
(&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
|
||||
@ -143,6 +143,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_backstep,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case,
|
||||
};
|
||||
|
||||
push_recursion_stopper();
|
||||
@ -575,7 +576,9 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow()
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast()
|
||||
{
|
||||
if(m_match_flags & (match_not_dot_newline | match_not_dot_null))
|
||||
if(m_match_flags & match_not_dot_null)
|
||||
return match_dot_repeat_slow();
|
||||
if((static_cast<const re_dot*>(pstate->next.p)->mask & match_any_mask) == 0)
|
||||
return match_dot_repeat_slow();
|
||||
|
||||
const re_repeat* rep = static_cast<const re_repeat*>(pstate);
|
||||
@ -763,7 +766,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
|
||||
BidiIterator end = position;
|
||||
std::advance(end, (std::min)((unsigned)re_detail::distance(position, last), desired));
|
||||
BidiIterator origin(position);
|
||||
while((position != end) && (position != re_is_set_member(position, last, set, re.get_data())))
|
||||
while((position != end) && (position != re_is_set_member(position, last, set, re.get_data(), icase)))
|
||||
{
|
||||
++position;
|
||||
}
|
||||
@ -771,7 +774,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
|
||||
}
|
||||
else
|
||||
{
|
||||
while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data())))
|
||||
while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data(), icase)))
|
||||
{
|
||||
++position;
|
||||
++count;
|
||||
@ -1269,7 +1272,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_long_set_repeat(bool
|
||||
// wind forward until we can skip out of the repeat:
|
||||
do
|
||||
{
|
||||
if(position == re_is_set_member(position, last, set, re.get_data()))
|
||||
if(position == re_is_set_member(position, last, set, re.get_data(), icase))
|
||||
{
|
||||
// failed repeat match, discard this state and look for another:
|
||||
destroy_single_repeat();
|
||||
|
@ -48,7 +48,7 @@ public:
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
|
||||
{
|
||||
static matcher_proc_type const s_match_vtable[28] =
|
||||
static matcher_proc_type const s_match_vtable[29] =
|
||||
{
|
||||
(&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
|
||||
@ -78,6 +78,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_backstep,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case,
|
||||
};
|
||||
|
||||
if(state_count > max_state_count)
|
||||
@ -427,7 +428,9 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast()
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable:4127)
|
||||
#endif
|
||||
if(m_match_flags & (match_not_dot_newline | match_not_dot_null))
|
||||
if(m_match_flags & match_not_dot_null)
|
||||
return match_dot_repeat_slow();
|
||||
if((static_cast<const re_dot*>(pstate->next.p)->mask & match_any_mask) == 0)
|
||||
return match_dot_repeat_slow();
|
||||
//
|
||||
// start by working out how much we can skip:
|
||||
@ -670,7 +673,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
|
||||
BidiIterator end = position;
|
||||
std::advance(end, (std::min)((unsigned)re_detail::distance(position, last), desired));
|
||||
BidiIterator origin(position);
|
||||
while((position != end) && (position != re_is_set_member(position, last, set, re.get_data())))
|
||||
while((position != end) && (position != re_is_set_member(position, last, set, re.get_data(), icase)))
|
||||
{
|
||||
++position;
|
||||
}
|
||||
@ -678,7 +681,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
|
||||
}
|
||||
else
|
||||
{
|
||||
while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data())))
|
||||
while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data(), icase)))
|
||||
{
|
||||
++position;
|
||||
++count;
|
||||
@ -698,7 +701,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
|
||||
{
|
||||
while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
|
||||
{
|
||||
if(position != re_is_set_member(position, last, set, re.get_data()))
|
||||
if(position != re_is_set_member(position, last, set, re.get_data(), icase))
|
||||
{
|
||||
++position;
|
||||
++count;
|
||||
@ -718,7 +721,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
|
||||
if(position == last)
|
||||
return false;
|
||||
position = save_pos;
|
||||
if(position != re_is_set_member(position, last, set, re.get_data()))
|
||||
if(position != re_is_set_member(position, last, set, re.get_data(), icase))
|
||||
{
|
||||
++position;
|
||||
++count;
|
||||
|
@ -52,6 +52,10 @@ public:
|
||||
//
|
||||
no_bk_refs = 1 << 8, // \d not allowed
|
||||
no_perl_ex = 1 << 9, // disable perl extensions
|
||||
no_mod_m = 1 << 10, // disable Perl m modifier
|
||||
mod_x = 1 << 11, // Perl x modifier
|
||||
mod_s = 1 << 12, // force s modifier on (overrides match_not_dot_newline)
|
||||
no_mod_s = 1 << 13, // force s modifier off (overrides match_not_dot_newline)
|
||||
|
||||
//
|
||||
// options specific to basic group:
|
||||
@ -125,6 +129,10 @@ namespace regex_constants{
|
||||
no_intervals = ::boost::regbase::no_intervals,
|
||||
no_char_classes = ::boost::regbase::no_char_classes,
|
||||
no_escape_in_lists = ::boost::regbase::no_escape_in_lists,
|
||||
no_mod_m = ::boost::regbase::no_mod_m,
|
||||
mod_x = ::boost::regbase::mod_x,
|
||||
mod_s = ::boost::regbase::mod_s,
|
||||
no_mod_s = ::boost::regbase::no_mod_s,
|
||||
|
||||
basic = ::boost::regbase::basic,
|
||||
extended = ::boost::regbase::extended,
|
||||
|
@ -110,7 +110,8 @@ enum syntax_element_type
|
||||
// a backstep for lookbehind repeats:
|
||||
syntax_element_backstep = syntax_element_long_set_rep + 1,
|
||||
// an assertion that a mark was matched:
|
||||
syntax_element_assert_backref = syntax_element_backstep +1
|
||||
syntax_element_assert_backref = syntax_element_backstep + 1,
|
||||
syntax_element_toggle_case = syntax_element_assert_backref + 1
|
||||
};
|
||||
|
||||
#ifdef BOOST_REGEX_DEBUG
|
||||
@ -141,7 +142,7 @@ struct re_syntax_base
|
||||
};
|
||||
|
||||
/*** struct re_brace **************************************************
|
||||
Base class for all states in the machine.
|
||||
A marked parenthesis.
|
||||
***********************************************************************/
|
||||
struct re_brace : public re_syntax_base
|
||||
{
|
||||
@ -150,6 +151,23 @@ struct re_brace : public re_syntax_base
|
||||
int index;
|
||||
};
|
||||
|
||||
/*** struct re_dot **************************************************
|
||||
Match anything.
|
||||
***********************************************************************/
|
||||
enum
|
||||
{
|
||||
dont_care = 1,
|
||||
force_not_newline = 0,
|
||||
force_newline = 2,
|
||||
|
||||
test_not_newline = 2,
|
||||
test_newline = 3,
|
||||
};
|
||||
struct re_dot : public re_syntax_base
|
||||
{
|
||||
unsigned char mask;
|
||||
};
|
||||
|
||||
/*** struct re_literal ************************************************
|
||||
A string of literals, following this structure will be an
|
||||
array of characters: charT[length]
|
||||
@ -159,6 +177,14 @@ struct re_literal : public re_syntax_base
|
||||
unsigned int length;
|
||||
};
|
||||
|
||||
/*** struct re_case ************************************************
|
||||
Indicates whether we are moving to a case insensitive block or not
|
||||
***********************************************************************/
|
||||
struct re_case : public re_syntax_base
|
||||
{
|
||||
bool icase;
|
||||
};
|
||||
|
||||
/*** struct re_set_long ***********************************************
|
||||
A wide character set of characters, following this structure will be
|
||||
an array of type charT:
|
||||
@ -234,7 +260,7 @@ template <class iterator, class charT, class traits_type, class char_classT>
|
||||
iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
|
||||
iterator last,
|
||||
const re_set_long<char_classT>* set_,
|
||||
const regex_data<charT, traits_type>& e);
|
||||
const regex_data<charT, traits_type>& e, bool icase);
|
||||
|
||||
} // namespace re_detail
|
||||
|
||||
|
@ -496,7 +496,6 @@ void test_character_escapes()
|
||||
TEST_INVALID_REGEX("\\x{yy", perl);
|
||||
TEST_INVALID_REGEX("\\x{1b", perl);
|
||||
// \Q...\E sequences:
|
||||
TEST_INVALID_REGEX("\\Qabc", perl);
|
||||
TEST_INVALID_REGEX("\\Qabc\\", perl);
|
||||
TEST_REGEX_SEARCH("\\Qabc\\E", perl, "abcd", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\Qabc\\Ed", perl, "abcde", match_default, make_array(0, 4, -2, -2));
|
||||
@ -1364,8 +1363,504 @@ void test_conditionals()
|
||||
TEST_REGEX_SEARCH("^(?(2)a|(1)(2))+$", perl, "12aa", match_default, make_array(0, 4, 0, 1, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("^(?(2)a|(1)(2))+$", perl, "1234", match_default, make_array(-2, -2));
|
||||
|
||||
//TEST_REGEX_SEARCH("", perl, "", match_default, make_array(-2, -2));
|
||||
TEST_INVALID_REGEX("(a)(?(1)a|b|c)", perl);
|
||||
TEST_INVALID_REGEX("(?(?=a)a|b|c)", perl);
|
||||
TEST_INVALID_REGEX("(?(1a)", perl);
|
||||
}
|
||||
|
||||
#if 0
|
||||
void test_options()
|
||||
{
|
||||
// test the (?imsx) construct:
|
||||
using namespace boost::regex_constants;
|
||||
TEST_INVALID_REGEX("(?imsx", perl);
|
||||
TEST_INVALID_REGEX("(?g", perl);
|
||||
TEST_INVALID_REGEX("(?im-sx", perl);
|
||||
TEST_INVALID_REGEX("(?im-sx:", perl);
|
||||
TEST_REGEX_SEARCH("(?-m)^abc", perl, "abc\nabc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?m)^abc", perl|no_mod_m, "abc\nabc", match_default, make_array(0, 3, -2, 4, 7, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?-m)^abc", perl, "abc\nabc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?m)^abc", perl|no_mod_m, "abc\nabc", match_default, make_array(0, 3, -2, 4, 7, -2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH(" ^ a (?# begins with a) b\\sc (?# then b c) $ (?# then end)", perl|mod_x, "ab c", match_default, make_array(0, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH(" ^ a (?# begins with a) b\\sc (?# then b c) $ (?# then end)", perl|mod_x, "abc", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH(" ^ a (?# begins with a) b\\sc (?# then b c) $ (?# then end)", perl|mod_x, "ab cde", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(?x) ^ a (?# begins with a) b\\sc (?# then b c) $ (?# then end)", perl, "ab c", match_default, make_array(0, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?x) ^ a (?# begins with a) b\\sc (?# then b c) $ (?# then end)", perl, "abc", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(?x) ^ a (?# begins with a) b\\sc (?# then b c) $ (?# then end)", perl, "ab cde", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("^ a\\ b[c ]d $", perl|mod_x, "a bcd", match_default, make_array(0, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("^ a\\ b[c ]d $", perl|mod_x, "a b d", match_default, make_array(0, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("^ a\\ b[c ]d $", perl|mod_x, "abcd", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("^ a\\ b[c ]d $", perl|mod_x, "ab d", match_default, make_array(-2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("^1234(?# test newlines\n inside)", perl|mod_x, "1234", match_default, make_array(0, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("^1234 #comment in extended re\n", perl|mod_x, "1234", match_default, make_array(0, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("#rhubarb\n abcd", perl|mod_x, "abcd", match_default, make_array(0, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("^1234 #comment in extended re\r\n", perl|mod_x, "1234", match_default, make_array(0, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("#rhubarb\r\n abcd", perl|mod_x, "abcd", match_default, make_array(0, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("^abcd#rhubarb", perl|mod_x, "abcd", match_default, make_array(0, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("^abcd#rhubarb", perl, "abcd#rhubarb", match_default, make_array(0, 12, -2, -2));
|
||||
TEST_REGEX_SEARCH("^a b\n\n c", perl|mod_x, "abc", match_default, make_array(0, 3, -2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("(?(?=[^a-z]+[a-z]) \\d{2}-[a-z]{3}-\\d{2} | \\d{2}-\\d{2}-\\d{2} ) ", perl|mod_x, "12-sep-98", match_default, make_array(0, 9, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?(?=[^a-z]+[a-z]) \\d{2}-[a-z]{3}-\\d{2} | \\d{2}-\\d{2}-\\d{2} ) ", perl|mod_x, "12-09-98", match_default, make_array(0, 8, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?(?=[^a-z]+[a-z]) \\d{2}-[a-z]{3}-\\d{2} | \\d{2}-\\d{2}-\\d{2} ) ", perl|mod_x, "sep-12-98", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("^a (?#xxx) (?#yyy) {3}c", perl|mod_x, "aaac", match_default, make_array(0, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("ab", perl|mod_x, "ab", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH(" abc\\Q abc\\Eabc", perl|mod_x, "abc abcabc", match_default, make_array(0, 10, -2, -2));
|
||||
TEST_REGEX_SEARCH(" abc\\Q abc\\Eabc", perl|mod_x, "abcabcabc", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("abc#comment\n \\Q#not comment\n literal\\E", perl|mod_x, "abc#not comment\n literal", match_default, make_array(0, 27, -2, -2));
|
||||
TEST_REGEX_SEARCH("abc#comment\n \\Q#not comment\n literal", perl|mod_x, "abc#not comment\n literal", match_default, make_array(0, 27, -2, -2));
|
||||
TEST_REGEX_SEARCH("abc#comment\n \\Q#not comment\n literal\\E #more comment\n ", perl|mod_x, "abc#not comment\n literal", match_default, make_array(0, 27, -2, -2));
|
||||
TEST_REGEX_SEARCH("abc#comment\n \\Q#not comment\n literal\\E #more comment", perl|mod_x, "abc#not comment\n literal", match_default, make_array(0, 27, -2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("(a (?x)b c)d e", perl, "a bcd e", match_default, make_array(0, 7, 0, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("(a (?x)b c)d e", perl, "a b cd e", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(a (?x)b c)d e", perl, "abcd e", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(a (?x)b c)d e", perl, "a bcde", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(a b(?x)c d (?-x)e f)", perl, "a bcde f", match_default, make_array(0, 8, 0, 8, -2, -2));
|
||||
TEST_REGEX_SEARCH("(a b(?x)c d (?-x)e f)", perl, "abcdef", match_default, make_array(-2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("a(?x: b c )d", perl, "XabcdY", match_default, make_array(1, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(?x: b c )d", perl, "Xa b c d Y", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("((?x)x y z | a b c)", perl, "XabcY", match_default, make_array(1, 4, 1, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("((?x)x y z | a b c)", perl, "AxyzB", match_default, make_array(1, 4, 1, 4, -2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("(a (?x)b c)d e", perl, "a bcd e", match_default, make_array(0, 7, 0, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("(a (?x)b c)d e", perl, "a b cd e", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(a (?x)b c)d e", perl, "abcd e", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(a (?x)b c)d e", perl, "a bcde", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(a b(?x)c d (?-x)e f)", perl, "a bcde f", match_default, make_array(0, 8, 0, 8, -2, -2));
|
||||
TEST_REGEX_SEARCH("(a b(?x)c d (?-x)e f)", perl, "abcdef", match_default, make_array(-2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("(a(?i)b)c", perl, "abc", match_default, make_array(0, 3, 0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("(a(?i)b)c", perl, "aBc", match_default, make_array(0, 3, 0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("(a(?i)b)c", perl, "abC", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(a(?i)b)c", perl, "aBC", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(a(?i)b)c", perl, "Abc", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(a(?i)b)c", perl, "ABc", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(a(?i)b)c", perl, "ABC", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(a(?i)b)c", perl, "AbC", match_default, make_array(-2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("(a(?i)B)c", perl, "abc", match_default, make_array(0, 3, 0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("(a(?i)B)c", perl, "aBc", match_default, make_array(0, 3, 0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("(a(?i)B)c", perl, "abC", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(a(?i)B)c", perl, "aBC", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(a(?i)B)c", perl, "Abc", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(a(?i)B)c", perl, "ABc", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(a(?i)B)c", perl, "ABC", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(a(?i)B)c", perl, "AbC", match_default, make_array(-2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("a(?i:b)c", perl, "abc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(?i:b)c", perl, "aBc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(?i:b)c", perl, "ABC", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("a(?i:b)c", perl, "abC", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("a(?i:b)c", perl, "aBC", match_default, make_array(-2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("a(?i:b)*c", perl, "aBc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(?i:b)*c", perl, "aBBc", match_default, make_array(0, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(?i:b)*c", perl, "aBC", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("a(?i:b)*c", perl, "aBBC", match_default, make_array(-2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("a(?=b(?i)c)\\w\\wd", perl, "abcd", match_default, make_array(0, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(?=b(?i)c)\\w\\wd", perl, "abCd", match_default, make_array(0, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(?=b(?i)c)\\w\\wd", perl, "aBCd", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("a(?=b(?i)c)\\w\\wd", perl, "abcD", match_default, make_array(-2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("(?s-i:more.*than).*million", perl|icase, "more than million", match_default, make_array(0, 17, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?s-i:more.*than).*million", perl|icase, "more than MILLION", match_default, make_array(0, 17, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?s-i:more.*than).*million", perl|icase, "more \n than Million", match_default, make_array(0, 19, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?s-i:more.*than).*million", perl|icase, "MORE THAN MILLION", match_default, make_array(-2, -2));
|
||||
//TEST_REGEX_SEARCH("(?s-i:more.*than).*million", perl|icase, "more \n than \n million", match_default, make_array(-2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("(?:(?s-i)more.*than).*million", perl|icase, "more than million", match_default, make_array(0, 17, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?:(?s-i)more.*than).*million", perl|icase, "more than MILLION", match_default, make_array(0, 17, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?:(?s-i)more.*than).*million", perl|icase, "more \n than Million", match_default, make_array(0, 19, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?:(?s-i)more.*than).*million", perl|icase, "MORE THAN MILLION", match_default, make_array(-2, -2));
|
||||
//TEST_REGEX_SEARCH("(?:(?s-i)more.*than).*million", perl|icase, "more \n than \n million", match_default, make_array(-2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("(?>a(?i)b+)+c", perl, "abc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?>a(?i)b+)+c", perl, "aBbc", match_default, make_array(0, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?>a(?i)b+)+c", perl, "aBBc", match_default, make_array(0, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?>a(?i)b+)+c", perl, "Abc", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(?>a(?i)b+)+c", perl, "abAb", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(?>a(?i)b+)+c", perl, "abbC", match_default, make_array(-2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("(?=a(?i)b)\\w\\wc", perl, "abc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?=a(?i)b)\\w\\wc", perl, "aBc", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?=a(?i)b)\\w\\wc", perl, "Ab", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(?=a(?i)b)\\w\\wc", perl, "abC", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(?=a(?i)b)\\w\\wc", perl, "aBC", match_default, make_array(-2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("(?<=a(?i)b)(\\w\\w)c", perl, "abxxc", match_default, make_array(2, 5, 2, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?<=a(?i)b)(\\w\\w)c", perl, "aBxxc", match_default, make_array(2, 5, 2, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?<=a(?i)b)(\\w\\w)c", perl, "Abxxc", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(?<=a(?i)b)(\\w\\w)c", perl, "ABxxc", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(?<=a(?i)b)(\\w\\w)c", perl, "abxxC", match_default, make_array(-2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("(?<=ab(?i)x(?-i)y|(?i)z|b)ZZ", perl, "abxyZZ", match_default, make_array(4, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?<=ab(?i)x(?-i)y|(?i)z|b)ZZ", perl, "abXyZZ", match_default, make_array(4, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?:ab(?i)x(?-i)y|(?i)z|b)ZZ", perl, "ZZZ", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?:ab(?i)x(?-i)y|(?i)z|b)ZZ", perl, "zZZ", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?:ab(?i)x(?-i)y|(?i)z|b)ZZ", perl, "bZZ", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?:ab(?i)x(?-i)y|(?i)z|b)ZZ", perl, "BZZ", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?:ab(?i)x(?-i)y|(?i)z|b)ZZ", perl, "ZZ", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(?:ab(?i)x(?-i)y|(?i)z|b)ZZ", perl, "abXYZZ", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(?:ab(?i)x(?-i)y|(?i)z|b)ZZ", perl, "zzz", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(?:ab(?i)x(?-i)y|(?i)z|b)ZZ", perl, "bzz", match_default, make_array(-2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("((?-i)[[:lower:]])[[:lower:]]", perl|icase, "ab", match_default, make_array(0, 2, 0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("((?-i)[[:lower:]])[[:lower:]]", perl|icase, "aB", match_default, make_array(0, 2, 0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("((?-i)[[:lower:]])[[:lower:]]", perl|icase, "Ab", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("((?-i)[[:lower:]])[[:lower:]]", perl|icase, "AB", match_default, make_array(-2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("a(?-i)b", perl|icase, "ab", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(?-i)b", perl|icase, "Ab", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(?-i)b", perl|icase, "aB", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("a(?-i)b", perl|icase, "AB", match_default, make_array(-2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("(?:(?-i)a)b", perl|icase, "ab", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("((?-i)a)b", perl|icase, "ab", match_default, make_array(0, 2, 0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?:(?-i)a)b", perl|icase, "aB", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("((?-i)a)b", perl|icase, "aB", match_default, make_array(0, 2, 0, 1, -2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("(?:(?-i)a)b", perl|icase, "Ab", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(?:(?-i)a)b", perl|icase, "aB", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("((?-i)a)b", perl|icase, "aB", match_default, make_array(0, 2, 0, 1, -2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("(?:(?-i)a)b", perl|icase, "Ab", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(?:(?-i)a)b", perl|icase, "AB", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(?-i:a)b", perl|icase, "ab", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("((?-i:a))b", perl|icase, "ab", match_default, make_array(0, 2, 0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?-i:a)b", perl|icase, "aB", match_default, make_array(0, 2, -2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("((?-i:a))b", perl|icase, "aB", match_default, make_array(0, 2, 0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?-i:a)b", perl|icase, "AB", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(?-i:a)b", perl|icase, "Ab", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(?-i:a)b", perl|icase, "aB", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("((?-i:a))b", perl|icase, "aB", match_default, make_array(0, 2, 0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?-i:a)b", perl|icase, "Ab", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(?-i:a)b", perl|icase, "AB", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("((?-i:a.))b", perl|icase, "AB", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("((?-i:a.))b", perl|icase, "A\nB", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("((?s-i:a.))b", perl|icase, "a\nB", match_default, make_array(0, 3, 0, 2, -2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH(".", perl, "\n", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH(".", perl, "\n", match_default|match_not_dot_newline, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH(".", perl|mod_s, "\n", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH(".", perl|mod_s, "\n", match_default|match_not_dot_newline, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH(".", perl|no_mod_s, "\n", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH(".", perl|no_mod_s, "\n", match_default|match_not_dot_newline, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(?s).", perl, "\n", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?s).", perl, "\n", match_default|match_not_dot_newline, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?-s).", perl, "\n", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(?-s).", perl, "\n", match_default|match_not_dot_newline, make_array(-2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH(".+", perl, " \n ", match_default, make_array(0, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH(".+", perl, " \n ", match_default|match_not_dot_newline, make_array(0, 2, -2, 3, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH(".+", perl|mod_s, " \n ", match_default, make_array(0, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH(".+", perl|mod_s, " \n ", match_default|match_not_dot_newline, make_array(0, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH(".+", perl|no_mod_s, " \n ", match_default, make_array(0, 2, -2, 3, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH(".+", perl|no_mod_s, " \n ", match_default|match_not_dot_newline, make_array(0, 2, -2, 3, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?s).+", perl, " \n ", match_default, make_array(0, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?s).+", perl, " \n ", match_default|match_not_dot_newline, make_array(0, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?-s).+", perl, " \n ", match_default, make_array(0, 2, -2, 3, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?-s).+", perl, " \n ", match_default|match_not_dot_newline, make_array(0, 2, -2, 3, 5, -2, -2));
|
||||
|
||||
const char* big_expression =
|
||||
" (?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* # optional leading comment\n"
|
||||
"(?: (?:\n"
|
||||
"[^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+ # some number of atom characters...\n"
|
||||
"(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n"
|
||||
"|\n"
|
||||
"\" (?: # opening quote...\n"
|
||||
"[^\\\\\\x80-\\xff\\n\\015\"] # Anything except backslash and quote\n"
|
||||
"| # or\n"
|
||||
"\\\\ [^\\x80-\\xff] # Escaped something (something != CR)\n"
|
||||
")* \" # closing quote\n"
|
||||
") # initial word\n"
|
||||
"(?: (?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* \\. (?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* (?:\n"
|
||||
"[^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+ # some number of atom characters...\n"
|
||||
"(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n"
|
||||
"|\n"
|
||||
"\" (?: # opening quote...\n"
|
||||
"[^\\\\\\x80-\\xff\\n\\015\"] # Anything except backslash and quote\n"
|
||||
"| # or\n"
|
||||
"\\\\ [^\\x80-\\xff] # Escaped something (something != CR)\n"
|
||||
")* \" # closing quote\n"
|
||||
") )* # further okay, if led by a period\n"
|
||||
"(?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* @ (?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* (?:\n"
|
||||
"[^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+ # some number of atom characters...\n"
|
||||
"(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n"
|
||||
"| \\[ # [\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] | \\\\ [^\\x80-\\xff] )* # stuff\n"
|
||||
"\\] # ]\n"
|
||||
") # initial subdomain\n"
|
||||
"(?: #\n"
|
||||
"(?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* \\. # if led by a period...\n"
|
||||
"(?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* (?:\n"
|
||||
"[^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+ # some number of atom characters...\n"
|
||||
"(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n"
|
||||
"| \\[ # [\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] | \\\\ [^\\x80-\\xff] )* # stuff\n"
|
||||
"\\] # ]\n"
|
||||
") # ...further okay\n"
|
||||
")*\n"
|
||||
"# address\n"
|
||||
"| # or\n"
|
||||
"(?:\n"
|
||||
"[^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+ # some number of atom characters...\n"
|
||||
"(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n"
|
||||
"|\n"
|
||||
"\" (?: # opening quote...\n"
|
||||
"[^\\\\\\x80-\\xff\\n\\015\"] # Anything except backslash and quote\n"
|
||||
"| # or\n"
|
||||
"\\\\ [^\\x80-\\xff] # Escaped something (something != CR)\n"
|
||||
")* \" # closing quote\n"
|
||||
") # one word, optionally followed by....\n"
|
||||
"(?:\n"
|
||||
"[^()<>@,;:\".\\\\\\[\\]\\x80-\\xff\\000-\\010\\012-\\037] | # atom and space parts, or...\n"
|
||||
"\\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) | # comments, or...\n"
|
||||
"\" (?: # opening quote...\n"
|
||||
"[^\\\\\\x80-\\xff\\n\\015\"] # Anything except backslash and quote\n"
|
||||
"| # or\n"
|
||||
"\\\\ [^\\x80-\\xff] # Escaped something (something != CR)\n"
|
||||
")* \" # closing quote\n"
|
||||
"# quoted strings\n"
|
||||
")*\n"
|
||||
"< (?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* # leading <\n"
|
||||
"(?: @ (?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* (?:\n"
|
||||
"[^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+ # some number of atom characters...\n"
|
||||
"(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n"
|
||||
"| \\[ # [\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] | \\\\ [^\\x80-\\xff] )* # stuff\n"
|
||||
"\\] # ]\n"
|
||||
") # initial subdomain\n"
|
||||
"(?: #\n"
|
||||
"(?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* \\. # if led by a period...\n"
|
||||
"(?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* (?:\n"
|
||||
"[^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+ # some number of atom characters...\n"
|
||||
"(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n"
|
||||
"| \\[ # [\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] | \\\\ [^\\x80-\\xff] )* # stuff\n"
|
||||
"\\] # ]\n"
|
||||
") # ...further okay\n"
|
||||
")*\n"
|
||||
"(?: (?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* , (?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* @ (?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* (?:\n"
|
||||
"[^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+ # some number of atom characters...\n"
|
||||
"(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n"
|
||||
"| \\[ # [\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] | \\\\ [^\\x80-\\xff] )* # stuff\n"
|
||||
"\\] # ]\n"
|
||||
") # initial subdomain\n"
|
||||
"(?: #\n"
|
||||
"(?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* \\. # if led by a period...\n"
|
||||
"(?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* (?:\n"
|
||||
"[^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+ # some number of atom characters...\n"
|
||||
"(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n"
|
||||
"| \\[ # [\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] | \\\\ [^\\x80-\\xff] )* # stuff\n"
|
||||
"\\] # ]\n"
|
||||
") # ...further okay\n"
|
||||
")*\n"
|
||||
")* # further okay, if led by comma\n"
|
||||
": # closing colon\n"
|
||||
"(?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* )? # optional route\n"
|
||||
"(?:\n"
|
||||
"[^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+ # some number of atom characters...\n"
|
||||
"(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n"
|
||||
"|\n"
|
||||
"\" (?: # opening quote...\n"
|
||||
"[^\\\\\\x80-\\xff\\n\\015\"] # Anything except backslash and quote\n"
|
||||
"| # or\n"
|
||||
"\\\\ [^\\x80-\\xff] # Escaped something (something != CR)\n"
|
||||
")* \" # closing quote\n"
|
||||
") # initial word\n"
|
||||
"(?: (?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* \\. (?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* (?:\n"
|
||||
"[^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+ # some number of atom characters...\n"
|
||||
"(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n"
|
||||
"|\n"
|
||||
"\" (?: # opening quote...\n"
|
||||
"[^\\\\\\x80-\\xff\\n\\015\"] # Anything except backslash and quote\n"
|
||||
"| # or\n"
|
||||
"\\\\ [^\\x80-\\xff] # Escaped something (something != CR)\n"
|
||||
")* \" # closing quote\n"
|
||||
") )* # further okay, if led by a period\n"
|
||||
"(?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* @ (?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* (?:\n"
|
||||
"[^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+ # some number of atom characters...\n"
|
||||
"(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n"
|
||||
"| \\[ # [\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] | \\\\ [^\\x80-\\xff] )* # stuff\n"
|
||||
"\\] # ]\n"
|
||||
") # initial subdomain\n"
|
||||
"(?: #\n"
|
||||
"(?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* \\. # if led by a period...\n"
|
||||
"(?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* (?:\n"
|
||||
"[^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+ # some number of atom characters...\n"
|
||||
"(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]) # ..not followed by something that could be part of an atom\n"
|
||||
"| \\[ # [\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015\\[\\]] | \\\\ [^\\x80-\\xff] )* # stuff\n"
|
||||
"\\] # ]\n"
|
||||
") # ...further okay\n"
|
||||
")*\n"
|
||||
"# address spec\n"
|
||||
"(?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* > # trailing >\n"
|
||||
"# name and address\n"
|
||||
") (?: [\\040\\t] | \\(\n"
|
||||
"(?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] | \\( (?: [^\\\\\\x80-\\xff\\n\\015()] | \\\\ [^\\x80-\\xff] )* \\) )*\n"
|
||||
"\\) )* # optional trailing comment\n"
|
||||
"\n";
|
||||
|
||||
do{
|
||||
test_info<char>::set_info(__FILE__, __LINE__,
|
||||
big_expression,
|
||||
perl|mod_x, "Alan Other <user@dom.ain>", match_default,
|
||||
make_array(0, 25, -2, -2));
|
||||
test(char(0), test_regex_search_tag());
|
||||
}while(0);
|
||||
#if !defined(BOOST_NO_WREGEX) && !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)
|
||||
do{
|
||||
std::string st(big_expression);
|
||||
test_info<wchar_t>::set_info(__FILE__, __LINE__,
|
||||
std::wstring(st.begin(), st.end()),
|
||||
perl|mod_x, L"Alan Other <user@dom.ain>", match_default,
|
||||
make_array(0, 25, -2, -2));
|
||||
test(char(0), test_regex_search_tag());
|
||||
}while(0);
|
||||
#endif
|
||||
do{
|
||||
test_info<char>::set_info(__FILE__, __LINE__,
|
||||
big_expression,
|
||||
perl|mod_x, "<user@dom.ain>", match_default,
|
||||
make_array(1, 13, -2, -2));
|
||||
test(char(0), test_regex_search_tag());
|
||||
}while(0);
|
||||
#if !defined(BOOST_NO_WREGEX) && !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)
|
||||
do{
|
||||
std::string st(big_expression);
|
||||
test_info<wchar_t>::set_info(__FILE__, __LINE__,
|
||||
std::wstring(st.begin(), st.end()),
|
||||
perl|mod_x, L"<user@dom.ain>", match_default,
|
||||
make_array(1, 13, -2, -2));
|
||||
test(char(0), test_regex_search_tag());
|
||||
}while(0);
|
||||
#endif
|
||||
do{
|
||||
test_info<char>::set_info(__FILE__, __LINE__,
|
||||
big_expression,
|
||||
perl|mod_x, "\"A. Other\" <user.1234@dom.ain> (a comment)", match_default,
|
||||
make_array(0, 42, -2, -2));
|
||||
test(char(0), test_regex_search_tag());
|
||||
}while(0);
|
||||
#if !defined(BOOST_NO_WREGEX) && !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)
|
||||
do{
|
||||
std::string st(big_expression);
|
||||
test_info<wchar_t>::set_info(__FILE__, __LINE__,
|
||||
std::wstring(st.begin(), st.end()),
|
||||
perl|mod_x, L"\"A. Other\" <user.1234@dom.ain> (a comment)", match_default,
|
||||
make_array(0, 42, -2, -2));
|
||||
test(char(0), test_regex_search_tag());
|
||||
}while(0);
|
||||
#endif
|
||||
do{
|
||||
test_info<char>::set_info(__FILE__, __LINE__,
|
||||
big_expression,
|
||||
perl|mod_x, "A. Other <user.1234@dom.ain> (a comment)", match_default,
|
||||
make_array(2, 40, -2, -2));
|
||||
test(char(0), test_regex_search_tag());
|
||||
}while(0);
|
||||
#if !defined(BOOST_NO_WREGEX) && !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)
|
||||
do{
|
||||
std::string st(big_expression);
|
||||
test_info<wchar_t>::set_info(__FILE__, __LINE__,
|
||||
std::wstring(st.begin(), st.end()),
|
||||
perl|mod_x, L"A. Other <user.1234@dom.ain> (a comment)", match_default,
|
||||
make_array(2, 40, -2, -2));
|
||||
test(char(0), test_regex_search_tag());
|
||||
}while(0);
|
||||
#endif
|
||||
do{
|
||||
test_info<char>::set_info(__FILE__, __LINE__,
|
||||
big_expression,
|
||||
perl|mod_x, "\"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"@x400-re.lay", match_default,
|
||||
make_array(0, 61, -2, -2));
|
||||
test(char(0), test_regex_search_tag());
|
||||
}while(0);
|
||||
#if !defined(BOOST_NO_WREGEX) && !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)
|
||||
do{
|
||||
std::string st(big_expression);
|
||||
test_info<wchar_t>::set_info(__FILE__, __LINE__,
|
||||
std::wstring(st.begin(), st.end()),
|
||||
perl|mod_x, L"\"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"@x400-re.lay", match_default,
|
||||
make_array(0, 61, -2, -2));
|
||||
test(char(0), test_regex_search_tag());
|
||||
}while(0);
|
||||
#endif
|
||||
do{
|
||||
test_info<char>::set_info(__FILE__, __LINE__,
|
||||
big_expression,
|
||||
perl|mod_x, "A missing angle <user@some.where", match_default,
|
||||
make_array(17, 32, -2, -2));
|
||||
test(char(0), test_regex_search_tag());
|
||||
}while(0);
|
||||
#if !defined(BOOST_NO_WREGEX) && !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)
|
||||
do{
|
||||
std::string st(big_expression);
|
||||
test_info<wchar_t>::set_info(__FILE__, __LINE__,
|
||||
std::wstring(st.begin(), st.end()),
|
||||
perl|mod_x, L"A missing angle <user@some.where", match_default,
|
||||
make_array(17, 32, -2, -2));
|
||||
test(char(0), test_regex_search_tag());
|
||||
}while(0);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -25,6 +25,7 @@ int cpp_main(int argc, char * argv[])
|
||||
test_independent_subs();
|
||||
test_nosubs();
|
||||
test_conditionals();
|
||||
test_options();
|
||||
return error_count;
|
||||
}
|
||||
|
||||
|
@ -143,5 +143,6 @@ void test_tricky_cases2();
|
||||
void test_independent_subs();
|
||||
void test_nosubs();
|
||||
void test_conditionals();
|
||||
void test_options();
|
||||
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user