Compare commits

...

32 Commits

Author SHA1 Message Date
6732abe03e HP aCC patches.
[SVN r23169]
2004-06-23 12:44:17 +00:00
eec095b121 Fixed bug with grepping for literals.
Added Sunpro workaround.


[SVN r23093]
2004-06-12 10:10:07 +00:00
bd8de55174 64-bit portability fix
[SVN r23065]
2004-06-09 12:12:42 +00:00
3fd3f23e6e HP aCC workaround
[SVN r23064]
2004-06-09 12:12:01 +00:00
cf6f69258c 64-bit portability fix
[SVN r23063]
2004-06-09 11:23:52 +00:00
d1649417d1 Added patches for Sunpro and HP aCC
[SVN r23031]
2004-06-05 11:46:05 +00:00
5fed1798dd Altered initialiser order to stop gcc from complaining.
[SVN r22876]
2004-05-20 10:16:09 +00:00
b0e83b535a patch for nosubs flag bug
[SVN r22822]
2004-05-14 10:28:09 +00:00
91e7e3b602 Fixed \l and \u so that they work correctly in case insensitive mode.
[SVN r22804]
2004-05-12 11:39:28 +00:00
8d76f35c64 Patched for problem with wide character optimisations.
[SVN r22724]
2004-05-03 11:31:15 +00:00
92bf09979a Changed ar to $(AR).
[SVN r22717]
2004-05-01 10:53:14 +00:00
ca9a970d6f Don't optimise leading repeats when there are backreferences involved.
[SVN r22705]
2004-04-25 10:32:09 +00:00
2816f0e1ca Added introductory comment
[SVN r22641]
2004-04-17 10:56:46 +00:00
afd2bf331a Fixes for POSIX regexes not grepping correctly.
[SVN r22623]
2004-04-09 15:55:32 +00:00
fb844f004e Added fix for grepping for POSIX style matches.
[SVN r22621]
2004-04-07 11:04:06 +00:00
283548f948 Added missing hyperlink target.
[SVN r22619]
2004-04-07 10:45:53 +00:00
77595dddba Added link to old docs.
[SVN r22530]
2004-03-20 12:12:03 +00:00
edc9f4d288 Added missing preconditions
[SVN r22528]
2004-03-20 11:53:18 +00:00
f60ed016e7 Don't try to export symbols that don't need it.
[SVN r22524]
2004-03-19 12:40:39 +00:00
5503fa8fb7 Fixed typo.
[SVN r22523]
2004-03-19 12:39:56 +00:00
5cce92fa23 Added typedefs to the index.
[SVN r22522]
2004-03-19 12:39:30 +00:00
b35a6eabbd Added missing typedefs.
[SVN r22521]
2004-03-19 12:38:44 +00:00
62e79baf65 Fixed typo.
[SVN r22437]
2004-03-05 11:31:43 +00:00
f7abd42972 Added extra non-greedy repeat tests
[SVN r22421]
2004-03-02 16:57:36 +00:00
82abe57838 removed unnessary #if
[SVN r22420]
2004-03-02 16:56:56 +00:00
5babdbfa82 Fixed nasty non-greedy repeat bug
[SVN r22419]
2004-03-02 16:56:28 +00:00
f8cd505f18 Updated iterator defs
[SVN r22388]
2004-02-25 12:37:52 +00:00
3b3becb57c Added extra acknowledgements
[SVN r22384]
2004-02-25 12:34:42 +00:00
d717e0a962 Fixed typos reported in release candidate.
[SVN r22161]
2004-02-04 15:24:31 +00:00
8a2e6a5dfb Added more tests to catch some additional regressions.
[SVN r21526]
2004-01-07 12:02:58 +00:00
b984803077 Added last minute patch for .*?$ failing in the non-recursive algorithm.
[SVN r21525]
2004-01-07 12:00:51 +00:00
a952ab8c15 This commit was manufactured by cvs2svn to create branch 'RC_1_31_0'.
[SVN r21427]
2003-12-30 12:10:04 +00:00
42 changed files with 1552 additions and 1782 deletions

View File

@ -18,6 +18,7 @@
# compiler:
CXX=g++
LINKER=g++ -shared
AR=ar
#
# compiler options for release build:
@ -94,8 +95,8 @@ boost_regex-gcc-1_31_clean :
rm -f gcc/boost_regex-gcc-1_31/*.o
./gcc/libboost_regex-gcc-1_31.a : gcc/boost_regex-gcc-1_31/c_regex_traits.o gcc/boost_regex-gcc-1_31/c_regex_traits_common.o gcc/boost_regex-gcc-1_31/cpp_regex_traits.o gcc/boost_regex-gcc-1_31/cregex.o gcc/boost_regex-gcc-1_31/fileiter.o gcc/boost_regex-gcc-1_31/instances.o gcc/boost_regex-gcc-1_31/posix_api.o gcc/boost_regex-gcc-1_31/regex.o gcc/boost_regex-gcc-1_31/regex_debug.o gcc/boost_regex-gcc-1_31/regex_synch.o gcc/boost_regex-gcc-1_31/w32_regex_traits.o gcc/boost_regex-gcc-1_31/wide_posix_api.o gcc/boost_regex-gcc-1_31/winstances.o
ar -r gcc/libboost_regex-gcc-1_31.a gcc/boost_regex-gcc-1_31/c_regex_traits.o gcc/boost_regex-gcc-1_31/c_regex_traits_common.o gcc/boost_regex-gcc-1_31/cpp_regex_traits.o gcc/boost_regex-gcc-1_31/cregex.o gcc/boost_regex-gcc-1_31/fileiter.o gcc/boost_regex-gcc-1_31/instances.o gcc/boost_regex-gcc-1_31/posix_api.o gcc/boost_regex-gcc-1_31/regex.o gcc/boost_regex-gcc-1_31/regex_debug.o gcc/boost_regex-gcc-1_31/regex_synch.o gcc/boost_regex-gcc-1_31/w32_regex_traits.o gcc/boost_regex-gcc-1_31/wide_posix_api.o gcc/boost_regex-gcc-1_31/winstances.o
-ar -s gcc/libboost_regex-gcc-1_31.a
$(AR) -r gcc/libboost_regex-gcc-1_31.a gcc/boost_regex-gcc-1_31/c_regex_traits.o gcc/boost_regex-gcc-1_31/c_regex_traits_common.o gcc/boost_regex-gcc-1_31/cpp_regex_traits.o gcc/boost_regex-gcc-1_31/cregex.o gcc/boost_regex-gcc-1_31/fileiter.o gcc/boost_regex-gcc-1_31/instances.o gcc/boost_regex-gcc-1_31/posix_api.o gcc/boost_regex-gcc-1_31/regex.o gcc/boost_regex-gcc-1_31/regex_debug.o gcc/boost_regex-gcc-1_31/regex_synch.o gcc/boost_regex-gcc-1_31/w32_regex_traits.o gcc/boost_regex-gcc-1_31/wide_posix_api.o gcc/boost_regex-gcc-1_31/winstances.o
-$(AR) -s gcc/libboost_regex-gcc-1_31.a
########################################################
#
@ -148,6 +149,6 @@ boost_regex-gcc-d-1_31_clean :
rm -f gcc/boost_regex-gcc-d-1_31/*.o
./gcc/libboost_regex-gcc-d-1_31.a : gcc/boost_regex-gcc-d-1_31/c_regex_traits.o gcc/boost_regex-gcc-d-1_31/c_regex_traits_common.o gcc/boost_regex-gcc-d-1_31/cpp_regex_traits.o gcc/boost_regex-gcc-d-1_31/cregex.o gcc/boost_regex-gcc-d-1_31/fileiter.o gcc/boost_regex-gcc-d-1_31/instances.o gcc/boost_regex-gcc-d-1_31/posix_api.o gcc/boost_regex-gcc-d-1_31/regex.o gcc/boost_regex-gcc-d-1_31/regex_debug.o gcc/boost_regex-gcc-d-1_31/regex_synch.o gcc/boost_regex-gcc-d-1_31/w32_regex_traits.o gcc/boost_regex-gcc-d-1_31/wide_posix_api.o gcc/boost_regex-gcc-d-1_31/winstances.o
ar -r gcc/libboost_regex-gcc-d-1_31.a gcc/boost_regex-gcc-d-1_31/c_regex_traits.o gcc/boost_regex-gcc-d-1_31/c_regex_traits_common.o gcc/boost_regex-gcc-d-1_31/cpp_regex_traits.o gcc/boost_regex-gcc-d-1_31/cregex.o gcc/boost_regex-gcc-d-1_31/fileiter.o gcc/boost_regex-gcc-d-1_31/instances.o gcc/boost_regex-gcc-d-1_31/posix_api.o gcc/boost_regex-gcc-d-1_31/regex.o gcc/boost_regex-gcc-d-1_31/regex_debug.o gcc/boost_regex-gcc-d-1_31/regex_synch.o gcc/boost_regex-gcc-d-1_31/w32_regex_traits.o gcc/boost_regex-gcc-d-1_31/wide_posix_api.o gcc/boost_regex-gcc-d-1_31/winstances.o
-ar -s gcc/libboost_regex-gcc-d-1_31.a
$(AR) -r gcc/libboost_regex-gcc-d-1_31.a gcc/boost_regex-gcc-d-1_31/c_regex_traits.o gcc/boost_regex-gcc-d-1_31/c_regex_traits_common.o gcc/boost_regex-gcc-d-1_31/cpp_regex_traits.o gcc/boost_regex-gcc-d-1_31/cregex.o gcc/boost_regex-gcc-d-1_31/fileiter.o gcc/boost_regex-gcc-d-1_31/instances.o gcc/boost_regex-gcc-d-1_31/posix_api.o gcc/boost_regex-gcc-d-1_31/regex.o gcc/boost_regex-gcc-d-1_31/regex_debug.o gcc/boost_regex-gcc-d-1_31/regex_synch.o gcc/boost_regex-gcc-d-1_31/w32_regex_traits.o gcc/boost_regex-gcc-d-1_31/wide_posix_api.o gcc/boost_regex-gcc-d-1_31/winstances.o
-$(AR) -s gcc/libboost_regex-gcc-d-1_31.a

View File

@ -24,6 +24,11 @@
</P>
<HR>
<p></p>
<P>Captures are the iterator ranges that are "captured" by marked sub-expressions
as a regular expression gets matched.&nbsp; Each marked sub-expression can
result in more than one capture, if it is matched more than once.&nbsp; This
document explains how captures and marked sub-expressions in Boost.Regex are
represented and accessed.</P>
<H2>Marked sub-expressions</H2>
<P>Every time a Perl regular expression contains a parenthesis group (), it spits
out an extra field, known as a marked sub-expression, for example the
@ -247,4 +252,3 @@ Text: "now is the time for all good men to come to the aid of the party"
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
</body>
</html>

View File

@ -25,25 +25,32 @@
<BR>
<BR>
<HR>
<P>The author can be contacted at john@johnmaddock.co.uk;&nbsp;the
home page for this library is at <A href="http://www.boost.org">www.boost.org</A>.</P>
<P>I am indebted to Robert Sedgewick's "Algorithms in C++" for forcing me to think
about algorithms and their performance, and to the folks at boost for forcing
me to <I>think</I>, period. The following people have all contributed useful
comments or fixes: Dave Abrahams, Mike Allison, Edan Ayal, Jayashree
Balasubramanian, Jan B&ouml;lsche, Beman Dawes, Paul Baxter, David Bergman, David
Dennerline, Edward Diener, Peter Dimov, Robert Dunn, Fabio Forno, Tobias
Gabrielsson, Rob Gillen, Marc Gregoire, Chris Hecker, Nick Hodapp, Jesse Jones,
Martin Jost, Boris Krasnovskiy, Jan Hermelink, Max Leung, Wei-hao Lin, Jens
Maurer, Richard Peters, Heiko Schmidt, Jason Shirk, Gerald Slacik, Scobie
Smith, Mike Smyth, Alexander Sokolovsky, Herv&eacute; Poirier, Michael Raykh, Marc
Recht, Scott VanCamp, Bruno Voigt, Alexey Voinov, Jerry Waldorf, Rob Ward,
Lealon Watts, Thomas Witt and Yuval Yosef. I am also grateful to the manuals
supplied with the Henry Spencer, Perl and GNU regular expression libraries -
wherever possible I have tried to maintain compatibility with these libraries
and with the POSIX standard - the code however is entirely my own, including
any bugs! I can absolutely guarantee that I will not fix any bugs I don't know
about, so if you have any comments or spot any bugs, please get in touch.</P>
<P>The author can be contacted at john@johnmaddock.co.uk;&nbsp;the home page for
this library is at <A href="http://www.boost.org">www.boost.org</A>.</P>
<P>I am indebted to <A href="http://www.cs.princeton.edu/~rs/">Robert Sedgewick's
"Algorithms in C++" </A>for forcing me to think about algorithms and their
performance, and to the folks at <A href="http://www.boost.org">boost</A> for
forcing me to <I>think</I>, period.</P>
<P><A href="http://www.boost-consulting.com">Eric Niebler</A>, author of the <A href="http://research.microsoft.com/projects/greta">
GRETA regular expression component</A>, has shared several important ideas,
in a series of long discussions.</P>
<P>Pete Becker, of <A href="http://www.dinkumware.com/">Dinkumware Ltd</A>, has
helped enormously with the standardisation proposal language.</P>
<P>The following people have all contributed useful comments or fixes: Dave
Abrahams, Mike Allison, Edan Ayal, Jayashree Balasubramanian, Jan B&ouml;lsche,
Beman Dawes, Paul Baxter, David Bergman, David Dennerline, Edward Diener, Peter
Dimov, Robert Dunn, Fabio Forno, Tobias Gabrielsson, Rob Gillen, Marc Gregoire,
Chris Hecker, Nick Hodapp, Jesse Jones, Martin Jost, Boris Krasnovskiy, Jan
Hermelink, Max Leung, Wei-hao Lin, Jens Maurer, Richard Peters, Heiko Schmidt,
Jason Shirk, Gerald Slacik, Scobie Smith, Mike Smyth, Alexander Sokolovsky,
Herv&eacute; Poirier, Michael Raykh, Marc Recht, Scott VanCamp, Bruno Voigt, Alexey
Voinov, Jerry Waldorf, Rob Ward, Lealon Watts, John Wismar, Thomas Witt and
Yuval Yosef. I am also grateful to the manuals supplied with the Henry Spencer,
Perl and GNU regular expression libraries - wherever possible I have tried to
maintain compatibility with these libraries and with the POSIX standard - the
code however is entirely my own, including any bugs! I can absolutely guarantee
that I will not fix any bugs I don't know about, so if you have any comments or
spot any bugs, please get in touch.</P>
<P>Useful further information can be found at:</P>
<P>Short&nbsp;tutorials on regular expressions can be <A href="http://etext.lib.virginia.edu/helpsheets/regex.html">
found here</A> and&nbsp;<A href="http://www.devshed.com/Server_Side/Administration/RegExp/page1.html">here</A>.</P>
@ -72,8 +79,7 @@
24 Oct 2003
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
<p><i>&copy; Copyright John Maddock&nbsp;1998-
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<P><I>Use, modification and distribution are subject to the Boost Software License,
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>

View File

@ -1,153 +1,114 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
<meta name="generator" content="HTML Tidy, see www.w3.org">
<title>Boost.Regex: FAQ</title>
<meta http-equiv="Content-Type" content=
"text/html; charset=iso-8859-1">
<link rel="stylesheet" type="text/css" href="../../../boost.css">
</head>
<body>
<p></p>
<table id="Table1" cellspacing="1" cellpadding="1" width="100%"
border="0">
<tr>
<td valign="top" width="300">
<h3><a href="../../../index.htm"><img height="86" width="277" alt=
"C++ Boost" src="../../../c++boost.gif" border="0"></a></h3>
</td>
<td width="353">
<h1 align="center">Boost.Regex</h1>
<h2 align="center">FAQ</h2>
</td>
<td width="50">
<h3><a href="index.html"><img height="45" width="43" alt=
"Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
</td>
</tr>
</table>
<br>
<br>
<hr>
<font color="#ff0000"><font color="#ff0000"></font></font>
<p><font color="#ff0000"><font color="#ff0000"><font color=
"#ff0000">&nbsp;Q. Why can't I use the "convenience" versions of
regex_match / regex_search / regex_grep / regex_format /
regex_merge?</font></font></font></p>
<p>A. These versions may or may not be available depending upon the
capabilities of your compiler, the rules determining the format of
these functions are quite complex - and only the versions visible
to a standard compliant compiler are given in the help. To find out
what your compiler supports, run &lt;boost/regex.hpp&gt; through
your C++ pre-processor, and search the output file for the function
that you are interested in.<font color="#ff0000"><font color=
"#ff0000"></font></font></p>
<p><font color="#ff0000"><font color="#ff0000">Q. I can't get
regex++ to work with escape characters, what's going
on?</font></font></p>
<p>A. If you embed regular expressions in C++ code, then remember
that escape characters are processed twice: once by the C++
compiler, and once by the regex++ expression compiler, so to pass
the regular expression \d+ to regex++, you need to embed "\\d+" in
your code. Likewise to match a literal backslash you will need to
embed "\\\\" in your code. <font color="#ff0000"></font></p>
<p><font color="#ff0000">Q. Why does using parenthesis in a POSIX
regular expression change the result of a match?</font></p>
<p>For POSIX (extended and basic) regular expressions, but not for
perl regexes, parentheses don't only mark; they determine what the
best match is as well. When the expression is compiled as a POSIX
basic or extended regex then Boost.regex follows the POSIX standard
leftmost longest rule for determining what matched. So if there is
more than one possible match after considering the whole
expression, it looks next at the first sub-expression and then the
second sub-expression and so on. So...</p>
<pre>
<head>
<title>Boost.Regex: FAQ</title>
<meta name="generator" content="HTML Tidy, see www.w3.org">
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" type="text/css" href="../../../boost.css">
</head>
<body>
<p></p>
<table id="Table1" cellspacing="1" cellpadding="1" width="100%" border="0">
<tr>
<td valign="top" width="300">
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../c++boost.gif" border="0"></a></h3>
</td>
<td width="353">
<h1 align="center">Boost.Regex</h1>
<h2 align="center">FAQ</h2>
</td>
<td width="50">
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
</td>
</tr>
</table>
<br>
<br>
<hr>
<font color="#ff0000"><font color="#ff0000"></font></font>
<p><font color="#ff0000"><font color="#ff0000"><font color="#ff0000">&nbsp;Q. Why can't I
use the "convenience" versions of regex_match / regex_search / regex_grep /
regex_format / regex_merge?</font></font></font></p>
<p>A. These versions may or may not be available depending upon the capabilities
of your compiler, the rules determining the format of these functions are quite
complex - and only the versions visible to a standard compliant compiler are
given in the help. To find out what your compiler supports, run
&lt;boost/regex.hpp&gt; through your C++ pre-processor, and search the output
file for the function that you are interested in.<font color="#ff0000"><font color="#ff0000"></font></font></p>
<p><font color="#ff0000"><font color="#ff0000">Q. I can't get regex++ to work with
escape characters, what's going on?</font></font></p>
<p>A. If you embed regular expressions in C++ code, then remember that escape
characters are processed twice: once by the C++ compiler, and once by the
regex++ expression compiler, so to pass the regular expression \d+ to regex++,
you need to embed "\\d+" in your code. Likewise to match a literal backslash
you will need to embed "\\\\" in your code. <font color="#ff0000"></font>
</p>
<p><font color="#ff0000">Q. Why does using parentheses in a POSIX regular expression
change the result of a match?</font></p>
<p>For POSIX (extended and basic) regular expressions, but not for perl regexes,
parentheses don't only mark; they determine what the best match is as well.
When the expression is compiled as a POSIX basic or extended regex then
Boost.regex follows the POSIX standard leftmost longest rule for determining
what matched. So if there is more than one possible match after considering the
whole expression, it looks next at the first sub-expression and then the second
sub-expression and so on. So...</p>
<pre>
"(0*)([0-9]*)" against "00123" would produce
$1 = "00"
$2 = "123"
</pre>
<p>where as</p>
<pre>
"0*([0-9)*" against "00123" would produce
<p>whereas</p>
<pre>
"0*([0-9]*)" against "00123" would produce
$1 = "00123"
</pre>
<p>If you think about it, had $1 only matched the "123", this would
be "less good" than the match "00123" which is both further to the
left and longer. If you want $1 to match only the "123" part, then
you need to use something like:</p>
<pre>
<p>If you think about it, had $1 only matched the "123", this would be "less good"
than the match "00123" which is both further to the left and longer. If you
want $1 to match only the "123" part, then you need to use something like:</p>
<pre>
"0*([1-9][0-9]*)"
</pre>
<p>as the expression.</p>
<p><font color="#ff0000">Q. Why don't character ranges work
properly (POSIX mode only)?</font><br>
A. The POSIX standard specifies that character range expressions
are locale sensitive - so for example the expression [A-Z] will
match any collating element that collates between 'A' and 'Z'. That
means that for most locales other than "C" or "POSIX", [A-Z] would
match the single character 't' for example, which is not what most
people expect - or at least not what most people have come to
expect from regular expression engines. For this reason, the
default behaviour of boost.regex (perl mode) is to turn locale
sensitive collation off by not setting the regex_constants::collate
compile time flag. However if you set a non-default compile time
flag - for example regex_constants::extended or
regex_constants::basic, then locale dependent collation will be
enabled, this also applies to the POSIX API functions which use
either regex_constants::extended or regex_constants::basic
internally. <i>[Note - when regex_constants::nocollate in effect,
the library behaves "as if" the LC_COLLATE locale category were
always "C", regardless of what its actually set to - end
note</i>].</p>
<p><font color="#ff0000">Q. Why are there no throw specifications
on any of the functions? What exceptions can the library
throw?</font></p>
<p>A. Not all compilers support (or honor) throw specifications,
others support them but with reduced efficiency. Throw
specifications may be added at a later date as compilers begin to
handle this better. The library should throw only three types of
exception: boost::bad_expression can be thrown by basic_regex when
compiling a regular expression, std::runtime_error can be thrown
when a call to basic_regex::imbue tries to open a message catalogue
that doesn't exist, or when a call to regex_search or regex_match
results in an "everlasting" search,&nbsp;or when a call to
RegEx::GrepFiles or RegEx::FindFiles tries to open a file that
cannot be opened, finally std::bad_alloc can be thrown by just
about any of the functions in this library.</p>
<p></p>
<hr>
<p>as the expression.</p>
<p><font color="#ff0000">Q. Why don't character ranges work properly (POSIX mode
only)?</font><br>
A. The POSIX standard specifies that character range expressions are locale
sensitive - so for example the expression [A-Z] will match any collating
element that collates between 'A' and 'Z'. That means that for most locales
other than "C" or "POSIX", [A-Z] would match the single character 't' for
example, which is not what most people expect - or at least not what most
people have come to expect from regular expression engines. For this reason,
the default behaviour of boost.regex (perl mode) is to turn locale sensitive
collation off by not setting the regex_constants::collate compile time flag.
However if you set a non-default compile time flag - for example
regex_constants::extended or regex_constants::basic, then locale dependent
collation will be enabled, this also applies to the POSIX API functions which
use either regex_constants::extended or regex_constants::basic internally. <i>[Note
- when regex_constants::nocollate is in effect, the library behaves "as if" the
LC_COLLATE locale category were always "C", regardless of what it's actually set
to - end note</i>].</p>
<p><font color="#ff0000">Q. Why are there no throw specifications on any of the
functions? What exceptions can the library throw?</font></p>
<p>A. Not all compilers support (or honor) throw specifications, others support
them but with reduced efficiency. Throw specifications may be added at a later
date as compilers begin to handle this better. The library should throw only
three types of exception: boost::bad_expression can be thrown by basic_regex
when compiling a regular expression, std::runtime_error can be thrown when a
call to basic_regex::imbue tries to open a message catalogue that doesn't
exist, or when a call to regex_search or regex_match results in an
"everlasting" search,&nbsp;or when a call to RegEx::GrepFiles or
RegEx::FindFiles tries to open a file that cannot be opened, finally
std::bad_alloc can be thrown by just about any of the functions in this
library.</p>
<p></p>
<hr>
<p>Revised
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
24 Oct 2003
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
<p><i>&copy; Copyright John Maddock&nbsp;1998-
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<P><I>Use, modification and distribution are subject to the Boost Software License,
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
</body>
</body>
</html>

View File

@ -31,7 +31,7 @@
character sequence.&nbsp; The behavior of the format flags is described in more
detail in the <A href="format_syntax.html">format syntax guide</A>.</p>
<pre>
namespace std{ namespace regex_constants{
namespace boost{ namespace regex_constants{
typedef bitmask_type match_flag_type;
@ -59,7 +59,7 @@ static const match_flag_type format_first_only;
static const match_flag_type format_all;
} // namespace regex_constants
} // namespace std
} // namespace boost
</pre>
<h3>Description</h3>
<p>The type <code>match_flag_type</code> is an implementation defined bitmask type
@ -271,10 +271,10 @@ static const match_flag_type format_all;
<br>
<p>Revised
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
24 Oct 2003
04 Feb 2004
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
<p><i>&copy; Copyright John Maddock&nbsp;1998-
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<P><I>Use, modification and distribution are subject to the Boost Software License,
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>

View File

@ -344,7 +344,7 @@ const_iterator end()const;
<p><b>Effects:</b> Returns a terminating iterator that enumerates over all the
marked sub-expression matches stored in *this.</p>
<h4><A name="format"></A>match_results reformatting</h4>
<pre>template &lt;class OutputIterator&gt;
<pre><A name=m12></A>template &lt;class OutputIterator&gt;
OutputIterator format(OutputIterator out,
const string_type&amp; fmt,
<A href="match_flag_type.html" >match_flag_type</A> flags = format_default);

File diff suppressed because it is too large Load Diff

View File

@ -42,7 +42,7 @@
iterator first,
iterator last,
<b>const</b> basic_regex&lt;charT, traits, Allocator&gt;&amp; e,
<b>unsigned</b> flags = match_default)
boost::match_flag_type flags = match_default)
</pre>
<p>The library also defines the following convenience versions, which take either
a const charT*, or a const std::basic_string&lt;&gt;&amp; in place of a pair of
@ -53,13 +53,13 @@
<b>unsigned</b> <b>int</b> regex_grep(Predicate foo,
<b>const</b> charT* str,
<b>const</b> basic_regex&lt;charT, traits, Allocator&gt;&amp; e,
<b>unsigned</b> flags = match_default);
boost::match_flag_type flags = match_default);
<b>template</b> &lt;<b>class</b> Predicate, <b>class</b> ST, <b>class</b> SA, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits&gt;
<b>unsigned</b> <b>int</b> regex_grep(Predicate foo,
<b>const</b> std::basic_string&lt;charT, ST, SA&gt;&amp; s,
<b>const</b> basic_regex&lt;charT, traits, Allocator&gt;&amp; e,
<b>unsigned</b> flags = match_default);
boost::match_flag_type flags = match_default);
</pre>
<p>The parameters for the primary version of regex_grep have the following
meanings:&nbsp;</p>
@ -370,11 +370,10 @@ index[std::string(what[5].first, what[5].second) + std::string(what[6].first, wh
<hr>
<p>Revised
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
24 Oct 2003
04 Feb 2004
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
<p><i>&copy; Copyright John Maddock&nbsp;1998-
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<P><I>Use, modification and distribution are subject to the Boost Software License,
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>

View File

@ -294,7 +294,7 @@ void</B> IndexClasses(map_type&amp; m, <B>const</B> std::string&amp; file)
&nbsp;&nbsp; start = file.begin();
&nbsp;&nbsp; end = file.end();&nbsp;
&nbsp;&nbsp; &nbsp;&nbsp; boost::<a href="match_results.html">match_results</a>&lt;std::string::const_iterator&gt; what;
&nbsp;&nbsp; <B>unsigned</B> <B>int</B> flags = boost::match_default;
&nbsp;&nbsp; boost::match_flag_type flags = boost::match_default;
&nbsp;&nbsp; <B>while</B>(regex_search(start, end, what, expression, flags))&nbsp;
&nbsp;&nbsp; {
<FONT color=#000080>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <I>// what[0] contains the whole string
@ -314,11 +314,10 @@ void</B> IndexClasses(map_type&amp; m, <B>const</B> std::string&amp; file)
<HR>
<p>Revised
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
24 Oct 2003
04 Feb 2004
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
<p><i>&copy; Copyright John Maddock&nbsp;1998-
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<P><I>Use, modification and distribution are subject to the Boost Software License,
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>

View File

@ -38,15 +38,15 @@
<PRE><B>template</B> &lt;<B>class</B> OutputIterator, <B>class</B> charT, <B>class</B> Traits1, <B>class</B> Alloc1, <B>class</B> Traits2, <B>class</B> Alloc2&gt;
std::size_t regex_split(OutputIterator out,&nbsp;
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; std::basic_string&lt;charT, Traits1, Alloc1&gt;&amp; s,&nbsp;
&nbsp;<B>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; const</B> basic_regex&lt;charT, Traits2, Alloc2&gt;&amp; e,
&nbsp;<B>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; unsigned</B> flags,
&nbsp;<B> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;const</B> basic_regex&lt;charT, Traits2, Alloc2&gt;&amp; e,
&nbsp;<STRONG>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</STRONG>boost::match_flag_type flags,
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; std::size_t max_split);
<B>template</B> &lt;<B>class</B> OutputIterator, <B>class</B> charT, <B>class</B> Traits1, <B>class</B> Alloc1, <B>class</B> Traits2, <B>class</B> Alloc2&gt;
std::size_t regex_split(OutputIterator out,&nbsp;
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; std::basic_string&lt;charT, Traits1, Alloc1&gt;&amp; s,&nbsp;
&nbsp;<B>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; const</B> basic_regex&lt;charT, Traits2, Alloc2&gt;&amp; e,
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <B>unsigned</B> flags = match_default);
&nbsp;<B> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;const</B> basic_regex&lt;charT, Traits2, Alloc2&gt;&amp; e,
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; boost::match_flag_type flags = match_default);
<B>template</B> &lt;<B>class</B> OutputIterator, <B>class</B> charT, <B>class</B> Traits1, <B>class</B> Alloc1&gt;
std::size_t regex_split(OutputIterator out,
@ -134,11 +134,10 @@ boost::regex e(<FONT color=#000080>"&lt;\\s*A\\s+[^&gt;]*href\\s*=\\s*\"([^\"]*)
<HR>
<p>Revised
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
24 Oct 2003
04 Feb 2004
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
<p><i>&copy; Copyright John Maddock&nbsp;1998-
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<P><I>Use, modification and distribution are subject to the Boost Software License,
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>

View File

@ -76,7 +76,7 @@ typedef regex_token_iterator&lt;const char*&gt; cregex_token_i
typedef regex_token_iterator&lt;std::string::const_iterator&gt; sregex_token_iterator;
#ifndef BOOST_NO_WREGEX
typedef regex_token_iterator&lt;const wchar_t*&gt; wcregex_token_iterator;
typedef regex_token_iterator<&lt;std::wstring::const_iterator&gt; wsregex_token_iterator;
typedef regex_token_iterator&lt;std::wstring::const_iterator&gt; wsregex_token_iterator;
#endif
</PRE>
<H3><A name="description"></A>Description</H3>
@ -84,7 +84,8 @@ typedef regex_token_iterator<&lt;std::wstring::const_iterator&gt; wsregex_token_
<P><B> Effects:</B> constructs an end of sequence iterator.</P>
<PRE><A name=c2></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
int submatch = 0, match_flag_type m = match_default);</PRE>
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>.</P>
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>.&nbsp; Object re shall exist
for the lifetime of the iterator constructed from it.</P>
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate one
string for each regular expression match of the expression <EM>re</EM> found
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>.&nbsp; The
@ -99,7 +100,8 @@ typedef regex_token_iterator<&lt;std::wstring::const_iterator&gt; wsregex_token_
configured</A> in non-recursive mode).</P>
<PRE><A name=c3></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
const std::vector&lt;int&gt;&amp; submatches, match_flag_type m = match_default);</PRE>
<P><B> Preconditions:</B> <CODE>submatches.size() &amp;&amp; !re.empty()</CODE>.</P>
<P><B> Preconditions:</B> <CODE>submatches.size() &amp;&amp; !re.empty()</CODE>.&nbsp;
Object re shall exist for the lifetime of the iterator constructed from it.</P>
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate <EM>submatches.size()</EM>
strings for each regular expression match of the expression <EM>re</EM> found
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>.&nbsp; For
@ -118,7 +120,8 @@ typedef regex_token_iterator<&lt;std::wstring::const_iterator&gt; wsregex_token_
<PRE><A name=c4></A>template &lt;std::size_t R&gt;
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
const int (&amp;submatches)[R], match_flag_type m = match_default);</PRE>
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>.</P>
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>.&nbsp; Object re shall exist
for the lifetime of the iterator constructed from it.</P>
<P><STRONG>Effects:</STRONG> constructs a regex_token_iterator that will
enumerate&nbsp;<EM>R</EM> strings for each regular expression match of the
expression <EM>re</EM> found within the sequence <EM>[a,b)</EM>, using match

View File

@ -24,10 +24,12 @@
</P>
<HR>
<p></p>
<P>Under construction.</P>
<P>The current boost.regex traits class design will be migrated to that specified
in the <A href="http://anubis.dkuug.dk/jtc1/sc22/wg21/docs/papers/2003/n1429.htm">regular
expression standardization proposal</A>.&nbsp;</P>
<P>
Under construction: the current design will be replaced by that specified in
the <A href="http://anubis.dkuug.dk/jtc1/sc22/wg21/docs/papers/2003/n1429.htm">regular
expression standardization proposal</A>; the current (obsolete) design has
its <A href="http://cvs.sourceforge.net/viewcvs.py/*checkout*/boost/boost/libs/regex/Attic/traits_class_ref.htm?rev=1.11">
documentation archived online</A>.</P>
<P>
<HR>
<P></P>
@ -36,11 +38,9 @@
24 Oct 2003
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
<p><i>&copy; Copyright John Maddock&nbsp;1998-
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<P><I>Use, modification and distribution are subject to the Boost Software License,
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
</body>
</html>

View File

@ -91,18 +91,18 @@
<P>Parentheses serve two purposes, to group items together into a sub-expression,
and to mark what generated the match. For example the expression "(ab)*" would
match all of the string "ababab". The matching algorithms <A href="regex_match.html">
regex_match</A> and <A href="regex_search.html">regex_search</A>
each take an instance of <A href="match_results.html">match_results</A>
that reports what caused the match, on exit from these functions the <A href="match_results.html">
match_results</A> contains information both on what the whole expression
matched and on what each sub-expression matched. In the example above
match_results[1] would contain a pair of iterators denoting the final "ab" of
the matching string. It is permissible for sub-expressions to match null
strings. If a sub-expression takes no part in a match - for example if it is
part of an alternative that is not taken - then both of the iterators that are
returned for that sub-expression point to the end of the input string, and the <I>matched</I>
parameter for that sub-expression is <I>false</I>. Sub-expressions are indexed
from left to right starting from 1, sub-expression 0 is the whole expression.
regex_match</A> and <A href="regex_search.html">regex_search</A> each take
an instance of <A href="match_results.html">match_results</A> that reports what
caused the match, on exit from these functions the <A href="match_results.html">match_results</A>
contains information both on what the whole expression matched and on what each
sub-expression matched. In the example above match_results[1] would contain a
pair of iterators denoting the final "ab" of the matching string. It is
permissible for sub-expressions to match null strings. If a sub-expression
takes no part in a match - for example if it is part of an alternative that is
not taken - then both of the iterators that are returned for that
sub-expression point to the end of the input string, and the <I>matched</I> parameter
for that sub-expression is <I>false</I>. Sub-expressions are indexed from left
to right starting from 1, sub-expression 0 is the whole expression.
</P>
<H3>Non-Marking Parenthesis
</H3>
@ -143,7 +143,7 @@
<P>A set is a set of characters that can match any single character that is a
member of the set. Sets are delimited by "[" and "]" and can contain literals,
character ranges, character classes, collating elements and equivalence
classes. Set declarations that start with "^" contain the compliment of the
classes. Set declarations that start with "^" contain the complement of the
elements that follow.
</P>
<P>Examples:
@ -293,7 +293,7 @@
[^[.ae.]] would only match one character.&nbsp;
</P>
<P>
Equivalence classes take the general form[=tagname=] inside a set declaration,
Equivalence classes take the general form [=tagname=] inside a set declaration,
where <I>tagname</I> is either a single character, or a name of a collating
element, and matches any character that is a member of the same primary
equivalence class as the collating element [.tagname.]. An equivalence class is
@ -302,7 +302,7 @@
typically collated by character, then by accent, and then by case; the primary
sort key then relates to the character, the secondary to the accentation, and
the tertiary to the case). If there is no equivalence class corresponding to <I>tagname</I>
, then[=tagname=] is exactly the same as [.tagname.]. Unfortunately there is no
, then [=tagname=] is exactly the same as [.tagname.]. Unfortunately there is no
locale independent method of obtaining the primary sort key for a character,
except under Win32. For other operating systems the library will "guess" the
primary sort key from the full sort key (obtained from <I>strxfrm</I>), so
@ -666,106 +666,103 @@
<H3>What gets matched?
</H3>
<P>
When the expression is compiled as a Perl-compatible regex then the matching
algorithms will perform a depth first search on the state machine and report
the first match found.</P>
When the expression is compiled as a Perl-compatible regex then the matching
algorithms will perform a depth first search on the state machine and report
the first match found.</P>
<P>
When the expression is compiled as a POSIX-compatible regex then the matching
algorithms will match the first possible matching string, if more than one
string starting at a given location can match then it matches the longest
possible string, unless the flag match_any is set, in which case the first
match encountered is returned. Use of the match_any option can reduce the time
taken to find the match - but is only useful if the user is less concerned
about what matched - for example it would not be suitable for search and
replace operations. In cases where their are multiple possible matches all
starting at the same location, and all of the same length, then the match
chosen is the one with the longest first sub-expression, if that is the same
for two or more matches, then the second sub-expression will be examined and so
on.
</P><P>
The following table examples illustrate the main differences between Perl and
POSIX regular expression matching rules:
When the expression is compiled as a POSIX-compatible regex then the matching
algorithms will match the first possible matching string, if more than one
string starting at a given location can match then it matches the longest
possible string, unless the flag match_any is set, in which case the first
match encountered is returned. Use of the match_any option can reduce the time
taken to find the match - but is only useful if the user is less concerned
about what matched - for example it would not be suitable for search and
replace operations. In cases where there are multiple possible matches all
starting at the same location, and all of the same length, then the match
chosen is the one with the longest first sub-expression, if that is the same
for two or more matches, then the second sub-expression will be examined and so
on.
</P>
<P>
<TABLE id="Table5" cellSpacing="1" cellPadding="7" width="624" border="1">
<TBODY>
<TR>
<TD vAlign="top" width="25%">
<P>Expression</P>
</TD>
<TD vAlign="top" width="25%">
<P>Text</P>
</TD>
<TD vAlign="top" width="25%">
<P>POSIX leftmost longest match</P>
</TD>
<TD vAlign="top" width="25%">
<P>ECMAScript depth first search match</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="25%">
<P><CODE>a|ab</CODE></P>
</TD>
<TD vAlign="top" width="25%">
<P><CODE>
xaby</CODE>
The following table illustrates the main differences between Perl and
POSIX regular expression matching rules:
</P>
</TD>
<TD vAlign="top" width="25%">
<P><CODE>
"ab"</CODE></P></TD>
<TD vAlign="top" width="25%">
<P><CODE>
"a"</CODE></P></TD>
</TR>
<TR>
<TD vAlign="top" width="25%">
<P><CODE>
.*([[:alnum:]]+).*</CODE></P></TD>
<TD vAlign="top" width="25%">
<P><CODE>
" abc def xyz "</CODE></P></TD>
<TD vAlign="top" width="25%">
<P>$0 = " abc def xyz "<BR>
$1 = "abc"</P>
</TD>
<TD vAlign="top" width="25%">
<P>$0 = " abc def xyz "<BR>
$1 = "z"</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="25%">
<P><CODE>
.*(a|xayy)</CODE></P></TD>
<TD vAlign="top" width="25%">
<P><CODE>
zzxayyzz</CODE></P></TD>
<TD vAlign="top" width="25%">
<P><CODE>
"zzxayy"</CODE></P></TD>
<TD vAlign="top" width="25%">
<P><CODE>"zzxa"</CODE></P>
</TD>
</TR>
</TBODY></CODE></TD></TR></TABLE>
<P>
<TABLE id="Table5" cellSpacing="1" cellPadding="7" width="624" border="1">
<TBODY>
<TR>
<TD vAlign="top" width="25%">
<P>Expression</P>
</TD>
<TD vAlign="top" width="25%">
<P>Text</P>
</TD>
<TD vAlign="top" width="25%">
<P>POSIX leftmost longest match</P>
</TD>
<TD vAlign="top" width="25%">
<P>ECMAScript depth first search match</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="25%">
<P><CODE>a|ab</CODE></P>
</TD>
<TD vAlign="top" width="25%">
<P><CODE> xaby</CODE>
</P>
</TD>
<TD vAlign="top" width="25%">
<P><CODE> "ab"</CODE></P>
</TD>
<TD vAlign="top" width="25%">
<P><CODE> "a"</CODE></P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="25%">
<P><CODE> .*([[:alnum:]]+).*</CODE></P>
</TD>
<TD vAlign="top" width="25%">
<P><CODE> " abc def xyz "</CODE></P>
</TD>
<TD vAlign="top" width="25%">
<P>$0 = " abc def xyz "<BR>
$1 = "abc"</P>
</TD>
<TD vAlign="top" width="25%">
<P>$0 = " abc def xyz "<BR>
$1 = "z"</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="25%">
<P><CODE> .*(a|xayy)</CODE></P>
</TD>
<TD vAlign="top" width="25%">
<P><CODE> zzxayyzz</CODE></P>
</TD>
<TD vAlign="top" width="25%">
<P><CODE> "zzxayy"</CODE></P>
</TD>
<TD vAlign="top" width="25%">
<P><CODE>"zzxa"</CODE></P>
</TD>
</TR>
</TBODY></TABLE>
<P>These differences between Perl matching rules, and POSIX matching rules, mean
that these two regular expression syntaxes differ not only in the features
offered, but also in the form that the state machine takes and/or the
algorithms used to traverse the state machine.</p>
<HR>
algorithms used to traverse the state machine.</P>
<HR>
<p>Revised
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
24 Oct 2003
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
<p><i>&copy; Copyright John Maddock&nbsp;1998-
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<P><I>Use, modification and distribution are subject to the Boost Software License,
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
</body>
</html>

View File

@ -24,6 +24,11 @@
</P>
<HR>
<p></p>
<P>Captures are the iterator ranges that are "captured" by marked sub-expressions
as a regular expression gets matched.&nbsp; Each marked sub-expression can
result in more than one capture, if it is matched more than once.&nbsp; This
document explains how captures and marked sub-expressions in Boost.Regex are
represented and accessed.</P>
<H2>Marked sub-expressions</H2>
<P>Every time a Perl regular expression contains a parenthesis group (), it spits
out an extra field, known as a marked sub-expression, for example the
@ -247,4 +252,3 @@ Text: "now is the time for all good men to come to the aid of the party"
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
</body>
</html>

View File

@ -25,25 +25,32 @@
<BR>
<BR>
<HR>
<P>The author can be contacted at john@johnmaddock.co.uk;&nbsp;the
home page for this library is at <A href="http://www.boost.org">www.boost.org</A>.</P>
<P>I am indebted to Robert Sedgewick's "Algorithms in C++" for forcing me to think
about algorithms and their performance, and to the folks at boost for forcing
me to <I>think</I>, period. The following people have all contributed useful
comments or fixes: Dave Abrahams, Mike Allison, Edan Ayal, Jayashree
Balasubramanian, Jan B<>lsche, Beman Dawes, Paul Baxter, David Bergman, David
Dennerline, Edward Diener, Peter Dimov, Robert Dunn, Fabio Forno, Tobias
Gabrielsson, Rob Gillen, Marc Gregoire, Chris Hecker, Nick Hodapp, Jesse Jones,
Martin Jost, Boris Krasnovskiy, Jan Hermelink, Max Leung, Wei-hao Lin, Jens
Maurer, Richard Peters, Heiko Schmidt, Jason Shirk, Gerald Slacik, Scobie
Smith, Mike Smyth, Alexander Sokolovsky, Herv<72> Poirier, Michael Raykh, Marc
Recht, Scott VanCamp, Bruno Voigt, Alexey Voinov, Jerry Waldorf, Rob Ward,
Lealon Watts, Thomas Witt and Yuval Yosef. I am also grateful to the manuals
supplied with the Henry Spencer, Perl and GNU regular expression libraries -
wherever possible I have tried to maintain compatibility with these libraries
and with the POSIX standard - the code however is entirely my own, including
any bugs! I can absolutely guarantee that I will not fix any bugs I don't know
about, so if you have any comments or spot any bugs, please get in touch.</P>
<P>The author can be contacted at john@johnmaddock.co.uk;&nbsp;the home page for
this library is at <A href="http://www.boost.org">www.boost.org</A>.</P>
<P>I am indebted to <A href="http://www.cs.princeton.edu/~rs/">Robert Sedgewick's
"Algorithms in C++" </A>for forcing me to think about algorithms and their
performance, and to the folks at <A href="http://www.boost.org">boost</A> for
forcing me to <I>think</I>, period.</P>
<P><A href="http://www.boost-consulting.com">Eric Niebler</A>, author of the <A href="http://research.microsoft.com/projects/greta">
GRETA regular expression component</A>, has shared several important ideas,
in a series of long discussions.</P>
<P>Pete Becker, of <A href="http://www.dinkumware.com/">Dinkumware Ltd</A>, has
helped enormously with the standardisation proposal language.</P>
<P>The following people have all contributed useful comments or fixes: Dave
Abrahams, Mike Allison, Edan Ayal, Jayashree Balasubramanian, Jan B&ouml;lsche,
Beman Dawes, Paul Baxter, David Bergman, David Dennerline, Edward Diener, Peter
Dimov, Robert Dunn, Fabio Forno, Tobias Gabrielsson, Rob Gillen, Marc Gregoire,
Chris Hecker, Nick Hodapp, Jesse Jones, Martin Jost, Boris Krasnovskiy, Jan
Hermelink, Max Leung, Wei-hao Lin, Jens Maurer, Richard Peters, Heiko Schmidt,
Jason Shirk, Gerald Slacik, Scobie Smith, Mike Smyth, Alexander Sokolovsky,
Herv&eacute; Poirier, Michael Raykh, Marc Recht, Scott VanCamp, Bruno Voigt, Alexey
Voinov, Jerry Waldorf, Rob Ward, Lealon Watts, John Wismar, Thomas Witt and
Yuval Yosef. I am also grateful to the manuals supplied with the Henry Spencer,
Perl and GNU regular expression libraries - wherever possible I have tried to
maintain compatibility with these libraries and with the POSIX standard - the
code however is entirely my own, including any bugs! I can absolutely guarantee
that I will not fix any bugs I don't know about, so if you have any comments or
spot any bugs, please get in touch.</P>
<P>Useful further information can be found at:</P>
<P>Short&nbsp;tutorials on regular expressions can be <A href="http://etext.lib.virginia.edu/helpsheets/regex.html">
found here</A> and&nbsp;<A href="http://www.devshed.com/Server_Side/Administration/RegExp/page1.html">here</A>.</P>
@ -72,8 +79,7 @@
24 Oct 2003
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
<p><i>&copy; Copyright John Maddock&nbsp;1998-
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<P><I>Use, modification and distribution are subject to the Boost Software License,
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>

View File

@ -1,153 +1,114 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
<meta name="generator" content="HTML Tidy, see www.w3.org">
<title>Boost.Regex: FAQ</title>
<meta http-equiv="Content-Type" content=
"text/html; charset=iso-8859-1">
<link rel="stylesheet" type="text/css" href="../../../boost.css">
</head>
<body>
<p></p>
<table id="Table1" cellspacing="1" cellpadding="1" width="100%"
border="0">
<tr>
<td valign="top" width="300">
<h3><a href="../../../index.htm"><img height="86" width="277" alt=
"C++ Boost" src="../../../c++boost.gif" border="0"></a></h3>
</td>
<td width="353">
<h1 align="center">Boost.Regex</h1>
<h2 align="center">FAQ</h2>
</td>
<td width="50">
<h3><a href="index.html"><img height="45" width="43" alt=
"Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
</td>
</tr>
</table>
<br>
<br>
<hr>
<font color="#ff0000"><font color="#ff0000"></font></font>
<p><font color="#ff0000"><font color="#ff0000"><font color=
"#ff0000">&nbsp;Q. Why can't I use the "convenience" versions of
regex_match / regex_search / regex_grep / regex_format /
regex_merge?</font></font></font></p>
<p>A. These versions may or may not be available depending upon the
capabilities of your compiler, the rules determining the format of
these functions are quite complex - and only the versions visible
to a standard compliant compiler are given in the help. To find out
what your compiler supports, run &lt;boost/regex.hpp&gt; through
your C++ pre-processor, and search the output file for the function
that you are interested in.<font color="#ff0000"><font color=
"#ff0000"></font></font></p>
<p><font color="#ff0000"><font color="#ff0000">Q. I can't get
regex++ to work with escape characters, what's going
on?</font></font></p>
<p>A. If you embed regular expressions in C++ code, then remember
that escape characters are processed twice: once by the C++
compiler, and once by the regex++ expression compiler, so to pass
the regular expression \d+ to regex++, you need to embed "\\d+" in
your code. Likewise to match a literal backslash you will need to
embed "\\\\" in your code. <font color="#ff0000"></font></p>
<p><font color="#ff0000">Q. Why does using parenthesis in a POSIX
regular expression change the result of a match?</font></p>
<p>For POSIX (extended and basic) regular expressions, but not for
perl regexes, parentheses don't only mark; they determine what the
best match is as well. When the expression is compiled as a POSIX
basic or extended regex then Boost.regex follows the POSIX standard
leftmost longest rule for determining what matched. So if there is
more than one possible match after considering the whole
expression, it looks next at the first sub-expression and then the
second sub-expression and so on. So...</p>
<pre>
<head>
<title>Boost.Regex: FAQ</title>
<meta name="generator" content="HTML Tidy, see www.w3.org">
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" type="text/css" href="../../../boost.css">
</head>
<body>
<p></p>
<table id="Table1" cellspacing="1" cellpadding="1" width="100%" border="0">
<tr>
<td valign="top" width="300">
<h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../c++boost.gif" border="0"></a></h3>
</td>
<td width="353">
<h1 align="center">Boost.Regex</h1>
<h2 align="center">FAQ</h2>
</td>
<td width="50">
<h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3>
</td>
</tr>
</table>
<br>
<br>
<hr>
<font color="#ff0000"><font color="#ff0000"></font></font>
<p><font color="#ff0000"><font color="#ff0000"><font color="#ff0000">&nbsp;Q. Why can't I
use the "convenience" versions of regex_match / regex_search / regex_grep /
regex_format / regex_merge?</font></font></font></p>
<p>A. These versions may or may not be available depending upon the capabilities
of your compiler, the rules determining the format of these functions are quite
complex - and only the versions visible to a standard compliant compiler are
given in the help. To find out what your compiler supports, run
&lt;boost/regex.hpp&gt; through your C++ pre-processor, and search the output
file for the function that you are interested in.<font color="#ff0000"><font color="#ff0000"></font></font></p>
<p><font color="#ff0000"><font color="#ff0000">Q. I can't get regex++ to work with
escape characters, what's going on?</font></font></p>
<p>A. If you embed regular expressions in C++ code, then remember that escape
characters are processed twice: once by the C++ compiler, and once by the
regex++ expression compiler, so to pass the regular expression \d+ to regex++,
you need to embed "\\d+" in your code. Likewise to match a literal backslash
you will need to embed "\\\\" in your code. <font color="#ff0000"></font>
</p>
<p><font color="#ff0000">Q. Why does using parentheses in a POSIX regular expression
change the result of a match?</font></p>
<p>For POSIX (extended and basic) regular expressions, but not for perl regexes,
parentheses don't only mark; they determine what the best match is as well.
When the expression is compiled as a POSIX basic or extended regex then
Boost.regex follows the POSIX standard leftmost longest rule for determining
what matched. So if there is more than one possible match after considering the
whole expression, it looks next at the first sub-expression and then the second
sub-expression and so on. So...</p>
<pre>
"(0*)([0-9]*)" against "00123" would produce
$1 = "00"
$2 = "123"
</pre>
<p>where as</p>
<pre>
"0*([0-9)*" against "00123" would produce
<p>whereas</p>
<pre>
"0*([0-9]*)" against "00123" would produce
$1 = "00123"
</pre>
<p>If you think about it, had $1 only matched the "123", this would
be "less good" than the match "00123" which is both further to the
left and longer. If you want $1 to match only the "123" part, then
you need to use something like:</p>
<pre>
<p>If you think about it, had $1 only matched the "123", this would be "less good"
than the match "00123" which is both further to the left and longer. If you
want $1 to match only the "123" part, then you need to use something like:</p>
<pre>
"0*([1-9][0-9]*)"
</pre>
<p>as the expression.</p>
<p><font color="#ff0000">Q. Why don't character ranges work
properly (POSIX mode only)?</font><br>
A. The POSIX standard specifies that character range expressions
are locale sensitive - so for example the expression [A-Z] will
match any collating element that collates between 'A' and 'Z'. That
means that for most locales other than "C" or "POSIX", [A-Z] would
match the single character 't' for example, which is not what most
people expect - or at least not what most people have come to
expect from regular expression engines. For this reason, the
default behaviour of boost.regex (perl mode) is to turn locale
sensitive collation off by not setting the regex_constants::collate
compile time flag. However if you set a non-default compile time
flag - for example regex_constants::extended or
regex_constants::basic, then locale dependent collation will be
enabled, this also applies to the POSIX API functions which use
either regex_constants::extended or regex_constants::basic
internally. <i>[Note - when regex_constants::nocollate in effect,
the library behaves "as if" the LC_COLLATE locale category were
always "C", regardless of what its actually set to - end
note</i>].</p>
<p><font color="#ff0000">Q. Why are there no throw specifications
on any of the functions? What exceptions can the library
throw?</font></p>
<p>A. Not all compilers support (or honor) throw specifications,
others support them but with reduced efficiency. Throw
specifications may be added at a later date as compilers begin to
handle this better. The library should throw only three types of
exception: boost::bad_expression can be thrown by basic_regex when
compiling a regular expression, std::runtime_error can be thrown
when a call to basic_regex::imbue tries to open a message catalogue
that doesn't exist, or when a call to regex_search or regex_match
results in an "everlasting" search,&nbsp;or when a call to
RegEx::GrepFiles or RegEx::FindFiles tries to open a file that
cannot be opened, finally std::bad_alloc can be thrown by just
about any of the functions in this library.</p>
<p></p>
<hr>
<p>as the expression.</p>
<p><font color="#ff0000">Q. Why don't character ranges work properly (POSIX mode
only)?</font><br>
A. The POSIX standard specifies that character range expressions are locale
sensitive - so for example the expression [A-Z] will match any collating
element that collates between 'A' and 'Z'. That means that for most locales
other than "C" or "POSIX", [A-Z] would match the single character 't' for
example, which is not what most people expect - or at least not what most
people have come to expect from regular expression engines. For this reason,
the default behaviour of boost.regex (perl mode) is to turn locale sensitive
collation off by not setting the regex_constants::collate compile time flag.
However if you set a non-default compile time flag - for example
regex_constants::extended or regex_constants::basic, then locale dependent
collation will be enabled, this also applies to the POSIX API functions which
use either regex_constants::extended or regex_constants::basic internally. <i>[Note
- when regex_constants::nocollate is in effect, the library behaves "as if" the
LC_COLLATE locale category were always "C", regardless of what it's actually set
to - end note</i>].</p>
<p><font color="#ff0000">Q. Why are there no throw specifications on any of the
functions? What exceptions can the library throw?</font></p>
<p>A. Not all compilers support (or honor) throw specifications, others support
them but with reduced efficiency. Throw specifications may be added at a later
date as compilers begin to handle this better. The library should throw only
three types of exception: boost::bad_expression can be thrown by basic_regex
when compiling a regular expression, std::runtime_error can be thrown when a
call to basic_regex::imbue tries to open a message catalogue that doesn't
exist, or when a call to regex_search or regex_match results in an
"everlasting" search,&nbsp;or when a call to RegEx::GrepFiles or
RegEx::FindFiles tries to open a file that cannot be opened, finally
std::bad_alloc can be thrown by just about any of the functions in this
library.</p>
<p></p>
<hr>
<p>Revised
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
24 Oct 2003
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
<p><i>&copy; Copyright John Maddock&nbsp;1998-
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<P><I>Use, modification and distribution are subject to the Boost Software License,
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
</body>
</body>
</html>

View File

@ -46,10 +46,10 @@
<dl class="index">
<dt><a href="syntax_option_type.html">syntax_option_type</a></dt> <dt><a href="match_flag_type.html">
match_flag_type</a></dt> <dt><a href="bad_expression.html">class bad_expression</a></dt>
<dt><a href="regex_traits.html">class regex_traits</a></dt> <dt><a href="basic_regex.html">
class template basic_regex</a></dt> <dt><a href="sub_match.html">class template
sub_match</a></dt> <dt><a href="match_results.html">class template
match_results</a></dt>
<dt><a href="regex_traits.html">class regex_traits</a></dt>
<dt><a href="basic_regex.html">class template basic_regex</a></dt>
<dt><a href="sub_match.html">class template sub_match</a></dt>
<dt><a href="match_results.html">class template match_results</a></dt>
</dl>
</dd>
<dt>Algorithms</dt>
@ -66,6 +66,25 @@
<dt><a href="regex_token_iterator.html">regex_token_iterator</a></dt>
</dl>
</dd>
<dt>Typedefs</dt>
<dd>
<dl class="index">
<dt><a href="basic_regex.html">regex</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;[ = basic_regex&lt;char&gt; ]</dt>
<dt><a href="basic_regex.html">wregex</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;[ = basic_regex&lt;wchar_t&gt; ]</dt>
<dt><a href="match_results.html">cmatch</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;[ = match_results&lt;const char*&gt; ]</dt>
<dt><a href="match_results.html">wcmatch</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;[ = match_results&lt;const wchar_t*&gt; ]</dt>
<dt><a href="match_results.html">smatch</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;[ = match_results&lt;std::string::const_iterator&gt; ]</dt>
<dt><a href="match_results.html">wsmatch</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;[ = match_results&lt;std::wstring::const_iterator&gt; ]</dt>
<dt><a href="regex_iterator.html">cregex_iterator</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;[ = regex_iterator&lt;const char*&gt;]</dt>
<dt><a href="regex_iterator.html">wcregex_iterator</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;[ = regex_iterator&lt;const wchar_t*&gt;]</dt>
<dt><a href="regex_iterator.html">sregex_iterator</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;[ = regex_iterator&lt;std::string::const_iterator&gt;]</dt>
<dt><a href="regex_iterator.html">wsregex_iterator</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;[ = regex_iterator&lt;std::wstring::const_iterator&gt;]</dt>
<dt><a href="regex_token_iterator.html">cregex_token_iterator</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;[ = regex_token_iterator&lt;const char*&gt;]</dt>
<dt><a href="regex_token_iterator.html">wcregex_token_iterator</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;[ = regex_token_iterator&lt;const wchar_t*&gt;]</dt>
<dt><a href="regex_token_iterator.html">sregex_token_iterator</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;[ = regex_token_iterator&lt;std::string::const_iterator&gt;]</dt>
<dt><a href="regex_token_iterator.html">wsregex_token_iterator</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;[ = regex_token_iterator&lt;std::wstring::const_iterator&gt;]</dt>
</dl>
</dd>
<dt>Misc.</dt>
<dd>
<dl class="index">

View File

@ -31,7 +31,7 @@
character sequence.&nbsp; The behavior of the format flags is described in more
detail in the <A href="format_syntax.html">format syntax guide</A>.</p>
<pre>
namespace std{ namespace regex_constants{
namespace boost{ namespace regex_constants{
typedef bitmask_type match_flag_type;
@ -59,7 +59,7 @@ static const match_flag_type format_first_only;
static const match_flag_type format_all;
} // namespace regex_constants
} // namespace std
} // namespace boost
</pre>
<h3>Description</h3>
<p>The type <code>match_flag_type</code> is an implementation defined bitmask type
@ -271,10 +271,10 @@ static const match_flag_type format_all;
<br>
<p>Revised
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
24 Oct 2003
04 Feb 2004
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
<p><i>&copy; Copyright John Maddock&nbsp;1998-
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<P><I>Use, modification and distribution are subject to the Boost Software License,
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>

View File

@ -344,7 +344,7 @@ const_iterator end()const;
<p><b>Effects:</b> Returns a terminating iterator that enumerates over all the
marked sub-expression matches stored in *this.</p>
<h4><A name="format"></A>match_results reformatting</h4>
<pre>template &lt;class OutputIterator&gt;
<pre><A name=m12></A>template &lt;class OutputIterator&gt;
OutputIterator format(OutputIterator out,
const string_type&amp; fmt,
<A href="match_flag_type.html" >match_flag_type</A> flags = format_default);

File diff suppressed because it is too large Load Diff

View File

@ -42,7 +42,7 @@
iterator first,
iterator last,
<b>const</b> basic_regex&lt;charT, traits, Allocator&gt;&amp; e,
<b>unsigned</b> flags = match_default)
boost::match_flag_type flags = match_default)
</pre>
<p>The library also defines the following convenience versions, which take either
a const charT*, or a const std::basic_string&lt;&gt;&amp; in place of a pair of
@ -53,13 +53,13 @@
<b>unsigned</b> <b>int</b> regex_grep(Predicate foo,
<b>const</b> charT* str,
<b>const</b> basic_regex&lt;charT, traits, Allocator&gt;&amp; e,
<b>unsigned</b> flags = match_default);
boost::match_flag_type flags = match_default);
<b>template</b> &lt;<b>class</b> Predicate, <b>class</b> ST, <b>class</b> SA, <b>class</b> Allocator, <b>class</b> charT, <b>class</b> traits&gt;
<b>unsigned</b> <b>int</b> regex_grep(Predicate foo,
<b>const</b> std::basic_string&lt;charT, ST, SA&gt;&amp; s,
<b>const</b> basic_regex&lt;charT, traits, Allocator&gt;&amp; e,
<b>unsigned</b> flags = match_default);
boost::match_flag_type flags = match_default);
</pre>
<p>The parameters for the primary version of regex_grep have the following
meanings:&nbsp;</p>
@ -370,11 +370,10 @@ index[std::string(what[5].first, what[5].second) + std::string(what[6].first, wh
<hr>
<p>Revised
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
24 Oct 2003
04 Feb 2004
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
<p><i>&copy; Copyright John Maddock&nbsp;1998-
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<P><I>Use, modification and distribution are subject to the Boost Software License,
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>

View File

@ -294,7 +294,7 @@ void</B> IndexClasses(map_type&amp; m, <B>const</B> std::string&amp; file)
&nbsp;&nbsp; start = file.begin();
&nbsp;&nbsp; end = file.end();&nbsp;
&nbsp;&nbsp; &nbsp;&nbsp; boost::<a href="match_results.html">match_results</a>&lt;std::string::const_iterator&gt; what;
&nbsp;&nbsp; <B>unsigned</B> <B>int</B> flags = boost::match_default;
&nbsp;&nbsp; boost::match_flag_type flags = boost::match_default;
&nbsp;&nbsp; <B>while</B>(regex_search(start, end, what, expression, flags))&nbsp;
&nbsp;&nbsp; {
<FONT color=#000080>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <I>// what[0] contains the whole string
@ -314,11 +314,10 @@ void</B> IndexClasses(map_type&amp; m, <B>const</B> std::string&amp; file)
<HR>
<p>Revised
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
24 Oct 2003
04 Feb 2004
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
<p><i>&copy; Copyright John Maddock&nbsp;1998-
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<P><I>Use, modification and distribution are subject to the Boost Software License,
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>

View File

@ -38,15 +38,15 @@
<PRE><B>template</B> &lt;<B>class</B> OutputIterator, <B>class</B> charT, <B>class</B> Traits1, <B>class</B> Alloc1, <B>class</B> Traits2, <B>class</B> Alloc2&gt;
std::size_t regex_split(OutputIterator out,&nbsp;
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; std::basic_string&lt;charT, Traits1, Alloc1&gt;&amp; s,&nbsp;
&nbsp;<B>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; const</B> basic_regex&lt;charT, Traits2, Alloc2&gt;&amp; e,
&nbsp;<B>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; unsigned</B> flags,
&nbsp;<B> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;const</B> basic_regex&lt;charT, Traits2, Alloc2&gt;&amp; e,
&nbsp;<STRONG>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</STRONG>boost::match_flag_type flags,
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; std::size_t max_split);
<B>template</B> &lt;<B>class</B> OutputIterator, <B>class</B> charT, <B>class</B> Traits1, <B>class</B> Alloc1, <B>class</B> Traits2, <B>class</B> Alloc2&gt;
std::size_t regex_split(OutputIterator out,&nbsp;
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; std::basic_string&lt;charT, Traits1, Alloc1&gt;&amp; s,&nbsp;
&nbsp;<B>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; const</B> basic_regex&lt;charT, Traits2, Alloc2&gt;&amp; e,
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <B>unsigned</B> flags = match_default);
&nbsp;<B> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;const</B> basic_regex&lt;charT, Traits2, Alloc2&gt;&amp; e,
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; boost::match_flag_type flags = match_default);
<B>template</B> &lt;<B>class</B> OutputIterator, <B>class</B> charT, <B>class</B> Traits1, <B>class</B> Alloc1&gt;
std::size_t regex_split(OutputIterator out,
@ -134,11 +134,10 @@ boost::regex e(<FONT color=#000080>"&lt;\\s*A\\s+[^&gt;]*href\\s*=\\s*\"([^\"]*)
<HR>
<p>Revised
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
24 Oct 2003
04 Feb 2004
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
<p><i>&copy; Copyright John Maddock&nbsp;1998-
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<P><I>Use, modification and distribution are subject to the Boost Software License,
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>

View File

@ -76,7 +76,7 @@ typedef regex_token_iterator&lt;const char*&gt; cregex_token_i
typedef regex_token_iterator&lt;std::string::const_iterator&gt; sregex_token_iterator;
#ifndef BOOST_NO_WREGEX
typedef regex_token_iterator&lt;const wchar_t*&gt; wcregex_token_iterator;
typedef regex_token_iterator<&lt;std::wstring::const_iterator&gt; wsregex_token_iterator;
typedef regex_token_iterator&lt;std::wstring::const_iterator&gt; wsregex_token_iterator;
#endif
</PRE>
<H3><A name="description"></A>Description</H3>
@ -84,7 +84,8 @@ typedef regex_token_iterator<&lt;std::wstring::const_iterator&gt; wsregex_token_
<P><B> Effects:</B> constructs an end of sequence iterator.</P>
<PRE><A name=c2></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
int submatch = 0, match_flag_type m = match_default);</PRE>
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>.</P>
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>.&nbsp; Object re shall exist
for the lifetime of the iterator constructed from it.</P>
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate one
string for each regular expression match of the expression <EM>re</EM> found
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>.&nbsp; The
@ -99,7 +100,8 @@ typedef regex_token_iterator<&lt;std::wstring::const_iterator&gt; wsregex_token_
configured</A> in non-recursive mode).</P>
<PRE><A name=c3></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
const std::vector&lt;int&gt;&amp; submatches, match_flag_type m = match_default);</PRE>
<P><B> Preconditions:</B> <CODE>submatches.size() &amp;&amp; !re.empty()</CODE>.</P>
<P><B> Preconditions:</B> <CODE>submatches.size() &amp;&amp; !re.empty()</CODE>.&nbsp;
Object re shall exist for the lifetime of the iterator constructed from it.</P>
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate <EM>submatches.size()</EM>
strings for each regular expression match of the expression <EM>re</EM> found
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>.&nbsp; For
@ -118,7 +120,8 @@ typedef regex_token_iterator<&lt;std::wstring::const_iterator&gt; wsregex_token_
<PRE><A name=c4></A>template &lt;std::size_t R&gt;
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re,
const int (&amp;submatches)[R], match_flag_type m = match_default);</PRE>
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>.</P>
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>.&nbsp; Object re shall exist
for the lifetime of the iterator constructed from it.</P>
<P><STRONG>Effects:</STRONG> constructs a regex_token_iterator that will
enumerate&nbsp;<EM>R</EM> strings for each regular expression match of the
expression <EM>re</EM> found within the sequence <EM>[a,b)</EM>, using match

View File

@ -24,10 +24,12 @@
</P>
<HR>
<p></p>
<P>Under construction.</P>
<P>The current boost.regex traits class design will be migrated to that specified
in the <A href="http://anubis.dkuug.dk/jtc1/sc22/wg21/docs/papers/2003/n1429.htm">regular
expression standardization proposal</A>.&nbsp;</P>
<P>
Under construction: the current design will be replaced by that specified in
the <A href="http://anubis.dkuug.dk/jtc1/sc22/wg21/docs/papers/2003/n1429.htm">regular
expression standardization proposal</A>; the current (obsolete) design has
its <A href="http://cvs.sourceforge.net/viewcvs.py/*checkout*/boost/boost/libs/regex/Attic/traits_class_ref.htm?rev=1.11">
documentation archived online</A>.</P>
<P>
<HR>
<P></P>
@ -36,11 +38,9 @@
24 Oct 2003
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
<p><i>&copy; Copyright John Maddock&nbsp;1998-
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<P><I>Use, modification and distribution are subject to the Boost Software License,
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
</body>
</html>

View File

@ -91,18 +91,18 @@
<P>Parentheses serve two purposes, to group items together into a sub-expression,
and to mark what generated the match. For example the expression "(ab)*" would
match all of the string "ababab". The matching algorithms <A href="regex_match.html">
regex_match</A> and <A href="regex_search.html">regex_search</A>
each take an instance of <A href="match_results.html">match_results</A>
that reports what caused the match, on exit from these functions the <A href="match_results.html">
match_results</A> contains information both on what the whole expression
matched and on what each sub-expression matched. In the example above
match_results[1] would contain a pair of iterators denoting the final "ab" of
the matching string. It is permissible for sub-expressions to match null
strings. If a sub-expression takes no part in a match - for example if it is
part of an alternative that is not taken - then both of the iterators that are
returned for that sub-expression point to the end of the input string, and the <I>matched</I>
parameter for that sub-expression is <I>false</I>. Sub-expressions are indexed
from left to right starting from 1, sub-expression 0 is the whole expression.
regex_match</A> and <A href="regex_search.html">regex_search</A> each take
an instance of <A href="match_results.html">match_results</A> that reports what
caused the match, on exit from these functions the <A href="match_results.html">match_results</A>
contains information both on what the whole expression matched and on what each
sub-expression matched. In the example above match_results[1] would contain a
pair of iterators denoting the final "ab" of the matching string. It is
permissible for sub-expressions to match null strings. If a sub-expression
takes no part in a match - for example if it is part of an alternative that is
not taken - then both of the iterators that are returned for that
sub-expression point to the end of the input string, and the <I>matched</I> parameter
for that sub-expression is <I>false</I>. Sub-expressions are indexed from left
to right starting from 1, sub-expression 0 is the whole expression.
</P>
<H3>Non-Marking Parenthesis
</H3>
@ -143,7 +143,7 @@
<P>A set is a set of characters that can match any single character that is a
member of the set. Sets are delimited by "[" and "]" and can contain literals,
character ranges, character classes, collating elements and equivalence
classes. Set declarations that start with "^" contain the compliment of the
classes. Set declarations that start with "^" contain the complement of the
elements that follow.
</P>
<P>Examples:
@ -293,7 +293,7 @@
[^[.ae.]] would only match one character.&nbsp;
</P>
<P>
Equivalence classes take the general form[=tagname=] inside a set declaration,
Equivalence classes take the general form [=tagname=] inside a set declaration,
where <I>tagname</I> is either a single character, or a name of a collating
element, and matches any character that is a member of the same primary
equivalence class as the collating element [.tagname.]. An equivalence class is
@ -302,7 +302,7 @@
typically collated by character, then by accent, and then by case; the primary
sort key then relates to the character, the secondary to the accentation, and
the tertiary to the case). If there is no equivalence class corresponding to <I>tagname</I>
, then[=tagname=] is exactly the same as [.tagname.]. Unfortunately there is no
, then [=tagname=] is exactly the same as [.tagname.]. Unfortunately there is no
locale independent method of obtaining the primary sort key for a character,
except under Win32. For other operating systems the library will "guess" the
primary sort key from the full sort key (obtained from <I>strxfrm</I>), so
@ -666,106 +666,103 @@
<H3>What gets matched?
</H3>
<P>
When the expression is compiled as a Perl-compatible regex then the matching
algorithms will perform a depth first search on the state machine and report
the first match found.</P>
When the expression is compiled as a Perl-compatible regex then the matching
algorithms will perform a depth first search on the state machine and report
the first match found.</P>
<P>
When the expression is compiled as a POSIX-compatible regex then the matching
algorithms will match the first possible matching string, if more than one
string starting at a given location can match then it matches the longest
possible string, unless the flag match_any is set, in which case the first
match encountered is returned. Use of the match_any option can reduce the time
taken to find the match - but is only useful if the user is less concerned
about what matched - for example it would not be suitable for search and
replace operations. In cases where there are multiple possible matches all
starting at the same location, and all of the same length, then the match
chosen is the one with the longest first sub-expression, if that is the same
for two or more matches, then the second sub-expression will be examined and so
on.
</P><P>
The following table examples illustrate the main differences between Perl and
POSIX regular expression matching rules:
When the expression is compiled as a POSIX-compatible regex then the matching
algorithms will match the first possible matching string, if more than one
string starting at a given location can match then it matches the longest
possible string, unless the flag match_any is set, in which case the first
match encountered is returned. Use of the match_any option can reduce the time
taken to find the match - but is only useful if the user is less concerned
about what matched - for example it would not be suitable for search and
replace operations. In cases where there are multiple possible matches all
starting at the same location, and all of the same length, then the match
chosen is the one with the longest first sub-expression, if that is the same
for two or more matches, then the second sub-expression will be examined and so
on.
</P>
<P>
<TABLE id="Table5" cellSpacing="1" cellPadding="7" width="624" border="1">
<TBODY>
<TR>
<TD vAlign="top" width="25%">
<P>Expression</P>
</TD>
<TD vAlign="top" width="25%">
<P>Text</P>
</TD>
<TD vAlign="top" width="25%">
<P>POSIX leftmost longest match</P>
</TD>
<TD vAlign="top" width="25%">
<P>ECMAScript depth first search match</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="25%">
<P><CODE>a|ab</CODE></P>
</TD>
<TD vAlign="top" width="25%">
<P><CODE>
xaby</CODE>
The following table of examples illustrates the main differences between Perl and
POSIX regular expression matching rules:
</P>
</TD>
<TD vAlign="top" width="25%">
<P><CODE>
"ab"</CODE></P></TD>
<TD vAlign="top" width="25%">
<P><CODE>
"a"</CODE></P></TD>
</TR>
<TR>
<TD vAlign="top" width="25%">
<P><CODE>
.*([[:alnum:]]+).*</CODE></P></TD>
<TD vAlign="top" width="25%">
<P><CODE>
" abc def xyz "</CODE></P></TD>
<TD vAlign="top" width="25%">
<P>$0 = " abc def xyz "<BR>
$1 = "abc"</P>
</TD>
<TD vAlign="top" width="25%">
<P>$0 = " abc def xyz "<BR>
$1 = "z"</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="25%">
<P><CODE>
.*(a|xayy)</CODE></P></TD>
<TD vAlign="top" width="25%">
<P><CODE>
zzxayyzz</CODE></P></TD>
<TD vAlign="top" width="25%">
<P><CODE>
"zzxayy"</CODE></P></TD>
<TD vAlign="top" width="25%">
<P><CODE>"zzxa"</CODE></P>
</TD>
</TR>
</TBODY></CODE></TD></TR></TABLE>
<P>
<TABLE id="Table5" cellSpacing="1" cellPadding="7" width="624" border="1">
<TBODY>
<TR>
<TD vAlign="top" width="25%">
<P>Expression</P>
</TD>
<TD vAlign="top" width="25%">
<P>Text</P>
</TD>
<TD vAlign="top" width="25%">
<P>POSIX leftmost longest match</P>
</TD>
<TD vAlign="top" width="25%">
<P>ECMAScript depth first search match</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="25%">
<P><CODE>a|ab</CODE></P>
</TD>
<TD vAlign="top" width="25%">
<P><CODE> xaby</CODE>
</P>
</TD>
<TD vAlign="top" width="25%">
<P><CODE> "ab"</CODE></P>
</TD>
<TD vAlign="top" width="25%">
<P><CODE> "a"</CODE></P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="25%">
<P><CODE> .*([[:alnum:]]+).*</CODE></P>
</TD>
<TD vAlign="top" width="25%">
<P><CODE> " abc def xyz "</CODE></P>
</TD>
<TD vAlign="top" width="25%">
<P>$0 = " abc def xyz "<BR>
$1 = "abc"</P>
</TD>
<TD vAlign="top" width="25%">
<P>$0 = " abc def xyz "<BR>
$1 = "z"</P>
</TD>
</TR>
<TR>
<TD vAlign="top" width="25%">
<P><CODE> .*(a|xayy)</CODE></P>
</TD>
<TD vAlign="top" width="25%">
<P><CODE> zzxayyzz</CODE></P>
</TD>
<TD vAlign="top" width="25%">
<P><CODE> "zzxayy"</CODE></P>
</TD>
<TD vAlign="top" width="25%">
<P><CODE>"zzxa"</CODE></P>
</TD>
</TR>
</TBODY></TABLE>
<P>These differences between Perl matching rules, and POSIX matching rules, mean
that these two regular expression syntaxes differ not only in the features
offered, but also in the form that the state machine takes and/or the
algorithms used to traverse the state machine.</p>
<HR>
algorithms used to traverse the state machine.</P>
<HR>
<p>Revised
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
24 Oct 2003
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
<p><i>&copy; Copyright John Maddock&nbsp;1998-
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<P><I>Use, modification and distribution are subject to the Boost Software License,
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
</body>
</html>

View File

@ -252,7 +252,7 @@ public:
std::string What(int i = 0)const;
std::string operator[](int i)const { return What(i); }
static const unsigned int npos;
static const std::size_t npos;
friend struct re_detail::pred1;
friend struct re_detail::pred2;

View File

@ -72,7 +72,7 @@ typedef enum _match_flags
} match_flags;
#if BOOST_WORKAROUND(BOOST_MSVC, <= 1200) || BOOST_WORKAROUND(__BORLANDC__, BOOST_TESTED_AT(0x564))
#if BOOST_WORKAROUND(BOOST_MSVC, <= 1200) || BOOST_WORKAROUND(__BORLANDC__, BOOST_TESTED_AT(0x564)) || defined(__SUNPRO_CC)
typedef unsigned long match_flag_type;
#else
typedef match_flags match_flag_type;
@ -88,11 +88,11 @@ inline match_flags operator^(match_flags m1, match_flags m2)
inline match_flags operator~(match_flags m1)
{ return static_cast<match_flags>(~static_cast<boost::int32_t>(m1)); }
inline match_flags& operator&=(match_flags& m1, match_flags m2)
{ m1 = m1&m2; return m1; }
{ m1 = static_cast<match_flags>(m1&m2); return m1; }
inline match_flags& operator|=(match_flags& m1, match_flags m2)
{ m1 = m1|m2; return m1; }
{ m1 = static_cast<match_flags>(m1|m2); return m1; }
inline match_flags& operator^=(match_flags& m1, match_flags m2)
{ m1 = m1^m2; return m1; }
{ m1 = static_cast<match_flags>(m1^m2); return m1; }
#endif
#endif

View File

@ -54,7 +54,7 @@ perl_matcher<BidiIterator, Allocator, traits, Allocator2>::perl_matcher(BidiIter
estimate_max_state_count(static_cast<category*>(0));
if(!(m_match_flags & (match_perl|match_posix)))
{
if(re.flags() & regex_constants::perlex)
if((re.flags() & regex_constants::perlex) || (re.flags() & regex_constants::literal))
m_match_flags |= match_perl;
else
m_match_flags |= match_posix;
@ -82,7 +82,7 @@ void perl_matcher<BidiIterator, Allocator, traits, Allocator2>::estimate_max_sta
if(dist > (difference_type)(lim / states))
max_state_count = lim;
else
max_state_count = 1000 + states * dist;
max_state_count = 100000 + states * dist;
}
template <class BidiIterator, class Allocator, class traits, class Allocator2>
void perl_matcher<BidiIterator, Allocator, traits, Allocator2>::estimate_max_state_count(void*)
@ -205,10 +205,10 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::find_imp()
else
{
// start again:
search_base = position = (*m_presult)[0].second;
search_base = position = m_result[0].second;
// If last match was null and match_not_null was not set then increment
// our start position, otherwise we go into an infinite loop:
if(((m_match_flags & match_not_null) == 0) && (m_presult->length() == 0))
if(((m_match_flags & match_not_null) == 0) && (m_result.length() == 0))
{
if(position == last)
return false;

View File

@ -38,7 +38,13 @@ inline void inplace_destroy(T* p)
struct saved_state
{
unsigned int id;
union{
unsigned int id;
// these ensure that this struct gets the same alignment as derived structs:
void* padding1;
std::size_t padding2;
std::ptrdiff_t padding3;
};
saved_state(unsigned i) : id(i) {}
};
@ -927,8 +933,8 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::unwind_greedy_si
const re_repeat* rep = pmp->rep;
unsigned count = pmp->count;
assert(rep->next.p);
assert(rep->alt.p);
assert(rep->next.p != 0);
assert(rep->alt.p != 0);
count -= rep->min;
@ -977,8 +983,8 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::unwind_slow_dot_
const re_repeat* rep = pmp->rep;
unsigned count = pmp->count;
assert(rep->type == syntax_element_dot_rep);
assert(rep->next.p);
assert(rep->alt.p);
assert(rep->next.p != 0);
assert(rep->alt.p != 0);
assert(rep->next.p->type == syntax_element_wild);
assert(count < rep->max);
@ -1005,7 +1011,7 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::unwind_slow_dot_
{
// can't repeat any more, remove the pushed state:
destroy_single_repeat();
if(rep->can_be_null & mask_skip)
if(0 == (rep->can_be_null & mask_skip))
return true;
}
else if(count == rep->max)
@ -1057,7 +1063,7 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::unwind_fast_dot_
{
// can't repeat any more, remove the pushed state:
destroy_single_repeat();
if(rep->can_be_null & mask_skip)
if(0 == (rep->can_be_null & mask_skip))
return true;
}
else if(count == rep->max)
@ -1095,8 +1101,8 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::unwind_char_repe
position = pmp->last_position;
assert(rep->type == syntax_element_char_rep);
assert(rep->next.p);
assert(rep->alt.p);
assert(rep->next.p != 0);
assert(rep->alt.p != 0);
assert(rep->next.p->type == syntax_element_literal);
assert(count < rep->max);
@ -1121,7 +1127,7 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::unwind_char_repe
{
// can't repeat any more, remove the pushed state:
destroy_single_repeat();
if(rep->can_be_null & mask_skip)
if(0 == (rep->can_be_null & mask_skip))
return true;
}
else if(count == rep->max)
@ -1159,8 +1165,8 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::unwind_short_set
position = pmp->last_position;
assert(rep->type == syntax_element_short_set_rep);
assert(rep->next.p);
assert(rep->alt.p);
assert(rep->next.p != 0);
assert(rep->alt.p != 0);
assert(rep->next.p->type == syntax_element_set);
assert(count < rep->max);
@ -1185,7 +1191,7 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::unwind_short_set
{
// can't repeat any more, remove the pushed state:
destroy_single_repeat();
if(rep->can_be_null & mask_skip)
if(0 == (rep->can_be_null & mask_skip))
return true;
}
else if(count == rep->max)
@ -1223,8 +1229,8 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::unwind_long_set_
position = pmp->last_position;
assert(rep->type == syntax_element_long_set_rep);
assert(rep->next.p);
assert(rep->alt.p);
assert(rep->next.p != 0);
assert(rep->alt.p != 0);
assert(rep->next.p->type == syntax_element_long_set);
assert(position != last);
assert(count < rep->max);
@ -1250,7 +1256,7 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::unwind_long_set_
{
// can't repeat any more, remove the pushed state:
destroy_single_repeat();
if(rep->can_be_null & mask_skip)
if(0 == (rep->can_be_null & mask_skip))
return true;
}
else if(count == rep->max)

View File

@ -400,7 +400,7 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_dot_repeat
// start by working out how much we can skip:
//
const re_repeat* rep = static_cast<const re_repeat*>(pstate);
unsigned count = std::min(static_cast<unsigned>(re_detail::distance(position, last)), (rep->greedy ? rep->max : rep->min));
unsigned count = (std::min)(static_cast<unsigned>(re_detail::distance(position, last)), (rep->greedy ? rep->max : rep->min));
if(rep->min > count)
return false; // not enough text left to match
std::advance(position, count);
@ -458,7 +458,7 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_char_repea
if(::boost::is_random_access_iterator<BidiIterator>::value)
{
BidiIterator end = position;
std::advance(end, std::min((unsigned)re_detail::distance(position, last), desired));
std::advance(end, (std::min)((unsigned)re_detail::distance(position, last), desired));
BidiIterator origin(position);
while((position != end) && (traits_inst.translate(*position, icase) == what))
{
@ -507,8 +507,16 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_char_repea
return false;
if(position == last)
return false;
position = ++save_pos;
++count;
position = save_pos;
if(traits_inst.translate(*position, icase) == what)
{
++position;
++count;
}
else
{
return false;
}
}while(true);
#ifdef __BORLANDC__
#pragma option pop
@ -538,7 +546,7 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_set_repeat
if(::boost::is_random_access_iterator<BidiIterator>::value)
{
BidiIterator end = position;
std::advance(end, std::min((unsigned)re_detail::distance(position, last), desired));
std::advance(end, (std::min)((unsigned)re_detail::distance(position, last), desired));
BidiIterator origin(position);
while((position != end) && map[(traits_uchar_type)traits_inst.translate(*position, icase)])
{
@ -587,8 +595,16 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_set_repeat
return false;
if(position == last)
return false;
position = ++save_pos;
++count;
position = save_pos;
if(map[(traits_uchar_type)traits_inst.translate(*position, icase)])
{
++position;
++count;
}
else
{
return false;
}
}while(true);
#ifdef __BORLANDC__
#pragma option pop
@ -618,7 +634,7 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_long_set_r
if(::boost::is_random_access_iterator<BidiIterator>::value)
{
BidiIterator end = position;
std::advance(end, std::min((unsigned)re_detail::distance(position, last), desired));
std::advance(end, (std::min)((unsigned)re_detail::distance(position, last), desired));
BidiIterator origin(position);
while((position != end) && (position != re_is_set_member(position, last, set, re)))
{
@ -667,8 +683,16 @@ bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_long_set_r
return false;
if(position == last)
return false;
position = ++save_pos;
++count;
position = save_pos;
if(position != re_is_set_member(position, last, set, re))
{
++position;
++count;
}
else
{
return false;
}
}while(true);
#ifdef __BORLANDC__
#pragma option pop

View File

@ -721,6 +721,13 @@ re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Alloca
re_detail::jstack<traits_string_type, Allocator> ranges(64, data.allocator());
re_detail::jstack<boost::uint_fast32_t, Allocator> classes(64, data.allocator());
re_detail::jstack<traits_string_type, Allocator> equivalents(64, data.allocator());
if(_flags & regbase::icase)
{
if((cls == traits_type::char_class_upper) || (cls == traits_type::char_class_lower))
{
cls = traits_type::char_class_alpha;
}
}
classes.push(cls);
if(dat)
{
@ -1066,7 +1073,7 @@ re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Alloca
++csingles;
const traits_string_type& s = singles.peek();
std::size_t len = (s.size() + 1) * sizeof(charT);
if(len > sizeof(charT))
if(len > sizeof(charT) * 2)
singleton = false;
std::memcpy(reinterpret_cast<charT*>(data.extend(len)), s.c_str(), len);
singles.pop();
@ -1350,6 +1357,7 @@ unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::set_expr
data.clear();
_flags = f;
fail(REG_NOERROR); // clear any error
_leading_len = 0; // set this to non-zero if there are any backrefs, we'll refer to it later...
if(arg_first >= arg_last)
{
@ -1427,7 +1435,8 @@ unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::set_expr
{
case traits_type::syntax_colon:
static_cast<re_detail::re_brace*>(dat)->index = 0;
--marks;
if((_flags & nosubs) == 0)
--marks;
markid.pop();
markid.push(0);
++ptr;
@ -1437,7 +1446,8 @@ unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::set_expr
markid.pop();
markid.push(-1);
common_forward_assert:
--marks;
if((_flags & nosubs) == 0)
--marks;
++ptr;
// extend:
dat = add_simple(dat, re_detail::syntax_element_jump, re_detail::re_jump_size);
@ -1462,7 +1472,8 @@ unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::set_expr
case traits_type::syntax_hash:
// comment just skip it:
static_cast<re_detail::re_brace*>(dat)->index = 0;
--marks;
if((_flags & nosubs) == 0)
--marks;
markid.pop();
mark.pop();
do{
@ -1600,6 +1611,7 @@ unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::set_expr
dat = add_simple(dat, re_detail::syntax_element_backref, sizeof(re_detail::re_brace));
static_cast<re_detail::re_brace*>(dat)->index = i;
++ptr;
_leading_len = 1;
continue;
}
break;
@ -2141,7 +2153,7 @@ unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::fixup_le
case re_detail::syntax_element_char_rep:
case re_detail::syntax_element_short_set_rep:
case re_detail::syntax_element_long_set_rep:
if((len == 0) && (1 == fixup_leading_rep(dat->next.p, static_cast<re_detail::re_repeat*>(dat)->alt.p) ))
if((len == 0) && (_leading_len == 0) && (1 == fixup_leading_rep(dat->next.p, static_cast<re_detail::re_repeat*>(dat)->alt.p) ))
{
static_cast<re_detail::re_repeat*>(dat)->leading = leading_lit;
return len;

View File

@ -55,23 +55,23 @@ inline unsigned int regex_grep(Predicate foo,
return count; // we've reached the end, don't try and find an extra null match.
if(m.length() == 0)
{
if(m[0].second == last)
return count;
// we found a NULL-match, now try to find
// a non-NULL one at the same position:
BidiIterator last_end(m[0].second);
if(last_end == last)
return count;
match_results<BidiIterator, match_allocator_type> m2(m);
matcher.setf(match_not_null | match_continuous);
if(matcher.find())
{
++count;
last_end = m[0].second;
//last_end = m[0].second;
if(0 == foo(m))
return count;
}
else
{
// reset match back to where it was:
m.set_second(last_end);
m = m2;
}
matcher.unsetf((match_not_null | match_continuous) & ~flags);
}

View File

@ -76,6 +76,14 @@ template <class BidirectionalIterator,
class traits = regex_traits<charT>,
class Allocator = BOOST_DEFAULT_ALLOCATOR(charT) >
class regex_iterator
#ifndef BOOST_NO_STD_ITERATOR
: public std::iterator<
std::forward_iterator_tag,
match_results<BidirectionalIterator>,
typename re_detail::regex_iterator_traits<BidirectionalIterator>::difference_type,
const match_results<BidirectionalIterator>*,
const match_results<BidirectionalIterator>& >
#endif
{
private:
typedef regex_iterator_implementation<BidirectionalIterator, charT, traits, Allocator> impl;

View File

@ -23,7 +23,8 @@
#include <boost/detail/workaround.hpp>
#if (BOOST_WORKAROUND(__BORLANDC__, >= 0x560) && BOOST_WORKAROUND(__BORLANDC__, BOOST_TESTED_AT(0x570)))\
|| BOOST_WORKAROUND(BOOST_MSVC, < 1300) \
|| BOOST_WORKAROUND(__MWERKS__, BOOST_TESTED_AT(0x3003))
|| BOOST_WORKAROUND(__MWERKS__, BOOST_TESTED_AT(0x3003)) \
|| BOOST_WORKAROUND(__HP_aCC, BOOST_TESTED_AT(55500))
//
// Borland C++ Builder 6, and Visual C++ 6,
// can't cope with the array template constructor
@ -51,11 +52,7 @@ template <class BidirectionalIterator,
class regex_token_iterator_implementation
{
typedef basic_regex<charT, traits, Allocator> regex_type;
#if 1
typedef sub_match<BidirectionalIterator> value_type;
#else
typedef std::basic_string<charT> value_type;
#endif
match_results<BidirectionalIterator> what; // current match
BidirectionalIterator end; // end of search area
@ -69,10 +66,11 @@ public:
regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, int sub, match_flag_type f)
: end(last), pre(p), flags(f){ subs.push_back(sub); }
regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, const std::vector<int>& v, match_flag_type f)
: end(last), pre(p), subs(v), flags(f){}
: end(last), pre(p), flags(f), subs(v){}
#if (BOOST_WORKAROUND(__BORLANDC__, >= 0x560) && BOOST_WORKAROUND(__BORLANDC__, BOOST_TESTED_AT(0x570)))\
|| BOOST_WORKAROUND(BOOST_MSVC, < 1300) \
|| BOOST_WORKAROUND(__MWERKS__, BOOST_TESTED_AT(0x3003))
|| BOOST_WORKAROUND(__MWERKS__, BOOST_TESTED_AT(0x3003)) \
|| BOOST_WORKAROUND(__HP_aCC, BOOST_TESTED_AT(55500))
template <class T>
regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, const T& submatches, match_flag_type f)
: end(last), pre(p), flags(f)
@ -163,6 +161,14 @@ template <class BidirectionalIterator,
class traits = regex_traits<charT>,
class Allocator = BOOST_DEFAULT_ALLOCATOR(charT) >
class regex_token_iterator
#ifndef BOOST_NO_STD_ITERATOR
: public std::iterator<
std::forward_iterator_tag,
sub_match<BidirectionalIterator>,
typename re_detail::regex_iterator_traits<BidirectionalIterator>::difference_type,
const sub_match<BidirectionalIterator>*,
const sub_match<BidirectionalIterator>& >
#endif
{
private:
typedef regex_token_iterator_implementation<BidirectionalIterator, charT, traits, Allocator> impl;
@ -193,7 +199,8 @@ public:
}
#if (BOOST_WORKAROUND(__BORLANDC__, >= 0x560) && BOOST_WORKAROUND(__BORLANDC__, BOOST_TESTED_AT(0x570)))\
|| BOOST_WORKAROUND(BOOST_MSVC, < 1300) \
|| BOOST_WORKAROUND(__MWERKS__, BOOST_TESTED_AT(0x3003))
|| BOOST_WORKAROUND(__MWERKS__, BOOST_TESTED_AT(0x3003)) \
|| BOOST_WORKAROUND(__HP_aCC, BOOST_TESTED_AT(55500))
template <class T>
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
const T& submatches, match_flag_type m = match_default)

View File

@ -35,6 +35,8 @@ struct sub_match : public std::pair<BidiIterator, BidiIterator>
typedef typename re_detail::regex_iterator_traits<BidiIterator>::difference_type difference_type;
#endif
typedef BidiIterator iterator_type;
typedef BidiIterator iterator;
typedef BidiIterator const_iterator;
bool matched;

View File

@ -136,7 +136,7 @@ nl_catd message_cat = (nl_catd)-1;
unsigned int message_count = 0;
std::string* mess_locale;
BOOST_REGEX_DECL char* re_custom_error_messages[] = {
char* re_custom_error_messages[] = {
0,
0,
0,
@ -182,8 +182,8 @@ std::size_t BOOST_REGEX_CALL _re_get_message(char* buf, std::size_t len, std::si
#ifndef BOOST_NO_WREGEX
BOOST_REGEX_DECL boost::regex_wchar_type re_zero_w;
BOOST_REGEX_DECL boost::regex_wchar_type re_ten_w;
boost::regex_wchar_type re_zero_w;
boost::regex_wchar_type re_ten_w;
unsigned int nlsw_count = 0;
std::string* wlocale_name = 0;

View File

@ -379,7 +379,7 @@ int BOOST_REGEX_CALL cpp_regex_traits<char>::toi(char c)const
int BOOST_REGEX_CALL cpp_regex_traits<char>::toi(const char*& first, const char* last, int radix)const
{
pmd->sbuf.pubsetbuf(const_cast<char*>(first), static_cast<std::streamsize>(last-first));
pmd->sbuf.pubsetbuf(const_cast<char*>(static_cast<const char*>(first)), static_cast<std::streamsize>(last-first));
pmd->is.clear();
if(std::abs(radix) == 16) pmd->is >> std::hex;
else if(std::abs(radix) == 8) pmd->is >> std::oct;

View File

@ -21,6 +21,7 @@
#include <boost/cregex.hpp>
#include <boost/regex.hpp>
#include <boost/integer_traits.hpp>
#if !defined(BOOST_NO_STD_STRING)
#include <map>
#include <list>
@ -113,7 +114,7 @@ void RegExData::update()
for(unsigned int i = 0; i < m.size(); ++i)
{
if(m[i].matched) strings[i] = std::string(m[i].first, m[i].second);
positions[i] = m[i].matched ? m[i].first - pbase : -1;
positions[i] = m[i].matched ? m[i].first - pbase : RegEx::npos;
}
}
#ifndef BOOST_REGEX_NO_FILEITER
@ -122,7 +123,7 @@ void RegExData::update()
for(unsigned int i = 0; i < fm.size(); ++i)
{
if(fm[i].matched) strings[i] = to_string(fm[i].first, fm[i].second);
positions[i] = fm[i].matched ? fm[i].first - fbase : -1;
positions[i] = fm[i].matched ? fm[i].first - fbase : RegEx::npos;
}
}
#endif
@ -585,7 +586,13 @@ std::string RegEx::What(int i)const
return result;
}
const unsigned int RegEx::npos = ~0u;
#ifndef BOOST_NO_INCLASS_MEMBER_INITIALIZATION
const std::size_t RegEx::npos = ::boost::integer_traits<std::size_t>::const_max;
#elif defined(BOOST_HAS_LONG_LONG)
const std::size_t RegEx::npos = ~0ULL;
#else
const std::size_t RegEx::npos = ~0UL;
#endif
} // namespace boost

View File

@ -108,7 +108,7 @@ std::list<collate_name_t>* pcoll_names = 0;
HINSTANCE hresmod = 0;
BOOST_REGEX_DECL char* re_custom_error_messages[] = {
char* re_custom_error_messages[] = {
0,
0,
0,
@ -147,8 +147,8 @@ enum syntax_map_size
#ifndef BOOST_NO_WREGEX
BOOST_REGEX_DECL boost::regex_wchar_type re_zero_w;
BOOST_REGEX_DECL boost::regex_wchar_type re_ten_w;
boost::regex_wchar_type re_zero_w;
boost::regex_wchar_type re_ten_w;
bool isPlatformNT = false;

View File

@ -359,7 +359,7 @@ void cpp_tests(const basic_regex<C, T, A>& e, bool recurse = true)
(m[-2].first - x) << "," << (m[-2].second - x) << ") expected (" <<
matches[1] << "," << (y-x) << ")" << endl;
}
#if !(defined(BOOST_MSVC) && (BOOST_MSVC <= 1300)) && !defined(BOOST_REGEX_V3)
#if !(defined(BOOST_MSVC) && (BOOST_MSVC <= 1300)) && !defined(BOOST_REGEX_V3) && !BOOST_WORKAROUND(__HP_aCC, BOOST_TESTED_AT(55500))
//
// now try comparison operators:
string_type s(m[0]);
@ -593,7 +593,7 @@ hl_grep_test_proc(const RegEx& e)
// check $`:
start = e.Position(-1);
end = start + e.Length(-1);
if(start == -1)
if(start == boost::RegEx::npos)
{
if(hl_match_id &&
( matches[hl_match_id] != matches[hl_match_id - 1] )
@ -628,7 +628,7 @@ hl_grep_test_proc(const RegEx& e)
// check $':
start = e.Position(-2);
end = start + e.Length(-2);
if(start == -1)
if(start == boost::RegEx::npos)
{
if(matches[hl_match_id + 1] != (int)search_text.size())
{

View File

@ -877,6 +877,10 @@ a+(b+) "...aaabb,,,ab*abbb?" $1 "...bb,,,ab*abbb?"
- match_default normal REG_EXTENDED REG_PERL
a** !
a*? aa 0 0
^a*?$ aa 0 2
^.*?$ aa 0 2
^(?:a)*?$ aa 0 2
^[ab]*?$ aa 0 2
a?? aa 0 0
a++ !
a+? aa 0 1
@ -1014,6 +1018,7 @@ ab.{2,5}? ab__ 0 4
ab.{2,5}? ab_______ 0 4
ab.{2,5}?xy ab______xy -1 -1
ab.{2,5}xy ab_xy -1 -1
(.*?).somesite \n\n555.somesite 2 14 2 5
; now again for single character repeats:
@ -1050,6 +1055,7 @@ ab_{2,5}? ab__ 0 4
ab_{2,5}? ab_______ 0 4
ab_{2,5}?xy ab______xy -1 -1
ab_{2,5}xy ab_xy -1 -1
(5*?).somesite //555.somesite 2 14 2 5
; and again for sets:
ab[_,;]*xy abxy_ 0 4
@ -1085,6 +1091,7 @@ ab[_,;]{2,5}? ab__ 0 4
ab[_,;]{2,5}? ab_______ 0 4
ab[_,;]{2,5}?xy ab______xy -1 -1
ab[_,;]{2,5}xy ab_xy -1 -1
(\d*?).somesite //555.somesite 2 14 2 5
; and again for tricky sets with digraphs:
ab[_[.ae.]]*xy abxy_ 0 4
@ -1120,6 +1127,7 @@ ab[_[.ae.]]{2,5}? ab__ 0 4
ab[_[.ae.]]{2,5}? ab_______ 0 4
ab[_[.ae.]]{2,5}?xy ab______xy -1 -1
ab[_[.ae.]]{2,5}xy ab_xy -1 -1
([5[.ae.]]*?).somesite //555.somesite 2 14 2 5
; new bugs detected in spring 2003:
- normal match_continuous REG_NO_POSIX_TEST