mirror of
https://github.com/boostorg/regex.git
synced 2025-07-23 17:17:22 +02:00
Added missing preconditions, and fixed minor typos.
[SVN r22529]
This commit is contained in:
@ -76,7 +76,7 @@ typedef regex_token_iterator<const char*> cregex_token_i
|
|||||||
typedef regex_token_iterator<std::string::const_iterator> sregex_token_iterator;
|
typedef regex_token_iterator<std::string::const_iterator> sregex_token_iterator;
|
||||||
#ifndef BOOST_NO_WREGEX
|
#ifndef BOOST_NO_WREGEX
|
||||||
typedef regex_token_iterator<const wchar_t*> wcregex_token_iterator;
|
typedef regex_token_iterator<const wchar_t*> wcregex_token_iterator;
|
||||||
typedef regex_token_iterator<std::wstring::const_iterator> wsregex_token_iterator;
|
typedef regex_token_iterator<std::wstring::const_iterator> wsregex_token_iterator;
|
||||||
#endif
|
#endif
|
||||||
</PRE>
|
</PRE>
|
||||||
<H3><A name="description"></A>Description</H3>
|
<H3><A name="description"></A>Description</H3>
|
||||||
@ -84,7 +84,8 @@ typedef regex_token_iterator<<std::wstring::const_iterator> wsregex_token_
|
|||||||
<P><B> Effects:</B> constructs an end of sequence iterator.</P>
|
<P><B> Effects:</B> constructs an end of sequence iterator.</P>
|
||||||
<PRE><A name=c2></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
<PRE><A name=c2></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||||||
int submatch = 0, match_flag_type m = match_default);</PRE>
|
int submatch = 0, match_flag_type m = match_default);</PRE>
|
||||||
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>.</P>
|
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>. Object re shall exist
|
||||||
|
for the lifetime of the iterator constructed from it.</P>
|
||||||
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate one
|
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate one
|
||||||
string for each regular expression match of the expression <EM>re</EM> found
|
string for each regular expression match of the expression <EM>re</EM> found
|
||||||
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>. The
|
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>. The
|
||||||
@ -99,7 +100,8 @@ typedef regex_token_iterator<<std::wstring::const_iterator> wsregex_token_
|
|||||||
configured</A> in non-recursive mode).</P>
|
configured</A> in non-recursive mode).</P>
|
||||||
<PRE><A name=c3></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
<PRE><A name=c3></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||||||
const std::vector<int>& submatches, match_flag_type m = match_default);</PRE>
|
const std::vector<int>& submatches, match_flag_type m = match_default);</PRE>
|
||||||
<P><B> Preconditions:</B> <CODE>submatches.size() && !re.empty()</CODE>.</P>
|
<P><B> Preconditions:</B> <CODE>submatches.size() && !re.empty()</CODE>.
|
||||||
|
Object re shall exist for the lifetime of the iterator constructed from it.</P>
|
||||||
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate <EM>submatches.size()</EM>
|
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate <EM>submatches.size()</EM>
|
||||||
strings for each regular expression match of the expression <EM>re</EM> found
|
strings for each regular expression match of the expression <EM>re</EM> found
|
||||||
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>. For
|
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>. For
|
||||||
@ -118,7 +120,8 @@ typedef regex_token_iterator<<std::wstring::const_iterator> wsregex_token_
|
|||||||
<PRE><A name=c4></A>template <std::size_t R>
|
<PRE><A name=c4></A>template <std::size_t R>
|
||||||
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||||||
const int (&submatches)[R], match_flag_type m = match_default);</PRE>
|
const int (&submatches)[R], match_flag_type m = match_default);</PRE>
|
||||||
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>.</P>
|
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>. Object re shall exist
|
||||||
|
for the lifetime of the iterator constructed from it.</P>
|
||||||
<P><STRONG>Effects:</STRONG> constructs a regex_token_iterator that will
|
<P><STRONG>Effects:</STRONG> constructs a regex_token_iterator that will
|
||||||
enumerate <EM>R</EM> strings for each regular expression match of the
|
enumerate <EM>R</EM> strings for each regular expression match of the
|
||||||
expression <EM>re</EM> found within the sequence <EM>[a,b)</EM>, using match
|
expression <EM>re</EM> found within the sequence <EM>[a,b)</EM>, using match
|
||||||
|
@ -91,18 +91,18 @@
|
|||||||
<P>Parentheses serve two purposes, to group items together into a sub-expression,
|
<P>Parentheses serve two purposes, to group items together into a sub-expression,
|
||||||
and to mark what generated the match. For example the expression "(ab)*" would
|
and to mark what generated the match. For example the expression "(ab)*" would
|
||||||
match all of the string "ababab". The matching algorithms <A href="regex_match.html">
|
match all of the string "ababab". The matching algorithms <A href="regex_match.html">
|
||||||
regex_match</A> and <A href="regex_search.html">regex_search</A>
|
regex_match</A> and <A href="regex_search.html">regex_search</A> each take
|
||||||
each take an instance of <A href="match_results.html">match_results</A>
|
an instance of <A href="match_results.html">match_results</A> that reports what
|
||||||
that reports what caused the match, on exit from these functions the <A href="match_results.html">
|
caused the match, on exit from these functions the <A href="match_results.html">match_results</A>
|
||||||
match_results</A> contains information both on what the whole expression
|
contains information both on what the whole expression matched and on what each
|
||||||
matched and on what each sub-expression matched. In the example above
|
sub-expression matched. In the example above match_results[1] would contain a
|
||||||
match_results[1] would contain a pair of iterators denoting the final "ab" of
|
pair of iterators denoting the final "ab" of the matching string. It is
|
||||||
the matching string. It is permissible for sub-expressions to match null
|
permissible for sub-expressions to match null strings. If a sub-expression
|
||||||
strings. If a sub-expression takes no part in a match - for example if it is
|
takes no part in a match - for example if it is part of an alternative that is
|
||||||
part of an alternative that is not taken - then both of the iterators that are
|
not taken - then both of the iterators that are returned for that
|
||||||
returned for that sub-expression point to the end of the input string, and the <I>matched</I>
|
sub-expression point to the end of the input string, and the <I>matched</I> parameter
|
||||||
parameter for that sub-expression is <I>false</I>. Sub-expressions are indexed
|
for that sub-expression is <I>false</I>. Sub-expressions are indexed from left
|
||||||
from left to right starting from 1, sub-expression 0 is the whole expression.
|
to right starting from 1, sub-expression 0 is the whole expression.
|
||||||
</P>
|
</P>
|
||||||
<H3>Non-Marking Parenthesis
|
<H3>Non-Marking Parenthesis
|
||||||
</H3>
|
</H3>
|
||||||
@ -143,7 +143,7 @@
|
|||||||
<P>A set is a set of characters that can match any single character that is a
|
<P>A set is a set of characters that can match any single character that is a
|
||||||
member of the set. Sets are delimited by "[" and "]" and can contain literals,
|
member of the set. Sets are delimited by "[" and "]" and can contain literals,
|
||||||
character ranges, character classes, collating elements and equivalence
|
character ranges, character classes, collating elements and equivalence
|
||||||
classes. Set declarations that start with "^" contain the compliment of the
|
classes. Set declarations that start with "^" contain the complement of the
|
||||||
elements that follow.
|
elements that follow.
|
||||||
</P>
|
</P>
|
||||||
<P>Examples:
|
<P>Examples:
|
||||||
@ -293,7 +293,7 @@
|
|||||||
[^[.ae.]] would only match one character.
|
[^[.ae.]] would only match one character.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Equivalence classes take the general form[=tagname=] inside a set declaration,
|
Equivalence classes take the general form [=tagname=] inside a set declaration,
|
||||||
where <I>tagname</I> is either a single character, or a name of a collating
|
where <I>tagname</I> is either a single character, or a name of a collating
|
||||||
element, and matches any character that is a member of the same primary
|
element, and matches any character that is a member of the same primary
|
||||||
equivalence class as the collating element [.tagname.]. An equivalence class is
|
equivalence class as the collating element [.tagname.]. An equivalence class is
|
||||||
@ -302,7 +302,7 @@
|
|||||||
typically collated by character, then by accent, and then by case; the primary
|
typically collated by character, then by accent, and then by case; the primary
|
||||||
sort key then relates to the character, the secondary to the accentuation, and
|
sort key then relates to the character, the secondary to the accentuation, and
|
||||||
the tertiary to the case). If there is no equivalence class corresponding to <I>tagname</I>
|
the tertiary to the case). If there is no equivalence class corresponding to <I>tagname</I>
|
||||||
, then[=tagname=] is exactly the same as [.tagname.]. Unfortunately there is no
|
, then [=tagname=] is exactly the same as [.tagname.]. Unfortunately there is no
|
||||||
locale independent method of obtaining the primary sort key for a character,
|
locale independent method of obtaining the primary sort key for a character,
|
||||||
except under Win32. For other operating systems the library will "guess" the
|
except under Win32. For other operating systems the library will "guess" the
|
||||||
primary sort key from the full sort key (obtained from <I>strxfrm</I>), so
|
primary sort key from the full sort key (obtained from <I>strxfrm</I>), so
|
||||||
@ -666,106 +666,103 @@
|
|||||||
<H3>What gets matched?
|
<H3>What gets matched?
|
||||||
</H3>
|
</H3>
|
||||||
<P>
|
<P>
|
||||||
When the expression is compiled as a Perl-compatible regex then the matching
|
When the expression is compiled as a Perl-compatible regex then the matching
|
||||||
algorithms will perform a depth first search on the state machine and report
|
algorithms will perform a depth first search on the state machine and report
|
||||||
the first match found.</P>
|
the first match found.</P>
|
||||||
<P>
|
<P>
|
||||||
When the expression is compiled as a POSIX-compatible regex then the matching
|
When the expression is compiled as a POSIX-compatible regex then the matching
|
||||||
algorithms will match the first possible matching string, if more than one
|
algorithms will match the first possible matching string, if more than one
|
||||||
string starting at a given location can match then it matches the longest
|
string starting at a given location can match then it matches the longest
|
||||||
possible string, unless the flag match_any is set, in which case the first
|
possible string, unless the flag match_any is set, in which case the first
|
||||||
match encountered is returned. Use of the match_any option can reduce the time
|
match encountered is returned. Use of the match_any option can reduce the time
|
||||||
taken to find the match - but is only useful if the user is less concerned
|
taken to find the match - but is only useful if the user is less concerned
|
||||||
about what matched - for example it would not be suitable for search and
|
about what matched - for example it would not be suitable for search and
|
||||||
replace operations. In cases where there are multiple possible matches all
|
replace operations. In cases where there are multiple possible matches all
|
||||||
starting at the same location, and all of the same length, then the match
|
starting at the same location, and all of the same length, then the match
|
||||||
chosen is the one with the longest first sub-expression, if that is the same
|
chosen is the one with the longest first sub-expression, if that is the same
|
||||||
for two or more matches, then the second sub-expression will be examined and so
|
for two or more matches, then the second sub-expression will be examined and so
|
||||||
on.
|
on.
|
||||||
</P><P>
|
|
||||||
The examples in the following table illustrate the main differences between Perl and
|
|
||||||
POSIX regular expression matching rules:
|
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<TABLE id="Table5" cellSpacing="1" cellPadding="7" width="624" border="1">
|
The examples in the following table illustrate the main differences between Perl and
|
||||||
<TBODY>
|
POSIX regular expression matching rules:
|
||||||
<TR>
|
|
||||||
<TD vAlign="top" width="25%">
|
|
||||||
<P>Expression</P>
|
|
||||||
</TD>
|
|
||||||
<TD vAlign="top" width="25%">
|
|
||||||
<P>Text</P>
|
|
||||||
</TD>
|
|
||||||
<TD vAlign="top" width="25%">
|
|
||||||
<P>POSIX leftmost longest match</P>
|
|
||||||
</TD>
|
|
||||||
<TD vAlign="top" width="25%">
|
|
||||||
<P>ECMAScript depth first search match</P>
|
|
||||||
</TD>
|
|
||||||
</TR>
|
|
||||||
<TR>
|
|
||||||
<TD vAlign="top" width="25%">
|
|
||||||
<P><CODE>a|ab</CODE></P>
|
|
||||||
</TD>
|
|
||||||
<TD vAlign="top" width="25%">
|
|
||||||
<P><CODE>
|
|
||||||
xaby</CODE>
|
|
||||||
</P>
|
</P>
|
||||||
</TD>
|
<P>
|
||||||
<TD vAlign="top" width="25%">
|
<TABLE id="Table5" cellSpacing="1" cellPadding="7" width="624" border="1">
|
||||||
<P><CODE>
|
<TBODY>
|
||||||
"ab"</CODE></P></TD>
|
<TR>
|
||||||
<TD vAlign="top" width="25%">
|
<TD vAlign="top" width="25%">
|
||||||
<P><CODE>
|
<P>Expression</P>
|
||||||
"a"</CODE></P></TD>
|
</TD>
|
||||||
</TR>
|
<TD vAlign="top" width="25%">
|
||||||
<TR>
|
<P>Text</P>
|
||||||
<TD vAlign="top" width="25%">
|
</TD>
|
||||||
<P><CODE>
|
<TD vAlign="top" width="25%">
|
||||||
.*([[:alnum:]]+).*</CODE></P></TD>
|
<P>POSIX leftmost longest match</P>
|
||||||
<TD vAlign="top" width="25%">
|
</TD>
|
||||||
<P><CODE>
|
<TD vAlign="top" width="25%">
|
||||||
" abc def xyz "</CODE></P></TD>
|
<P>ECMAScript depth first search match</P>
|
||||||
<TD vAlign="top" width="25%">
|
</TD>
|
||||||
<P>$0 = " abc def xyz "<BR>
|
</TR>
|
||||||
$1 = "abc"</P>
|
<TR>
|
||||||
</TD>
|
<TD vAlign="top" width="25%">
|
||||||
<TD vAlign="top" width="25%">
|
<P><CODE>a|ab</CODE></P>
|
||||||
<P>$0 = " abc def xyz "<BR>
|
</TD>
|
||||||
$1 = "z"</P>
|
<TD vAlign="top" width="25%">
|
||||||
</TD>
|
<P><CODE> xaby</CODE>
|
||||||
</TR>
|
</P>
|
||||||
<TR>
|
</TD>
|
||||||
<TD vAlign="top" width="25%">
|
<TD vAlign="top" width="25%">
|
||||||
<P><CODE>
|
<P><CODE> "ab"</CODE></P>
|
||||||
.*(a|xayy)</CODE></P></TD>
|
</TD>
|
||||||
<TD vAlign="top" width="25%">
|
<TD vAlign="top" width="25%">
|
||||||
<P><CODE>
|
<P><CODE> "a"</CODE></P>
|
||||||
zzxayyzz</CODE></P></TD>
|
</TD>
|
||||||
<TD vAlign="top" width="25%">
|
</TR>
|
||||||
<P><CODE>
|
<TR>
|
||||||
"zzxayy"</CODE></P></TD>
|
<TD vAlign="top" width="25%">
|
||||||
<TD vAlign="top" width="25%">
|
<P><CODE> .*([[:alnum:]]+).*</CODE></P>
|
||||||
<P><CODE>"zzxa"</CODE></P>
|
</TD>
|
||||||
</TD>
|
<TD vAlign="top" width="25%">
|
||||||
</TR>
|
<P><CODE> " abc def xyz "</CODE></P>
|
||||||
</TBODY></TABLE>
|
</TD>
|
||||||
|
<TD vAlign="top" width="25%">
|
||||||
|
<P>$0 = " abc def xyz "<BR>
|
||||||
|
$1 = "abc"</P>
|
||||||
|
</TD>
|
||||||
|
<TD vAlign="top" width="25%">
|
||||||
|
<P>$0 = " abc def xyz "<BR>
|
||||||
|
$1 = "z"</P>
|
||||||
|
</TD>
|
||||||
|
</TR>
|
||||||
|
<TR>
|
||||||
|
<TD vAlign="top" width="25%">
|
||||||
|
<P><CODE> .*(a|xayy)</CODE></P>
|
||||||
|
</TD>
|
||||||
|
<TD vAlign="top" width="25%">
|
||||||
|
<P><CODE> zzxayyzz</CODE></P>
|
||||||
|
</TD>
|
||||||
|
<TD vAlign="top" width="25%">
|
||||||
|
<P><CODE> "zzxayy"</CODE></P>
|
||||||
|
</TD>
|
||||||
|
<TD vAlign="top" width="25%">
|
||||||
|
<P><CODE>"zzxa"</CODE></P>
|
||||||
|
</TD>
|
||||||
|
</TR>
|
||||||
|
</TBODY></TABLE>
|
||||||
<P>These differences between Perl matching rules, and POSIX matching rules, mean
|
<P>These differences between Perl matching rules, and POSIX matching rules, mean
|
||||||
that these two regular expression syntaxes differ not only in the features
|
that these two regular expression syntaxes differ not only in the features
|
||||||
offered, but also in the form that the state machine takes and/or the
|
offered, but also in the form that the state machine takes and/or the
|
||||||
algorithms used to traverse the state machine.</p>
|
algorithms used to traverse the state machine.</P>
|
||||||
<HR>
|
<HR>
|
||||||
<p>Revised
|
<p>Revised
|
||||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||||
24 Oct 2003
|
24 Oct 2003
|
||||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||||
<p><i>&copy; Copyright John Maddock 1998-
|
<p><i>&copy; Copyright John Maddock 1998-
|
||||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
|
||||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
||||||
|
|
||||||
|
@ -76,7 +76,7 @@ typedef regex_token_iterator<const char*> cregex_token_i
|
|||||||
typedef regex_token_iterator<std::string::const_iterator> sregex_token_iterator;
|
typedef regex_token_iterator<std::string::const_iterator> sregex_token_iterator;
|
||||||
#ifndef BOOST_NO_WREGEX
|
#ifndef BOOST_NO_WREGEX
|
||||||
typedef regex_token_iterator<const wchar_t*> wcregex_token_iterator;
|
typedef regex_token_iterator<const wchar_t*> wcregex_token_iterator;
|
||||||
typedef regex_token_iterator<std::wstring::const_iterator> wsregex_token_iterator;
|
typedef regex_token_iterator<std::wstring::const_iterator> wsregex_token_iterator;
|
||||||
#endif
|
#endif
|
||||||
</PRE>
|
</PRE>
|
||||||
<H3><A name="description"></A>Description</H3>
|
<H3><A name="description"></A>Description</H3>
|
||||||
@ -84,7 +84,8 @@ typedef regex_token_iterator<<std::wstring::const_iterator> wsregex_token_
|
|||||||
<P><B> Effects:</B> constructs an end of sequence iterator.</P>
|
<P><B> Effects:</B> constructs an end of sequence iterator.</P>
|
||||||
<PRE><A name=c2></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
<PRE><A name=c2></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||||||
int submatch = 0, match_flag_type m = match_default);</PRE>
|
int submatch = 0, match_flag_type m = match_default);</PRE>
|
||||||
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>.</P>
|
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>. Object re shall exist
|
||||||
|
for the lifetime of the iterator constructed from it.</P>
|
||||||
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate one
|
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate one
|
||||||
string for each regular expression match of the expression <EM>re</EM> found
|
string for each regular expression match of the expression <EM>re</EM> found
|
||||||
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>. The
|
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>. The
|
||||||
@ -99,7 +100,8 @@ typedef regex_token_iterator<<std::wstring::const_iterator> wsregex_token_
|
|||||||
configured</A> in non-recursive mode).</P>
|
configured</A> in non-recursive mode).</P>
|
||||||
<PRE><A name=c3></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
<PRE><A name=c3></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||||||
const std::vector<int>& submatches, match_flag_type m = match_default);</PRE>
|
const std::vector<int>& submatches, match_flag_type m = match_default);</PRE>
|
||||||
<P><B> Preconditions:</B> <CODE>submatches.size() && !re.empty()</CODE>.</P>
|
<P><B> Preconditions:</B> <CODE>submatches.size() && !re.empty()</CODE>.
|
||||||
|
Object re shall exist for the lifetime of the iterator constructed from it.</P>
|
||||||
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate <EM>submatches.size()</EM>
|
<P><B> Effects:</B> constructs a regex_token_iterator that will enumerate <EM>submatches.size()</EM>
|
||||||
strings for each regular expression match of the expression <EM>re</EM> found
|
strings for each regular expression match of the expression <EM>re</EM> found
|
||||||
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>. For
|
within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>. For
|
||||||
@ -118,7 +120,8 @@ typedef regex_token_iterator<<std::wstring::const_iterator> wsregex_token_
|
|||||||
<PRE><A name=c4></A>template <std::size_t R>
|
<PRE><A name=c4></A>template <std::size_t R>
|
||||||
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re,
|
||||||
const int (&submatches)[R], match_flag_type m = match_default);</PRE>
|
const int (&submatches)[R], match_flag_type m = match_default);</PRE>
|
||||||
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>.</P>
|
<P><B> Preconditions: </B><CODE>!re.empty()</CODE>. Object re shall exist
|
||||||
|
for the lifetime of the iterator constructed from it.</P>
|
||||||
<P><STRONG>Effects:</STRONG> constructs a regex_token_iterator that will
|
<P><STRONG>Effects:</STRONG> constructs a regex_token_iterator that will
|
||||||
enumerate <EM>R</EM> strings for each regular expression match of the
|
enumerate <EM>R</EM> strings for each regular expression match of the
|
||||||
expression <EM>re</EM> found within the sequence <EM>[a,b)</EM>, using match
|
expression <EM>re</EM> found within the sequence <EM>[a,b)</EM>, using match
|
||||||
|
199
doc/syntax.html
199
doc/syntax.html
@ -91,18 +91,18 @@
|
|||||||
<P>Parentheses serve two purposes, to group items together into a sub-expression,
|
<P>Parentheses serve two purposes, to group items together into a sub-expression,
|
||||||
and to mark what generated the match. For example the expression "(ab)*" would
|
and to mark what generated the match. For example the expression "(ab)*" would
|
||||||
match all of the string "ababab". The matching algorithms <A href="regex_match.html">
|
match all of the string "ababab". The matching algorithms <A href="regex_match.html">
|
||||||
regex_match</A> and <A href="regex_search.html">regex_search</A>
|
regex_match</A> and <A href="regex_search.html">regex_search</A> each take
|
||||||
each take an instance of <A href="match_results.html">match_results</A>
|
an instance of <A href="match_results.html">match_results</A> that reports what
|
||||||
that reports what caused the match, on exit from these functions the <A href="match_results.html">
|
caused the match, on exit from these functions the <A href="match_results.html">match_results</A>
|
||||||
match_results</A> contains information both on what the whole expression
|
contains information both on what the whole expression matched and on what each
|
||||||
matched and on what each sub-expression matched. In the example above
|
sub-expression matched. In the example above match_results[1] would contain a
|
||||||
match_results[1] would contain a pair of iterators denoting the final "ab" of
|
pair of iterators denoting the final "ab" of the matching string. It is
|
||||||
the matching string. It is permissible for sub-expressions to match null
|
permissible for sub-expressions to match null strings. If a sub-expression
|
||||||
strings. If a sub-expression takes no part in a match - for example if it is
|
takes no part in a match - for example if it is part of an alternative that is
|
||||||
part of an alternative that is not taken - then both of the iterators that are
|
not taken - then both of the iterators that are returned for that
|
||||||
returned for that sub-expression point to the end of the input string, and the <I>matched</I>
|
sub-expression point to the end of the input string, and the <I>matched</I> parameter
|
||||||
parameter for that sub-expression is <I>false</I>. Sub-expressions are indexed
|
for that sub-expression is <I>false</I>. Sub-expressions are indexed from left
|
||||||
from left to right starting from 1, sub-expression 0 is the whole expression.
|
to right starting from 1, sub-expression 0 is the whole expression.
|
||||||
</P>
|
</P>
|
||||||
<H3>Non-Marking Parenthesis
|
<H3>Non-Marking Parenthesis
|
||||||
</H3>
|
</H3>
|
||||||
@ -143,7 +143,7 @@
|
|||||||
<P>A set is a set of characters that can match any single character that is a
|
<P>A set is a set of characters that can match any single character that is a
|
||||||
member of the set. Sets are delimited by "[" and "]" and can contain literals,
|
member of the set. Sets are delimited by "[" and "]" and can contain literals,
|
||||||
character ranges, character classes, collating elements and equivalence
|
character ranges, character classes, collating elements and equivalence
|
||||||
classes. Set declarations that start with "^" contain the compliment of the
|
classes. Set declarations that start with "^" contain the complement of the
|
||||||
elements that follow.
|
elements that follow.
|
||||||
</P>
|
</P>
|
||||||
<P>Examples:
|
<P>Examples:
|
||||||
@ -293,7 +293,7 @@
|
|||||||
[^[.ae.]] would only match one character.
|
[^[.ae.]] would only match one character.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Equivalence classes take the general form[=tagname=] inside a set declaration,
|
Equivalence classes take the general form [=tagname=] inside a set declaration,
|
||||||
where <I>tagname</I> is either a single character, or a name of a collating
|
where <I>tagname</I> is either a single character, or a name of a collating
|
||||||
element, and matches any character that is a member of the same primary
|
element, and matches any character that is a member of the same primary
|
||||||
equivalence class as the collating element [.tagname.]. An equivalence class is
|
equivalence class as the collating element [.tagname.]. An equivalence class is
|
||||||
@ -302,7 +302,7 @@
|
|||||||
typically collated by character, then by accent, and then by case; the primary
|
typically collated by character, then by accent, and then by case; the primary
|
||||||
sort key then relates to the character, the secondary to the accentuation, and
|
sort key then relates to the character, the secondary to the accentuation, and
|
||||||
the tertiary to the case). If there is no equivalence class corresponding to <I>tagname</I>
|
the tertiary to the case). If there is no equivalence class corresponding to <I>tagname</I>
|
||||||
, then[=tagname=] is exactly the same as [.tagname.]. Unfortunately there is no
|
, then [=tagname=] is exactly the same as [.tagname.]. Unfortunately there is no
|
||||||
locale independent method of obtaining the primary sort key for a character,
|
locale independent method of obtaining the primary sort key for a character,
|
||||||
except under Win32. For other operating systems the library will "guess" the
|
except under Win32. For other operating systems the library will "guess" the
|
||||||
primary sort key from the full sort key (obtained from <I>strxfrm</I>), so
|
primary sort key from the full sort key (obtained from <I>strxfrm</I>), so
|
||||||
@ -666,106 +666,103 @@
|
|||||||
<H3>What gets matched?
|
<H3>What gets matched?
|
||||||
</H3>
|
</H3>
|
||||||
<P>
|
<P>
|
||||||
When the expression is compiled as a Perl-compatible regex then the matching
|
When the expression is compiled as a Perl-compatible regex then the matching
|
||||||
algorithms will perform a depth first search on the state machine and report
|
algorithms will perform a depth first search on the state machine and report
|
||||||
the first match found.</P>
|
the first match found.</P>
|
||||||
<P>
|
<P>
|
||||||
When the expression is compiled as a POSIX-compatible regex then the matching
|
When the expression is compiled as a POSIX-compatible regex then the matching
|
||||||
algorithms will match the first possible matching string, if more than one
|
algorithms will match the first possible matching string, if more than one
|
||||||
string starting at a given location can match then it matches the longest
|
string starting at a given location can match then it matches the longest
|
||||||
possible string, unless the flag match_any is set, in which case the first
|
possible string, unless the flag match_any is set, in which case the first
|
||||||
match encountered is returned. Use of the match_any option can reduce the time
|
match encountered is returned. Use of the match_any option can reduce the time
|
||||||
taken to find the match - but is only useful if the user is less concerned
|
taken to find the match - but is only useful if the user is less concerned
|
||||||
about what matched - for example it would not be suitable for search and
|
about what matched - for example it would not be suitable for search and
|
||||||
replace operations. In cases where there are multiple possible matches all
|
replace operations. In cases where there are multiple possible matches all
|
||||||
starting at the same location, and all of the same length, then the match
|
starting at the same location, and all of the same length, then the match
|
||||||
chosen is the one with the longest first sub-expression, if that is the same
|
chosen is the one with the longest first sub-expression, if that is the same
|
||||||
for two or more matches, then the second sub-expression will be examined and so
|
for two or more matches, then the second sub-expression will be examined and so
|
||||||
on.
|
on.
|
||||||
</P><P>
|
|
||||||
The following table of examples illustrates the main differences between Perl and
|
|
||||||
POSIX regular expression matching rules:
|
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<TABLE id="Table5" cellSpacing="1" cellPadding="7" width="624" border="1">
|
The following table of examples illustrates the main differences between Perl and
|
||||||
<TBODY>
|
POSIX regular expression matching rules:
|
||||||
<TR>
|
|
||||||
<TD vAlign="top" width="25%">
|
|
||||||
<P>Expression</P>
|
|
||||||
</TD>
|
|
||||||
<TD vAlign="top" width="25%">
|
|
||||||
<P>Text</P>
|
|
||||||
</TD>
|
|
||||||
<TD vAlign="top" width="25%">
|
|
||||||
<P>POSIX leftmost longest match</P>
|
|
||||||
</TD>
|
|
||||||
<TD vAlign="top" width="25%">
|
|
||||||
<P>ECMAScript depth first search match</P>
|
|
||||||
</TD>
|
|
||||||
</TR>
|
|
||||||
<TR>
|
|
||||||
<TD vAlign="top" width="25%">
|
|
||||||
<P><CODE>a|ab</CODE></P>
|
|
||||||
</TD>
|
|
||||||
<TD vAlign="top" width="25%">
|
|
||||||
<P><CODE>
|
|
||||||
xaby</CODE>
|
|
||||||
</P>
|
</P>
|
||||||
</TD>
|
<P>
|
||||||
<TD vAlign="top" width="25%">
|
<TABLE id="Table5" cellSpacing="1" cellPadding="7" width="624" border="1">
|
||||||
<P><CODE>
|
<TBODY>
|
||||||
"ab"</CODE></P></TD>
|
<TR>
|
||||||
<TD vAlign="top" width="25%">
|
<TD vAlign="top" width="25%">
|
||||||
<P><CODE>
|
<P>Expression</P>
|
||||||
"a"</CODE></P></TD>
|
</TD>
|
||||||
</TR>
|
<TD vAlign="top" width="25%">
|
||||||
<TR>
|
<P>Text</P>
|
||||||
<TD vAlign="top" width="25%">
|
</TD>
|
||||||
<P><CODE>
|
<TD vAlign="top" width="25%">
|
||||||
.*([[:alnum:]]+).*</CODE></P></TD>
|
<P>POSIX leftmost longest match</P>
|
||||||
<TD vAlign="top" width="25%">
|
</TD>
|
||||||
<P><CODE>
|
<TD vAlign="top" width="25%">
|
||||||
" abc def xyz "</CODE></P></TD>
|
<P>ECMAScript depth first search match</P>
|
||||||
<TD vAlign="top" width="25%">
|
</TD>
|
||||||
<P>$0 = " abc def xyz "<BR>
|
</TR>
|
||||||
$1 = "abc"</P>
|
<TR>
|
||||||
</TD>
|
<TD vAlign="top" width="25%">
|
||||||
<TD vAlign="top" width="25%">
|
<P><CODE>a|ab</CODE></P>
|
||||||
<P>$0 = " abc def xyz "<BR>
|
</TD>
|
||||||
$1 = "z"</P>
|
<TD vAlign="top" width="25%">
|
||||||
</TD>
|
<P><CODE> xaby</CODE>
|
||||||
</TR>
|
</P>
|
||||||
<TR>
|
</TD>
|
||||||
<TD vAlign="top" width="25%">
|
<TD vAlign="top" width="25%">
|
||||||
<P><CODE>
|
<P><CODE> "ab"</CODE></P>
|
||||||
.*(a|xayy)</CODE></P></TD>
|
</TD>
|
||||||
<TD vAlign="top" width="25%">
|
<TD vAlign="top" width="25%">
|
||||||
<P><CODE>
|
<P><CODE> "a"</CODE></P>
|
||||||
zzxayyzz</CODE></P></TD>
|
</TD>
|
||||||
<TD vAlign="top" width="25%">
|
</TR>
|
||||||
<P><CODE>
|
<TR>
|
||||||
"zzxayy"</CODE></P></TD>
|
<TD vAlign="top" width="25%">
|
||||||
<TD vAlign="top" width="25%">
|
<P><CODE> .*([[:alnum:]]+).*</CODE></P>
|
||||||
<P><CODE>"zzxa"</CODE></P>
|
</TD>
|
||||||
</TD>
|
<TD vAlign="top" width="25%">
|
||||||
</TR>
|
<P><CODE> " abc def xyz "</CODE></P>
|
||||||
</TBODY></TABLE>
|
</TD>
|
||||||
|
<TD vAlign="top" width="25%">
|
||||||
|
<P>$0 = " abc def xyz "<BR>
|
||||||
|
$1 = "abc"</P>
|
||||||
|
</TD>
|
||||||
|
<TD vAlign="top" width="25%">
|
||||||
|
<P>$0 = " abc def xyz "<BR>
|
||||||
|
$1 = "z"</P>
|
||||||
|
</TD>
|
||||||
|
</TR>
|
||||||
|
<TR>
|
||||||
|
<TD vAlign="top" width="25%">
|
||||||
|
<P><CODE> .*(a|xayy)</CODE></P>
|
||||||
|
</TD>
|
||||||
|
<TD vAlign="top" width="25%">
|
||||||
|
<P><CODE> zzxayyzz</CODE></P>
|
||||||
|
</TD>
|
||||||
|
<TD vAlign="top" width="25%">
|
||||||
|
<P><CODE> "zzxayy"</CODE></P>
|
||||||
|
</TD>
|
||||||
|
<TD vAlign="top" width="25%">
|
||||||
|
<P><CODE>"zzxa"</CODE></P>
|
||||||
|
</TD>
|
||||||
|
</TR>
|
||||||
|
</TBODY></TABLE>
|
||||||
<P>These differences between Perl matching rules, and POSIX matching rules, mean
|
<P>These differences between Perl matching rules, and POSIX matching rules, mean
|
||||||
that these two regular expression syntaxes differ not only in the features
|
that these two regular expression syntaxes differ not only in the features
|
||||||
offered, but also in the form that the state machine takes and/or the
|
offered, but also in the form that the state machine takes and/or the
|
||||||
algorithms used to traverse the state machine.</p>
|
algorithms used to traverse the state machine.</P>
|
||||||
<HR>
|
<HR>
|
||||||
<p>Revised
|
<p>Revised
|
||||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||||
24 Oct 2003
|
24 Oct 2003
|
||||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||||
<p><i>&copy; Copyright John Maddock 1998-
|
<p><i>&copy; Copyright John Maddock 1998-
|
||||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
|
||||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user