Changed regex_token_iterator to return a sub_match rather than a std::string (it's more efficient).

[SVN r20613]
This commit is contained in:
John Maddock
2003-11-03 11:25:30 +00:00
parent de9338cedf
commit 23f71d7727
4 changed files with 62 additions and 22 deletions

View File

@ -30,11 +30,12 @@
<P>The template class <CODE>regex_token_iterator</CODE> is an iterator adapter;
that is to say it represents a new view of an existing iterator sequence, by
enumerating all the occurrences of a regular expression within that sequence,
and presenting one or more new strings for each match found. Each position
enumerated by the iterator is a string that represents what matched a
particular sub-expression within the regular expression. When class <CODE>regex_token_iterator</CODE>
is used to enumerate a single sub-expression with index -1, then the iterator
performs field splitting: that is to say it enumerates one string for each
and presenting one or more character sequences for each match found. Each
position enumerated by the iterator is a <A href="sub_match.html">sub_match</A>
object that represents what matched a particular sub-expression within the
regular expression. When class <CODE>regex_token_iterator</CODE> is used to
enumerate a single sub-expression with index -1, then the iterator performs
field splitting: that is to say it enumerates one character sequence for each
section of the character container sequence that does not match the regular
expression specified.</P>
<PRE>
@ -46,7 +47,8 @@ class regex_token_iterator
{
public:
typedef <A href="basic_regex.html">basic_regex</A>&lt;charT, traits, Allocator&gt; regex_type;
typedef basic_string&lt;charT&gt; value_type;
typedef <A href="sub_match.html">sub_match</A>
&lt;BidirectionalIterator&gt; value_type;
typedef typename iterator_traits&lt;BidirectionalIterator&gt;::difference_type difference_type;
typedef const value_type* pointer;
typedef const value_type&amp; reference;
@ -140,13 +142,13 @@ regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const reg
<B>Effects: </B>returns <CODE>!(*this == that)</CODE>.</P>
<PRE><A name=o4></A>const value_type&amp; operator*()const;</PRE>
<P>
<B>Effects: </B>returns the current string being enumerated.</P>
<B>Effects: </B>returns the current character sequence being enumerated.</P>
<PRE><A name=o5></A>const value_type* operator-&gt;()const;</PRE>
<P>
<B>Effects: </B>returns <CODE>&amp;(*this)</CODE>.</P>
<PRE><A name=o6></A>regex_token_iterator&amp; operator++();</PRE>
<P>
<B>Effects: </B>Moves on to the next string to be enumerated.</P>
<B>Effects: </B>Moves on to the next character sequence to be enumerated.</P>
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
or if the program runs out of stack space while matching the expression (if
@ -275,8 +277,7 @@ boost::regex e(<FONT color=#0000ff>"&lt;\\s*A\\s+[^&gt;]*href\\s*=\\s*\"([^\"]*)
24 Oct 2003
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
<p><i>&copy; Copyright John Maddock&nbsp;1998-
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<P><I>Use, modification and distribution are subject to the Boost Software License,
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>

View File

@ -30,11 +30,12 @@
<P>The template class <CODE>regex_token_iterator</CODE> is an iterator adapter;
that is to say it represents a new view of an existing iterator sequence, by
enumerating all the occurrences of a regular expression within that sequence,
and presenting one or more new strings for each match found. Each position
enumerated by the iterator is a string that represents what matched a
particular sub-expression within the regular expression. When class <CODE>regex_token_iterator</CODE>
is used to enumerate a single sub-expression with index -1, then the iterator
performs field splitting: that is to say it enumerates one string for each
and presenting one or more character sequences for each match found. Each
position enumerated by the iterator is a <A href="sub_match.html">sub_match</A>
object that represents what matched a particular sub-expression within the
regular expression. When class <CODE>regex_token_iterator</CODE> is used to
enumerate a single sub-expression with index -1, then the iterator performs
field splitting: that is to say it enumerates one character sequence for each
section of the character container sequence that does not match the regular
expression specified.</P>
<PRE>
@ -46,7 +47,8 @@ class regex_token_iterator
{
public:
typedef <A href="basic_regex.html">basic_regex</A>&lt;charT, traits, Allocator&gt; regex_type;
typedef basic_string&lt;charT&gt; value_type;
typedef <A href="sub_match.html">sub_match</A>
&lt;BidirectionalIterator&gt; value_type;
typedef typename iterator_traits&lt;BidirectionalIterator&gt;::difference_type difference_type;
typedef const value_type* pointer;
typedef const value_type&amp; reference;
@ -140,13 +142,13 @@ regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const reg
<B>Effects: </B>returns <CODE>!(*this == that)</CODE>.</P>
<PRE><A name=o4></A>const value_type&amp; operator*()const;</PRE>
<P>
<B>Effects: </B>returns the current string being enumerated.</P>
<B>Effects: </B>returns the current character sequence being enumerated.</P>
<PRE><A name=o5></A>const value_type* operator-&gt;()const;</PRE>
<P>
<B>Effects: </B>returns <CODE>&amp;(*this)</CODE>.</P>
<PRE><A name=o6></A>regex_token_iterator&amp; operator++();</PRE>
<P>
<B>Effects: </B>Moves on to the next string to be enumerated.</P>
<B>Effects: </B>Moves on to the next character sequence to be enumerated.</P>
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
or if the program runs out of stack space while matching the expression (if
@ -275,8 +277,7 @@ boost::regex e(<FONT color=#0000ff>"&lt;\\s*A\\s+[^&gt;]*href\\s*=\\s*\"([^\"]*)
24 Oct 2003
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
<p><i>&copy; Copyright John Maddock&nbsp;1998-
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<P><I>Use, modification and distribution are subject to the Boost Software License,
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>

View File

@ -209,6 +209,9 @@ using std::distance;
# ifdef BOOST_REGEX_DYN_LINK
# define BOOST_DYN_LINK
# endif
#ifdef BOOST_REGEX_DIAG
# define BOOST_LIB_DIAGNOSTIC
#endif
# include <boost/config/auto_link.hpp>
#endif

View File

@ -44,6 +44,9 @@ namespace boost{
# pragma warning(disable:4700)
#endif
// testing
#define TEST_BECKER_INTERFACE
template <class BidirectionalIterator,
class charT,
class traits,
@ -51,13 +54,17 @@ template <class BidirectionalIterator,
class regex_token_iterator_implementation
{
typedef basic_regex<charT, traits, Allocator> regex_type;
#ifdef TEST_BECKER_INTERFACE
typedef sub_match<BidirectionalIterator> value_type;
#else
typedef std::basic_string<charT> value_type;
#endif
match_results<BidirectionalIterator> what; // current match
BidirectionalIterator end; // end of search area
const regex_type* pre; // the expression
match_flag_type flags; // match flags
std::basic_string<charT> result; // the current string result
value_type result; // the current result (a sub_match, or a string if TEST_BECKER_INTERFACE is not defined)
int N; // the current sub-expression being enumerated
std::vector<int> subs; // the sub-expressions to enumerate
@ -99,12 +106,22 @@ public:
if(regex_search(first, end, what, *pre, flags) == true)
{
N = 0;
#ifdef TEST_BECKER_INTERFACE
result = ((subs[N] == -1) ? what.prefix() : what[(int)subs[N]]);
#else
result = ((subs[N] == -1) ? value_type(what.prefix().str()) : value_type(what[(int)subs[N]].str()));
#endif
return true;
}
else if((subs[N] == -1) && (first != end))
{
#ifdef TEST_BECKER_INTERFACE
result.first = first;
result.second = end;
result.matched = true;
#else
result = value_type(first, end);
#endif
return true;
}
return false;
@ -119,7 +136,7 @@ public:
&& (what[0].first == that.what[0].first)
&& (what[0].second == that.what[0].second);
}
const std::basic_string<charT>& get()
const value_type& get()
{ return result; }
bool next()
{
@ -128,7 +145,11 @@ public:
if(N+1 < (int)subs.size())
{
++N;
#ifdef TEST_BECKER_INTERFACE
result =((subs[N] == -1) ? what.prefix().first : what[subs[N]]);
#else
result =((subs[N] == -1) ? value_type(what.prefix().first, what.prefix().second) : value_type(what[subs[N]].first, what[subs[N]].second));
#endif
return true;
}
if(what.prefix().first != what[0].second)
@ -137,13 +158,23 @@ public:
if(regex_search(last_end, end, what, *pre, ((what[0].first == what[0].second) ? flags | regex_constants::match_not_initial_null : flags)))
{
N =0;
#ifdef TEST_BECKER_INTERFACE
result =((subs[N] == -1) ? what.prefix() : what[subs[N]]);
#else
result =((subs[N] == -1) ? value_type(what.prefix().first, what.prefix().second) : value_type(what[subs[N]].first, what[subs[N]].second));
#endif
return true;
}
else if((last_end != end) && (subs[0] == -1))
{
N =-1;
#ifdef TEST_BECKER_INTERFACE
result.first = last_end;
result.second = end;
result.matched = true;
#else
result = value_type(last_end, end);
#endif
return true;
}
return false;
@ -161,7 +192,11 @@ private:
typedef shared_ptr<impl> pimpl;
public:
typedef basic_regex<charT, traits, Allocator> regex_type;
#ifdef TEST_BECKER_INTERFACE
typedef sub_match<BidirectionalIterator> value_type;
#else
typedef std::basic_string<charT> value_type;
#endif
typedef typename re_detail::regex_iterator_traits<BidirectionalIterator>::difference_type
difference_type;
typedef const value_type* pointer;