Changed regex_token_iterator to return a sub_match rather than a std::string (it's more efficient).

[SVN r20613]
This commit is contained in:
John Maddock
2003-11-03 11:25:30 +00:00
parent de9338cedf
commit 23f71d7727
4 changed files with 62 additions and 22 deletions

View File

@ -30,11 +30,12 @@
<P>The template class <CODE>regex_token_iterator</CODE> is an iterator adapter; <P>The template class <CODE>regex_token_iterator</CODE> is an iterator adapter;
that is to say it represents a new view of an existing iterator sequence, by that is to say it represents a new view of an existing iterator sequence, by
enumerating all the occurrences of a regular expression within that sequence, enumerating all the occurrences of a regular expression within that sequence,
and presenting one or more new strings for each match found. Each position and presenting one or more character sequences for each match found. Each
enumerated by the iterator is a string that represents what matched a position enumerated by the iterator is a <A href="sub_match.html">sub_match</A>
particular sub-expression within the regular expression. When class <CODE>regex_token_iterator</CODE> object that represents what matched a particular sub-expression within the
is used to enumerate a single sub-expression with index -1, then the iterator regular expression. When class <CODE>regex_token_iterator</CODE> is used to
performs field splitting: that is to say it enumerates one string for each enumerate a single sub-expression with index -1, then the iterator performs
field splitting: that is to say it enumerates one character sequence for each
section of the character container sequence that does not match the regular section of the character container sequence that does not match the regular
expression specified.</P> expression specified.</P>
<PRE> <PRE>
@ -46,7 +47,8 @@ class regex_token_iterator
{ {
public: public:
typedef <A href="basic_regex.html">basic_regex</A>&lt;charT, traits, Allocator&gt; regex_type; typedef <A href="basic_regex.html">basic_regex</A>&lt;charT, traits, Allocator&gt; regex_type;
typedef basic_string&lt;charT&gt; value_type; typedef <A href="sub_match.html">sub_match</A>
&lt;BidirectionalIterator&gt; value_type;
typedef typename iterator_traits&lt;BidirectionalIterator&gt;::difference_type difference_type; typedef typename iterator_traits&lt;BidirectionalIterator&gt;::difference_type difference_type;
typedef const value_type* pointer; typedef const value_type* pointer;
typedef const value_type&amp; reference; typedef const value_type&amp; reference;
@ -140,13 +142,13 @@ regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const reg
<B>Effects: </B>returns <CODE>!(*this == that)</CODE>.</P> <B>Effects: </B>returns <CODE>!(*this == that)</CODE>.</P>
<PRE><A name=o4></A>const value_type&amp; operator*()const;</PRE> <PRE><A name=o4></A>const value_type&amp; operator*()const;</PRE>
<P> <P>
<B>Effects: </B>returns the current string being enumerated.</P> <B>Effects: </B>returns the current character sequence being enumerated.</P>
<PRE><A name=o5></A>const value_type* operator-&gt;()const;</PRE> <PRE><A name=o5></A>const value_type* operator-&gt;()const;</PRE>
<P> <P>
<B>Effects: </B>returns <CODE>&amp;(*this)</CODE>.</P> <B>Effects: </B>returns <CODE>&amp;(*this)</CODE>.</P>
<PRE><A name=o6></A>regex_token_iterator&amp; operator++();</PRE> <PRE><A name=o6></A>regex_token_iterator&amp; operator++();</PRE>
<P> <P>
<B>Effects: </B>Moves on to the next string to be enumerated.</P> <B>Effects: </B>Moves on to the next character sequence to be enumerated.</P>
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of <P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>), matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
or if the program runs out of stack space while matching the expression (if or if the program runs out of stack space while matching the expression (if
@ -275,8 +277,7 @@ boost::regex e(<FONT color=#0000ff>"&lt;\\s*A\\s+[^&gt;]*href\\s*=\\s*\"([^\"]*)
24 Oct 2003 24 Oct 2003
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p> <!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
<p><i>&copy; Copyright John Maddock&nbsp;1998- <p><i>&copy; Copyright John Maddock&nbsp;1998-
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> <!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<P><I>Use, modification and distribution are subject to the Boost Software License, <P><I>Use, modification and distribution are subject to the Boost Software License,
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A> Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P> or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>

View File

@ -30,11 +30,12 @@
<P>The template class <CODE>regex_token_iterator</CODE> is an iterator adapter; <P>The template class <CODE>regex_token_iterator</CODE> is an iterator adapter;
that is to say it represents a new view of an existing iterator sequence, by that is to say it represents a new view of an existing iterator sequence, by
enumerating all the occurrences of a regular expression within that sequence, enumerating all the occurrences of a regular expression within that sequence,
and presenting one or more new strings for each match found. Each position and presenting one or more character sequences for each match found. Each
enumerated by the iterator is a string that represents what matched a position enumerated by the iterator is a <A href="sub_match.html">sub_match</A>
particular sub-expression within the regular expression. When class <CODE>regex_token_iterator</CODE> object that represents what matched a particular sub-expression within the
is used to enumerate a single sub-expression with index -1, then the iterator regular expression. When class <CODE>regex_token_iterator</CODE> is used to
performs field splitting: that is to say it enumerates one string for each enumerate a single sub-expression with index -1, then the iterator performs
field splitting: that is to say it enumerates one character sequence for each
section of the character container sequence that does not match the regular section of the character container sequence that does not match the regular
expression specified.</P> expression specified.</P>
<PRE> <PRE>
@ -46,7 +47,8 @@ class regex_token_iterator
{ {
public: public:
typedef <A href="basic_regex.html">basic_regex</A>&lt;charT, traits, Allocator&gt; regex_type; typedef <A href="basic_regex.html">basic_regex</A>&lt;charT, traits, Allocator&gt; regex_type;
typedef basic_string&lt;charT&gt; value_type; typedef <A href="sub_match.html">sub_match</A>
&lt;BidirectionalIterator&gt; value_type;
typedef typename iterator_traits&lt;BidirectionalIterator&gt;::difference_type difference_type; typedef typename iterator_traits&lt;BidirectionalIterator&gt;::difference_type difference_type;
typedef const value_type* pointer; typedef const value_type* pointer;
typedef const value_type&amp; reference; typedef const value_type&amp; reference;
@ -140,13 +142,13 @@ regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const reg
<B>Effects: </B>returns <CODE>!(*this == that)</CODE>.</P> <B>Effects: </B>returns <CODE>!(*this == that)</CODE>.</P>
<PRE><A name=o4></A>const value_type&amp; operator*()const;</PRE> <PRE><A name=o4></A>const value_type&amp; operator*()const;</PRE>
<P> <P>
<B>Effects: </B>returns the current string being enumerated.</P> <B>Effects: </B>returns the current character sequence being enumerated.</P>
<PRE><A name=o5></A>const value_type* operator-&gt;()const;</PRE> <PRE><A name=o5></A>const value_type* operator-&gt;()const;</PRE>
<P> <P>
<B>Effects: </B>returns <CODE>&amp;(*this)</CODE>.</P> <B>Effects: </B>returns <CODE>&amp;(*this)</CODE>.</P>
<PRE><A name=o6></A>regex_token_iterator&amp; operator++();</PRE> <PRE><A name=o6></A>regex_token_iterator&amp; operator++();</PRE>
<P> <P>
<B>Effects: </B>Moves on to the next string to be enumerated.</P> <B>Effects: </B>Moves on to the next character sequence to be enumerated.</P>
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of <P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>), matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
or if the program runs out of stack space while matching the expression (if or if the program runs out of stack space while matching the expression (if
@ -275,8 +277,7 @@ boost::regex e(<FONT color=#0000ff>"&lt;\\s*A\\s+[^&gt;]*href\\s*=\\s*\"([^\"]*)
24 Oct 2003 24 Oct 2003
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p> <!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
<p><i>&copy; Copyright John Maddock&nbsp;1998- <p><i>&copy; Copyright John Maddock&nbsp;1998-
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> <!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
<P><I>Use, modification and distribution are subject to the Boost Software License, <P><I>Use, modification and distribution are subject to the Boost Software License,
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A> Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P> or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>

View File

@ -209,6 +209,9 @@ using std::distance;
# ifdef BOOST_REGEX_DYN_LINK # ifdef BOOST_REGEX_DYN_LINK
# define BOOST_DYN_LINK # define BOOST_DYN_LINK
# endif # endif
#ifdef BOOST_REGEX_DIAG
# define BOOST_LIB_DIAGNOSTIC
#endif
# include <boost/config/auto_link.hpp> # include <boost/config/auto_link.hpp>
#endif #endif

View File

@ -44,6 +44,9 @@ namespace boost{
# pragma warning(disable:4700) # pragma warning(disable:4700)
#endif #endif
// testing
#define TEST_BECKER_INTERFACE
template <class BidirectionalIterator, template <class BidirectionalIterator,
class charT, class charT,
class traits, class traits,
@ -51,13 +54,17 @@ template <class BidirectionalIterator,
class regex_token_iterator_implementation class regex_token_iterator_implementation
{ {
typedef basic_regex<charT, traits, Allocator> regex_type; typedef basic_regex<charT, traits, Allocator> regex_type;
#ifdef TEST_BECKER_INTERFACE
typedef sub_match<BidirectionalIterator> value_type;
#else
typedef std::basic_string<charT> value_type; typedef std::basic_string<charT> value_type;
#endif
match_results<BidirectionalIterator> what; // current match match_results<BidirectionalIterator> what; // current match
BidirectionalIterator end; // end of search area BidirectionalIterator end; // end of search area
const regex_type* pre; // the expression const regex_type* pre; // the expression
match_flag_type flags; // match flags match_flag_type flags; // match flags
std::basic_string<charT> result; // the current string result value_type result; // the current string result
int N; // the current sub-expression being enumerated int N; // the current sub-expression being enumerated
std::vector<int> subs; // the sub-expressions to enumerate std::vector<int> subs; // the sub-expressions to enumerate
@ -99,12 +106,22 @@ public:
if(regex_search(first, end, what, *pre, flags) == true) if(regex_search(first, end, what, *pre, flags) == true)
{ {
N = 0; N = 0;
#ifdef TEST_BECKER_INTERFACE
result = ((subs[N] == -1) ? what.prefix() : what[(int)subs[N]]);
#else
result = ((subs[N] == -1) ? value_type(what.prefix().str()) : value_type(what[(int)subs[N]].str())); result = ((subs[N] == -1) ? value_type(what.prefix().str()) : value_type(what[(int)subs[N]].str()));
#endif
return true; return true;
} }
else if((subs[N] == -1) && (first != end)) else if((subs[N] == -1) && (first != end))
{ {
#ifdef TEST_BECKER_INTERFACE
result.first = first;
result.second = end;
result.matched = true;
#else
result = value_type(first, end); result = value_type(first, end);
#endif
return true; return true;
} }
return false; return false;
@ -119,7 +136,7 @@ public:
&& (what[0].first == that.what[0].first) && (what[0].first == that.what[0].first)
&& (what[0].second == that.what[0].second); && (what[0].second == that.what[0].second);
} }
const std::basic_string<charT>& get() const value_type& get()
{ return result; } { return result; }
bool next() bool next()
{ {
@ -128,7 +145,11 @@ public:
if(N+1 < (int)subs.size()) if(N+1 < (int)subs.size())
{ {
++N; ++N;
#ifdef TEST_BECKER_INTERFACE
result =((subs[N] == -1) ? what.prefix().first : what[subs[N]]);
#else
result =((subs[N] == -1) ? value_type(what.prefix().first, what.prefix().second) : value_type(what[subs[N]].first, what[subs[N]].second)); result =((subs[N] == -1) ? value_type(what.prefix().first, what.prefix().second) : value_type(what[subs[N]].first, what[subs[N]].second));
#endif
return true; return true;
} }
if(what.prefix().first != what[0].second) if(what.prefix().first != what[0].second)
@ -137,13 +158,23 @@ public:
if(regex_search(last_end, end, what, *pre, ((what[0].first == what[0].second) ? flags | regex_constants::match_not_initial_null : flags))) if(regex_search(last_end, end, what, *pre, ((what[0].first == what[0].second) ? flags | regex_constants::match_not_initial_null : flags)))
{ {
N =0; N =0;
#ifdef TEST_BECKER_INTERFACE
result =((subs[N] == -1) ? what.prefix() : what[subs[N]]);
#else
result =((subs[N] == -1) ? value_type(what.prefix().first, what.prefix().second) : value_type(what[subs[N]].first, what[subs[N]].second)); result =((subs[N] == -1) ? value_type(what.prefix().first, what.prefix().second) : value_type(what[subs[N]].first, what[subs[N]].second));
#endif
return true; return true;
} }
else if((last_end != end) && (subs[0] == -1)) else if((last_end != end) && (subs[0] == -1))
{ {
N =-1; N =-1;
#ifdef TEST_BECKER_INTERFACE
result.first = last_end;
result.second = end;
result.matched = true;
#else
result = value_type(last_end, end); result = value_type(last_end, end);
#endif
return true; return true;
} }
return false; return false;
@ -161,7 +192,11 @@ private:
typedef shared_ptr<impl> pimpl; typedef shared_ptr<impl> pimpl;
public: public:
typedef basic_regex<charT, traits, Allocator> regex_type; typedef basic_regex<charT, traits, Allocator> regex_type;
#ifdef TEST_BECKER_INTERFACE
typedef sub_match<BidirectionalIterator> value_type;
#else
typedef std::basic_string<charT> value_type; typedef std::basic_string<charT> value_type;
#endif
typedef typename re_detail::regex_iterator_traits<BidirectionalIterator>::difference_type typedef typename re_detail::regex_iterator_traits<BidirectionalIterator>::difference_type
difference_type; difference_type;
typedef const value_type* pointer; typedef const value_type* pointer;