mirror of
https://github.com/boostorg/regex.git
synced 2025-07-16 13:52:17 +02:00
Changed regex_token_iterator to return a sub_match rather than a std::string (it's more efficient).
[SVN r20613]
This commit is contained in:
@ -30,11 +30,12 @@
|
|||||||
<P>The template class <CODE>regex_token_iterator</CODE> is an iterator adapter;
|
<P>The template class <CODE>regex_token_iterator</CODE> is an iterator adapter;
|
||||||
that is to say it represents a new view of an existing iterator sequence, by
|
that is to say it represents a new view of an existing iterator sequence, by
|
||||||
enumerating all the occurrences of a regular expression within that sequence,
|
enumerating all the occurrences of a regular expression within that sequence,
|
||||||
and presenting one or more new strings for each match found. Each position
|
and presenting one or more character sequences for each match found. Each
|
||||||
enumerated by the iterator is a string that represents what matched a
|
position enumerated by the iterator is a <A href="sub_match.html">sub_match</A>
|
||||||
particular sub-expression within the regular expression. When class <CODE>regex_token_iterator</CODE>
|
object that represents what matched a particular sub-expression within the
|
||||||
is used to enumerate a single sub-expression with index -1, then the iterator
|
regular expression. When class <CODE>regex_token_iterator</CODE> is used to
|
||||||
performs field splitting: that is to say it enumerates one string for each
|
enumerate a single sub-expression with index -1, then the iterator performs
|
||||||
|
field splitting: that is to say it enumerates one character sequence for each
|
||||||
section of the character container sequence that does not match the regular
|
section of the character container sequence that does not match the regular
|
||||||
expression specified.</P>
|
expression specified.</P>
|
||||||
<PRE>
|
<PRE>
|
||||||
@ -46,7 +47,8 @@ class regex_token_iterator
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
typedef <A href="basic_regex.html">basic_regex</A><charT, traits, Allocator> regex_type;
|
typedef <A href="basic_regex.html">basic_regex</A><charT, traits, Allocator> regex_type;
|
||||||
typedef basic_string<charT> value_type;
|
typedef <A href="sub_match.html">sub_match</A>
|
||||||
|
<BidirectionalIterator> value_type;
|
||||||
typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
|
typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
|
||||||
typedef const value_type* pointer;
|
typedef const value_type* pointer;
|
||||||
typedef const value_type& reference;
|
typedef const value_type& reference;
|
||||||
@ -140,13 +142,13 @@ regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const reg
|
|||||||
<B>Effects: </B>returns <CODE>!(*this == that)</CODE>.</P>
|
<B>Effects: </B>returns <CODE>!(*this == that)</CODE>.</P>
|
||||||
<PRE><A name=o4></A>const value_type& operator*()const;</PRE>
|
<PRE><A name=o4></A>const value_type& operator*()const;</PRE>
|
||||||
<P>
|
<P>
|
||||||
<B>Effects: </B>returns the current string being enumerated.</P>
|
<B>Effects: </B>returns the current character sequence being enumerated.</P>
|
||||||
<PRE><A name=o5></A>const value_type* operator->()const;</PRE>
|
<PRE><A name=o5></A>const value_type* operator->()const;</PRE>
|
||||||
<P>
|
<P>
|
||||||
<B>Effects: </B>returns <CODE>&(*this)</CODE>.</P>
|
<B>Effects: </B>returns <CODE>&(*this)</CODE>.</P>
|
||||||
<PRE><A name=o6></A>regex_token_iterator& operator++();</PRE>
|
<PRE><A name=o6></A>regex_token_iterator& operator++();</PRE>
|
||||||
<P>
|
<P>
|
||||||
<B>Effects: </B>Moves on to the next string to be enumerated.</P>
|
<B>Effects: </B>Moves on to the next character sequence to be enumerated.</P>
|
||||||
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
|
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
|
||||||
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
|
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
|
||||||
or if the program runs out of stack space while matching the expression (if
|
or if the program runs out of stack space while matching the expression (if
|
||||||
@ -275,8 +277,7 @@ boost::regex e(<FONT color=#0000ff>"<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)
|
|||||||
24 Oct 2003
|
24 Oct 2003
|
||||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||||
<p><i>&copy; Copyright John Maddock 1998-
|
<p><i>&copy; Copyright John Maddock 1998-
|
||||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
|
||||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||||
|
@ -30,11 +30,12 @@
|
|||||||
<P>The template class <CODE>regex_token_iterator</CODE> is an iterator adapter;
|
<P>The template class <CODE>regex_token_iterator</CODE> is an iterator adapter;
|
||||||
that is to say it represents a new view of an existing iterator sequence, by
|
that is to say it represents a new view of an existing iterator sequence, by
|
||||||
enumerating all the occurrences of a regular expression within that sequence,
|
enumerating all the occurrences of a regular expression within that sequence,
|
||||||
and presenting one or more new strings for each match found. Each position
|
and presenting one or more character sequences for each match found. Each
|
||||||
enumerated by the iterator is a string that represents what matched a
|
position enumerated by the iterator is a <A href="sub_match.html">sub_match</A>
|
||||||
particular sub-expression within the regular expression. When class <CODE>regex_token_iterator</CODE>
|
object that represents what matched a particular sub-expression within the
|
||||||
is used to enumerate a single sub-expression with index -1, then the iterator
|
regular expression. When class <CODE>regex_token_iterator</CODE> is used to
|
||||||
performs field splitting: that is to say it enumerates one string for each
|
enumerate a single sub-expression with index -1, then the iterator performs
|
||||||
|
field splitting: that is to say it enumerates one character sequence for each
|
||||||
section of the character container sequence that does not match the regular
|
section of the character container sequence that does not match the regular
|
||||||
expression specified.</P>
|
expression specified.</P>
|
||||||
<PRE>
|
<PRE>
|
||||||
@ -46,7 +47,8 @@ class regex_token_iterator
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
typedef <A href="basic_regex.html">basic_regex</A><charT, traits, Allocator> regex_type;
|
typedef <A href="basic_regex.html">basic_regex</A><charT, traits, Allocator> regex_type;
|
||||||
typedef basic_string<charT> value_type;
|
typedef <A href="sub_match.html">sub_match</A>
|
||||||
|
<BidirectionalIterator> value_type;
|
||||||
typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
|
typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
|
||||||
typedef const value_type* pointer;
|
typedef const value_type* pointer;
|
||||||
typedef const value_type& reference;
|
typedef const value_type& reference;
|
||||||
@ -140,13 +142,13 @@ regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const reg
|
|||||||
<B>Effects: </B>returns <CODE>!(*this == that)</CODE>.</P>
|
<B>Effects: </B>returns <CODE>!(*this == that)</CODE>.</P>
|
||||||
<PRE><A name=o4></A>const value_type& operator*()const;</PRE>
|
<PRE><A name=o4></A>const value_type& operator*()const;</PRE>
|
||||||
<P>
|
<P>
|
||||||
<B>Effects: </B>returns the current string being enumerated.</P>
|
<B>Effects: </B>returns the current character sequence being enumerated.</P>
|
||||||
<PRE><A name=o5></A>const value_type* operator->()const;</PRE>
|
<PRE><A name=o5></A>const value_type* operator->()const;</PRE>
|
||||||
<P>
|
<P>
|
||||||
<B>Effects: </B>returns <CODE>&(*this)</CODE>.</P>
|
<B>Effects: </B>returns <CODE>&(*this)</CODE>.</P>
|
||||||
<PRE><A name=o6></A>regex_token_iterator& operator++();</PRE>
|
<PRE><A name=o6></A>regex_token_iterator& operator++();</PRE>
|
||||||
<P>
|
<P>
|
||||||
<B>Effects: </B>Moves on to the next string to be enumerated.</P>
|
<B>Effects: </B>Moves on to the next character sequence to be enumerated.</P>
|
||||||
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
|
<P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of
|
||||||
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
|
matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>),
|
||||||
or if the program runs out of stack space while matching the expression (if
|
or if the program runs out of stack space while matching the expression (if
|
||||||
@ -275,8 +277,7 @@ boost::regex e(<FONT color=#0000ff>"<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)
|
|||||||
24 Oct 2003
|
24 Oct 2003
|
||||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||||
<p><i>&copy; Copyright John Maddock 1998-
|
<p><i>&copy; Copyright John Maddock 1998-
|
||||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->
|
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||||
2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
|
||||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||||
|
@ -209,6 +209,9 @@ using std::distance;
|
|||||||
# ifdef BOOST_REGEX_DYN_LINK
|
# ifdef BOOST_REGEX_DYN_LINK
|
||||||
# define BOOST_DYN_LINK
|
# define BOOST_DYN_LINK
|
||||||
# endif
|
# endif
|
||||||
|
#ifdef BOOST_REGEX_DIAG
|
||||||
|
# define BOOST_LIB_DIAGNOSTIC
|
||||||
|
#endif
|
||||||
# include <boost/config/auto_link.hpp>
|
# include <boost/config/auto_link.hpp>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -44,6 +44,9 @@ namespace boost{
|
|||||||
# pragma warning(disable:4700)
|
# pragma warning(disable:4700)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// testing
|
||||||
|
#define TEST_BECKER_INTERFACE
|
||||||
|
|
||||||
template <class BidirectionalIterator,
|
template <class BidirectionalIterator,
|
||||||
class charT,
|
class charT,
|
||||||
class traits,
|
class traits,
|
||||||
@ -51,13 +54,17 @@ template <class BidirectionalIterator,
|
|||||||
class regex_token_iterator_implementation
|
class regex_token_iterator_implementation
|
||||||
{
|
{
|
||||||
typedef basic_regex<charT, traits, Allocator> regex_type;
|
typedef basic_regex<charT, traits, Allocator> regex_type;
|
||||||
|
#ifdef TEST_BECKER_INTERFACE
|
||||||
|
typedef sub_match<BidirectionalIterator> value_type;
|
||||||
|
#else
|
||||||
typedef std::basic_string<charT> value_type;
|
typedef std::basic_string<charT> value_type;
|
||||||
|
#endif
|
||||||
|
|
||||||
match_results<BidirectionalIterator> what; // current match
|
match_results<BidirectionalIterator> what; // current match
|
||||||
BidirectionalIterator end; // end of search area
|
BidirectionalIterator end; // end of search area
|
||||||
const regex_type* pre; // the expression
|
const regex_type* pre; // the expression
|
||||||
match_flag_type flags; // match flags
|
match_flag_type flags; // match flags
|
||||||
std::basic_string<charT> result; // the current string result
|
value_type result; // the current string result
|
||||||
int N; // the current sub-expression being enumerated
|
int N; // the current sub-expression being enumerated
|
||||||
std::vector<int> subs; // the sub-expressions to enumerate
|
std::vector<int> subs; // the sub-expressions to enumerate
|
||||||
|
|
||||||
@ -99,12 +106,22 @@ public:
|
|||||||
if(regex_search(first, end, what, *pre, flags) == true)
|
if(regex_search(first, end, what, *pre, flags) == true)
|
||||||
{
|
{
|
||||||
N = 0;
|
N = 0;
|
||||||
|
#ifdef TEST_BECKER_INTERFACE
|
||||||
|
result = ((subs[N] == -1) ? what.prefix() : what[(int)subs[N]]);
|
||||||
|
#else
|
||||||
result = ((subs[N] == -1) ? value_type(what.prefix().str()) : value_type(what[(int)subs[N]].str()));
|
result = ((subs[N] == -1) ? value_type(what.prefix().str()) : value_type(what[(int)subs[N]].str()));
|
||||||
|
#endif
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
else if((subs[N] == -1) && (first != end))
|
else if((subs[N] == -1) && (first != end))
|
||||||
{
|
{
|
||||||
|
#ifdef TEST_BECKER_INTERFACE
|
||||||
|
result.first = first;
|
||||||
|
result.second = end;
|
||||||
|
result.matched = true;
|
||||||
|
#else
|
||||||
result = value_type(first, end);
|
result = value_type(first, end);
|
||||||
|
#endif
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
@ -119,7 +136,7 @@ public:
|
|||||||
&& (what[0].first == that.what[0].first)
|
&& (what[0].first == that.what[0].first)
|
||||||
&& (what[0].second == that.what[0].second);
|
&& (what[0].second == that.what[0].second);
|
||||||
}
|
}
|
||||||
const std::basic_string<charT>& get()
|
const value_type& get()
|
||||||
{ return result; }
|
{ return result; }
|
||||||
bool next()
|
bool next()
|
||||||
{
|
{
|
||||||
@ -128,7 +145,11 @@ public:
|
|||||||
if(N+1 < (int)subs.size())
|
if(N+1 < (int)subs.size())
|
||||||
{
|
{
|
||||||
++N;
|
++N;
|
||||||
|
#ifdef TEST_BECKER_INTERFACE
|
||||||
|
result =((subs[N] == -1) ? what.prefix().first : what[subs[N]]);
|
||||||
|
#else
|
||||||
result =((subs[N] == -1) ? value_type(what.prefix().first, what.prefix().second) : value_type(what[subs[N]].first, what[subs[N]].second));
|
result =((subs[N] == -1) ? value_type(what.prefix().first, what.prefix().second) : value_type(what[subs[N]].first, what[subs[N]].second));
|
||||||
|
#endif
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
if(what.prefix().first != what[0].second)
|
if(what.prefix().first != what[0].second)
|
||||||
@ -137,13 +158,23 @@ public:
|
|||||||
if(regex_search(last_end, end, what, *pre, ((what[0].first == what[0].second) ? flags | regex_constants::match_not_initial_null : flags)))
|
if(regex_search(last_end, end, what, *pre, ((what[0].first == what[0].second) ? flags | regex_constants::match_not_initial_null : flags)))
|
||||||
{
|
{
|
||||||
N =0;
|
N =0;
|
||||||
|
#ifdef TEST_BECKER_INTERFACE
|
||||||
|
result =((subs[N] == -1) ? what.prefix() : what[subs[N]]);
|
||||||
|
#else
|
||||||
result =((subs[N] == -1) ? value_type(what.prefix().first, what.prefix().second) : value_type(what[subs[N]].first, what[subs[N]].second));
|
result =((subs[N] == -1) ? value_type(what.prefix().first, what.prefix().second) : value_type(what[subs[N]].first, what[subs[N]].second));
|
||||||
|
#endif
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
else if((last_end != end) && (subs[0] == -1))
|
else if((last_end != end) && (subs[0] == -1))
|
||||||
{
|
{
|
||||||
N =-1;
|
N =-1;
|
||||||
|
#ifdef TEST_BECKER_INTERFACE
|
||||||
|
result.first = last_end;
|
||||||
|
result.second = end;
|
||||||
|
result.matched = true;
|
||||||
|
#else
|
||||||
result = value_type(last_end, end);
|
result = value_type(last_end, end);
|
||||||
|
#endif
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
@ -161,7 +192,11 @@ private:
|
|||||||
typedef shared_ptr<impl> pimpl;
|
typedef shared_ptr<impl> pimpl;
|
||||||
public:
|
public:
|
||||||
typedef basic_regex<charT, traits, Allocator> regex_type;
|
typedef basic_regex<charT, traits, Allocator> regex_type;
|
||||||
|
#ifdef TEST_BECKER_INTERFACE
|
||||||
|
typedef sub_match<BidirectionalIterator> value_type;
|
||||||
|
#else
|
||||||
typedef std::basic_string<charT> value_type;
|
typedef std::basic_string<charT> value_type;
|
||||||
|
#endif
|
||||||
typedef typename re_detail::regex_iterator_traits<BidirectionalIterator>::difference_type
|
typedef typename re_detail::regex_iterator_traits<BidirectionalIterator>::difference_type
|
||||||
difference_type;
|
difference_type;
|
||||||
typedef const value_type* pointer;
|
typedef const value_type* pointer;
|
||||||
|
Reference in New Issue
Block a user