From 23f71d7727aa140c165fe34b008a1d6a7a6f12ff Mon Sep 17 00:00:00 2001
From: John Maddock <john@johnmaddock.co.uk>
Date: Mon, 3 Nov 2003 11:25:30 +0000
Subject: [PATCH] Changed regex_token_iterator to return a sub_match rather
 than a std::string (it's more efficient).

[SVN r20613]
---
 doc/Attic/regex_token_iterator.html           | 21 +++++-----
 doc/regex_token_iterator.html                 | 21 +++++-----
 include/boost/regex/config.hpp                |  3 ++
 .../boost/regex/v4/regex_token_iterator.hpp   | 39 ++++++++++++++++++-
 4 files changed, 62 insertions(+), 22 deletions(-)
diff --git a/doc/Attic/regex_token_iterator.html b/doc/Attic/regex_token_iterator.html
index abe9cb04..1cbd0b00 100644
--- a/doc/Attic/regex_token_iterator.html
+++ b/doc/Attic/regex_token_iterator.html
@@ -30,11 +30,12 @@
       <P>The template class <CODE>regex_token_iterator</CODE> is an iterator adapter; 
          that is to say it represents a new view of an existing iterator sequence, by 
          enumerating all the occurrences of a regular expression within that sequence, 
-         and presenting one or more new strings for each match found. Each position 
-         enumerated by the iterator is a string that represents what matched a 
-         particular sub-expression within the regular expression. When class <CODE>regex_token_iterator</CODE>
-         is used to enumerate a single sub-expression with index -1, then the iterator 
-         performs field splitting: that is to say it enumerates one string for each 
+         and presenting one or more character sequences for each match found. Each 
+         position enumerated by the iterator is a <A href="sub_match.html">sub_match</A> 
+         object that represents what matched a particular sub-expression within the 
+         regular expression. When class <CODE>regex_token_iterator</CODE> is used to 
+         enumerate a single sub-expression with index -1, then the iterator performs 
+         field splitting: that is to say it enumerates one character sequence for each 
          section of the character container sequence that does not match the regular 
          expression specified.</P>
       <PRE>
@@ -46,7 +47,8 @@ class regex_token_iterator
 {
 public:
    typedef          <A href="basic_regex.html">basic_regex</A>&lt;charT, traits, Allocator&gt;                   regex_type;
-   typedef          basic_string&lt;charT&gt;                                     value_type;
+   typedef          <A href="sub_match.html">sub_match</A>                     
+             &lt;BidirectionalIterator&gt;                                     value_type;
    typedef typename iterator_traits&lt;BidirectionalIterator&gt;::difference_type difference_type;
    typedef          const value_type*                                       pointer;
    typedef          const value_type&amp;                                       reference;
@@ -140,13 +142,13 @@ regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const reg
          <B>Effects: </B>returns <CODE>!(*this == that)</CODE>.</P>
       <PRE><A name=o4></A>const value_type&amp; operator*()const;</PRE>
       <P>
-         <B>Effects: </B>returns the current string being enumerated.</P>
+         <B>Effects: </B>returns the current character sequence being enumerated.</P>
       <PRE><A name=o5></A>const value_type* operator-&gt;()const;</PRE>
       <P>
          <B>Effects: </B>returns <CODE>&amp;(*this)</CODE>.</P>
       <PRE><A name=o6></A>regex_token_iterator&amp; operator++();</PRE>
       <P>
-         <B>Effects: </B>Moves on to the next string to be enumerated.</P>
+         <B>Effects: </B>Moves on to the next character sequence to be enumerated.</P>
       <P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of 
          matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>), 
          or if the program runs out of stack space while matching the expression (if 
@@ -275,8 +277,7 @@ boost::regex e(<FONT color=#0000ff>"&lt;\\s*A\\s+[^&gt;]*href\\s*=\\s*\"([^\"]*)
          24 Oct 2003 
          <!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
       <p><i>© Copyright John Maddock&nbsp;1998- 
-            <!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 
-            2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
+            <!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->  2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
       <P><I>Use, modification and distribution are subject to the Boost Software License, 
             Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
             or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
diff --git a/doc/regex_token_iterator.html b/doc/regex_token_iterator.html
index abe9cb04..1cbd0b00 100644
--- a/doc/regex_token_iterator.html
+++ b/doc/regex_token_iterator.html
@@ -30,11 +30,12 @@
       <P>The template class <CODE>regex_token_iterator</CODE> is an iterator adapter; 
          that is to say it represents a new view of an existing iterator sequence, by 
          enumerating all the occurrences of a regular expression within that sequence, 
-         and presenting one or more new strings for each match found. Each position 
-         enumerated by the iterator is a string that represents what matched a 
-         particular sub-expression within the regular expression. When class <CODE>regex_token_iterator</CODE>
-         is used to enumerate a single sub-expression with index -1, then the iterator 
-         performs field splitting: that is to say it enumerates one string for each 
+         and presenting one or more character sequences for each match found. Each 
+         position enumerated by the iterator is a <A href="sub_match.html">sub_match</A> 
+         object that represents what matched a particular sub-expression within the 
+         regular expression. When class <CODE>regex_token_iterator</CODE> is used to 
+         enumerate a single sub-expression with index -1, then the iterator performs 
+         field splitting: that is to say it enumerates one character sequence for each 
          section of the character container sequence that does not match the regular 
          expression specified.</P>
       <PRE>
@@ -46,7 +47,8 @@ class regex_token_iterator
 {
 public:
    typedef          <A href="basic_regex.html">basic_regex</A>&lt;charT, traits, Allocator&gt;                   regex_type;
-   typedef          basic_string&lt;charT&gt;                                     value_type;
+   typedef          <A href="sub_match.html">sub_match</A>                     
+             &lt;BidirectionalIterator&gt;                                     value_type;
    typedef typename iterator_traits&lt;BidirectionalIterator&gt;::difference_type difference_type;
    typedef          const value_type*                                       pointer;
    typedef          const value_type&amp;                                       reference;
@@ -140,13 +142,13 @@ regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const reg
          <B>Effects: </B>returns <CODE>!(*this == that)</CODE>.</P>
       <PRE><A name=o4></A>const value_type&amp; operator*()const;</PRE>
       <P>
-         <B>Effects: </B>returns the current string being enumerated.</P>
+         <B>Effects: </B>returns the current character sequence being enumerated.</P>
       <PRE><A name=o5></A>const value_type* operator-&gt;()const;</PRE>
       <P>
          <B>Effects: </B>returns <CODE>&amp;(*this)</CODE>.</P>
       <PRE><A name=o6></A>regex_token_iterator&amp; operator++();</PRE>
       <P>
-         <B>Effects: </B>Moves on to the next string to be enumerated.</P>
+         <B>Effects: </B>Moves on to the next character sequence to be enumerated.</P>
       <P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of 
          matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>), 
          or if the program runs out of stack space while matching the expression (if 
@@ -275,8 +277,7 @@ boost::regex e(<FONT color=#0000ff>"&lt;\\s*A\\s+[^&gt;]*href\\s*=\\s*\"([^\"]*)
          24 Oct 2003 
          <!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
       <p><i>© Copyright John Maddock&nbsp;1998- 
-            <!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 
-            2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
+            <!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->  2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
       <P><I>Use, modification and distribution are subject to the Boost Software License, 
             Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
             or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
diff --git a/include/boost/regex/config.hpp b/include/boost/regex/config.hpp
index 991f1cba..a3aaf5f9 100644
--- a/include/boost/regex/config.hpp
+++ b/include/boost/regex/config.hpp
@@ -209,6 +209,9 @@ using std::distance;
 #  ifdef BOOST_REGEX_DYN_LINK
 #     define BOOST_DYN_LINK
 #  endif
+#  ifdef BOOST_REGEX_DIAG // forward the regex-specific switch to auto_link.hpp's diagnostic macro
+#     define BOOST_LIB_DIAGNOSTIC
+#  endif
 #  include <boost/config/auto_link.hpp>
 #endif
 
diff --git a/include/boost/regex/v4/regex_token_iterator.hpp b/include/boost/regex/v4/regex_token_iterator.hpp
index 90fa7c21..a5ed5f3d 100644
--- a/include/boost/regex/v4/regex_token_iterator.hpp
+++ b/include/boost/regex/v4/regex_token_iterator.hpp
@@ -44,6 +44,9 @@ namespace boost{
 #  pragma warning(disable:4700)
 #endif
 
+// testing: when defined, value_type is sub_match<BidirectionalIterator> instead of std::basic_string<charT>
+#define TEST_BECKER_INTERFACE
+
 template <class BidirectionalIterator,
           class charT,
           class traits,
@@ -51,13 +54,17 @@ template <class BidirectionalIterator,
 class regex_token_iterator_implementation 
 {
    typedef basic_regex<charT, traits, Allocator> regex_type;
+#ifdef TEST_BECKER_INTERFACE
+   typedef sub_match<BidirectionalIterator>      value_type;
+#else
    typedef std::basic_string<charT>              value_type;
+#endif
 
    match_results<BidirectionalIterator> what;   // current match
    BidirectionalIterator                end;    // end of search area
    const regex_type*                    pre;    // the expression
    match_flag_type                      flags;  // match flags
-   std::basic_string<charT>             result; // the current string result
+   value_type                           result; // the current result: a sub_match, or a string without TEST_BECKER_INTERFACE
    int                                  N;      // the current sub-expression being enumerated
    std::vector<int>                     subs;   // the sub-expressions to enumerate
 
@@ -99,12 +106,22 @@ public:
       if(regex_search(first, end, what, *pre, flags) == true)
       {
          N = 0;
+#ifdef TEST_BECKER_INTERFACE
+         result = ((subs[N] == -1) ? what.prefix() : what[(int)subs[N]]);
+#else
          result = ((subs[N] == -1) ? value_type(what.prefix().str()) : value_type(what[(int)subs[N]].str()));
+#endif
          return true;
       }
       else if((subs[N] == -1) && (first != end))
       {
+#ifdef TEST_BECKER_INTERFACE
+         result.first = first;
+         result.second = end;
+         result.matched = true;
+#else
          result = value_type(first, end);
+#endif
          return true;
       }
       return false;
@@ -119,7 +136,7 @@ public:
          && (what[0].first == that.what[0].first) 
          && (what[0].second == that.what[0].second);
    }
-   const std::basic_string<charT>& get()
+   const value_type& get()
    { return result; }
    bool next()
    {
@@ -128,7 +145,11 @@ public:
       if(N+1 < (int)subs.size())
       {
          ++N;
+#ifdef TEST_BECKER_INTERFACE
+         result =((subs[N] == -1) ? what.prefix() : what[subs[N]]); // whole prefix sub_match, not just its start iterator
+#else
          result =((subs[N] == -1) ? value_type(what.prefix().first, what.prefix().second) : value_type(what[subs[N]].first, what[subs[N]].second));
+#endif
          return true;
       }
       if(what.prefix().first != what[0].second)
@@ -137,13 +158,23 @@ public:
       if(regex_search(last_end, end, what, *pre, ((what[0].first == what[0].second) ? flags | regex_constants::match_not_initial_null : flags)))
       {
          N =0;
+#ifdef TEST_BECKER_INTERFACE
+         result =((subs[N] == -1) ? what.prefix() : what[subs[N]]);
+#else
          result =((subs[N] == -1) ? value_type(what.prefix().first, what.prefix().second) : value_type(what[subs[N]].first, what[subs[N]].second));
+#endif
          return true;
       }
       else if((last_end != end) && (subs[0] == -1))
       {
          N =-1;
+#ifdef TEST_BECKER_INTERFACE
+         result.first = last_end;
+         result.second = end;
+         result.matched = true;
+#else
          result = value_type(last_end, end);
+#endif
          return true;
       }
       return false;
@@ -161,7 +192,11 @@ private:
    typedef shared_ptr<impl> pimpl;
 public:
    typedef          basic_regex<charT, traits, Allocator>                   regex_type;
+#ifdef TEST_BECKER_INTERFACE
+   typedef          sub_match<BidirectionalIterator>                        value_type;
+#else
    typedef          std::basic_string<charT>                                value_type;
+#endif
    typedef typename re_detail::regex_iterator_traits<BidirectionalIterator>::difference_type 
                                                                             difference_type;
    typedef          const value_type*                                       pointer;