From 23f71d7727aa140c165fe34b008a1d6a7a6f12ff Mon Sep 17 00:00:00 2001 From: John Maddock Date: Mon, 3 Nov 2003 11:25:30 +0000 Subject: [PATCH] Changed regex_token_iterator to return a sub_match rather than a std::string (it's more efficient). [SVN r20613] --- doc/Attic/regex_token_iterator.html | 21 +++++----- doc/regex_token_iterator.html | 21 +++++----- include/boost/regex/config.hpp | 3 ++ .../boost/regex/v4/regex_token_iterator.hpp | 39 ++++++++++++++++++- 4 files changed, 62 insertions(+), 22 deletions(-) diff --git a/doc/Attic/regex_token_iterator.html b/doc/Attic/regex_token_iterator.html index abe9cb04..1cbd0b00 100644 --- a/doc/Attic/regex_token_iterator.html +++ b/doc/Attic/regex_token_iterator.html @@ -30,11 +30,12 @@

The template class regex_token_iterator is an iterator adapter; that is to say it represents a new view of an existing iterator sequence, by enumerating all the occurrences of a regular expression within that sequence, - and presenting one or more new strings for each match found. Each position - enumerated by the iterator is a string that represents what matched a - particular sub-expression within the regular expression. When class regex_token_iterator - is used to enumerate a single sub-expression with index -1, then the iterator - performs field splitting: that is to say it enumerates one string for each + and presenting one or more character sequences for each match found. Each + position enumerated by the iterator is a sub_match + object that represents what matched a particular sub-expression within the + regular expression. When class regex_token_iterator is used to + enumerate a single sub-expression with index -1, then the iterator performs + field splitting: that is to say it enumerates one character sequence for each section of the character container sequence that does not match the regular expression specified.

@@ -46,7 +47,8 @@ class regex_token_iterator
 {
 public:
    typedef          basic_regex<charT, traits, Allocator>                   regex_type;
-   typedef          basic_string<charT>                                     value_type;
+   typedef          sub_match                     
+             <BidirectionalIterator>                                     value_type;
    typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
    typedef          const value_type*                                       pointer;
    typedef          const value_type&                                       reference;
@@ -140,13 +142,13 @@ regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const reg
          Effects: returns !(*this == that).

const value_type& operator*()const;

- Effects: returns the current string being enumerated.

+ Effects: returns the current character sequence being enumerated.

const value_type* operator->()const;

Effects: returns &(*this).

regex_token_iterator& operator++();

- Effects: Moves on to the next string to be enumerated.

+ Effects: Moves on to the next character sequence to be enumerated.

Throws: std::runtime_error if the complexity of matching the expression against an N character string begins to exceed O(N^2), or if the program runs out of stack space while matching the expression (if @@ -275,8 +277,7 @@ boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*) 24 Oct 2003

© Copyright John Maddock 1998- - - 2003

+ 2003

Use, modification and distribution are subject to the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

diff --git a/doc/regex_token_iterator.html b/doc/regex_token_iterator.html index abe9cb04..1cbd0b00 100644 --- a/doc/regex_token_iterator.html +++ b/doc/regex_token_iterator.html @@ -30,11 +30,12 @@

The template class regex_token_iterator is an iterator adapter; that is to say it represents a new view of an existing iterator sequence, by enumerating all the occurrences of a regular expression within that sequence, - and presenting one or more new strings for each match found. Each position - enumerated by the iterator is a string that represents what matched a - particular sub-expression within the regular expression. When class regex_token_iterator - is used to enumerate a single sub-expression with index -1, then the iterator - performs field splitting: that is to say it enumerates one string for each + and presenting one or more character sequences for each match found. Each + position enumerated by the iterator is a sub_match + object that represents what matched a particular sub-expression within the + regular expression. When class regex_token_iterator is used to + enumerate a single sub-expression with index -1, then the iterator performs + field splitting: that is to say it enumerates one character sequence for each section of the character container sequence that does not match the regular expression specified.

@@ -46,7 +47,8 @@ class regex_token_iterator
 {
 public:
    typedef          basic_regex<charT, traits, Allocator>                   regex_type;
-   typedef          basic_string<charT>                                     value_type;
+   typedef          sub_match                     
+             <BidirectionalIterator>                                     value_type;
    typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
    typedef          const value_type*                                       pointer;
    typedef          const value_type&                                       reference;
@@ -140,13 +142,13 @@ regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const reg
          Effects: returns !(*this == that).

const value_type& operator*()const;

- Effects: returns the current string being enumerated.

+ Effects: returns the current character sequence being enumerated.

const value_type* operator->()const;

Effects: returns &(*this).

regex_token_iterator& operator++();

- Effects: Moves on to the next string to be enumerated.

+ Effects: Moves on to the next character sequence to be enumerated.

Throws: std::runtime_error if the complexity of matching the expression against an N character string begins to exceed O(N^2), or if the program runs out of stack space while matching the expression (if @@ -275,8 +277,7 @@ boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*) 24 Oct 2003

© Copyright John Maddock 1998- - - 2003

+ 2003

Use, modification and distribution are subject to the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

diff --git a/include/boost/regex/config.hpp b/include/boost/regex/config.hpp index 991f1cba..a3aaf5f9 100644 --- a/include/boost/regex/config.hpp +++ b/include/boost/regex/config.hpp @@ -209,6 +209,9 @@ using std::distance; # ifdef BOOST_REGEX_DYN_LINK # define BOOST_DYN_LINK # endif +#ifdef BOOST_REGEX_DIAG +# define BOOST_LIB_DIAGNOSTIC +#endif # include #endif diff --git a/include/boost/regex/v4/regex_token_iterator.hpp b/include/boost/regex/v4/regex_token_iterator.hpp index 90fa7c21..a5ed5f3d 100644 --- a/include/boost/regex/v4/regex_token_iterator.hpp +++ b/include/boost/regex/v4/regex_token_iterator.hpp @@ -44,6 +44,9 @@ namespace boost{ # pragma warning(disable:4700) #endif +// testing +#define TEST_BECKER_INTERFACE + template regex_type; +#ifdef TEST_BECKER_INTERFACE + typedef sub_match value_type; +#else typedef std::basic_string value_type; +#endif match_results what; // current match BidirectionalIterator end; // end of search area const regex_type* pre; // the expression match_flag_type flags; // match flags - std::basic_string result; // the current string result + value_type result; // the current string result int N; // the current sub-expression being enumerated std::vector subs; // the sub-expressions to enumerate @@ -99,12 +106,22 @@ public: if(regex_search(first, end, what, *pre, flags) == true) { N = 0; +#ifdef TEST_BECKER_INTERFACE + result = ((subs[N] == -1) ? what.prefix() : what[(int)subs[N]]); +#else result = ((subs[N] == -1) ? 
value_type(what.prefix().str()) : value_type(what[(int)subs[N]].str())); +#endif return true; } else if((subs[N] == -1) && (first != end)) { +#ifdef TEST_BECKER_INTERFACE + result.first = first; + result.second = end; + result.matched = true; +#else result = value_type(first, end); +#endif return true; } return false; @@ -119,7 +136,7 @@ public: && (what[0].first == that.what[0].first) && (what[0].second == that.what[0].second); } - const std::basic_string& get() + const value_type& get() { return result; } bool next() { @@ -128,7 +145,11 @@ public: if(N+1 < (int)subs.size()) { ++N; +#ifdef TEST_BECKER_INTERFACE + result =((subs[N] == -1) ? what.prefix().first : what[subs[N]]); +#else result =((subs[N] == -1) ? value_type(what.prefix().first, what.prefix().second) : value_type(what[subs[N]].first, what[subs[N]].second)); +#endif return true; } if(what.prefix().first != what[0].second) @@ -137,13 +158,23 @@ public: if(regex_search(last_end, end, what, *pre, ((what[0].first == what[0].second) ? flags | regex_constants::match_not_initial_null : flags))) { N =0; +#ifdef TEST_BECKER_INTERFACE + result =((subs[N] == -1) ? what.prefix() : what[subs[N]]); +#else result =((subs[N] == -1) ? value_type(what.prefix().first, what.prefix().second) : value_type(what[subs[N]].first, what[subs[N]].second)); +#endif return true; } else if((last_end != end) && (subs[0] == -1)) { N =-1; +#ifdef TEST_BECKER_INTERFACE + result.first = last_end; + result.second = end; + result.matched = true; +#else result = value_type(last_end, end); +#endif return true; } return false; @@ -161,7 +192,11 @@ private: typedef shared_ptr pimpl; public: typedef basic_regex regex_type; +#ifdef TEST_BECKER_INTERFACE + typedef sub_match value_type; +#else typedef std::basic_string value_type; +#endif typedef typename re_detail::regex_iterator_traits::difference_type difference_type; typedef const value_type* pointer;