1) Disabled recursive implementation for VC8: stack overflows can't be reliably detected unless the whole program is compiled with asynchronous exceptions.

2) Changed std::copy calls on VC8 to avoid "dangerous code" warnings. 3) Brought backreference and octal escape handling into line with POSIX-extended requirements. 4) Changed match_results leftmost-longest rules to avoid unnecessary std::distance computations (an optimisation for non-random-access iterators). 5) Changed C library calls to use the "safe" versions of the string APIs where available. 6) Added many new POSIX-extended leftmost-longest tests to verify the above. [SVN r27880]
2025-07-29 12:07:28 +02:00 · 2005-03-30 11:38:51 +00:00
parent ca144bb2b3
commit de28eb9b18
17 changed files with 361 additions and 106 deletions
--- a/include/boost/regex/config.hpp
+++ b/include/boost/regex/config.hpp
@ -319,7 +319,7 @@ BOOST_REGEX_DECL void BOOST_REGEX_CALL reset_stack_guard_page();
 ****************************************************************************/

 #if !defined(BOOST_REGEX_RECURSIVE) && !defined(BOOST_REGEX_NON_RECURSIVE)
-#  if defined(BOOST_REGEX_HAS_MS_STACK_GUARD) && !defined(_STLP_DEBUG) && !defined(__STL_DEBUG)
+#  if defined(BOOST_REGEX_HAS_MS_STACK_GUARD) && !defined(_STLP_DEBUG) && !defined(__STL_DEBUG) && !(defined(BOOST_MSVC) && (BOOST_MSVC >= 1400))
 #     define BOOST_REGEX_RECURSIVE
 #  else
 #     define BOOST_REGEX_NON_RECURSIVE
--- a/include/boost/regex/icu.hpp
+++ b/include/boost/regex/icu.hpp
@ -728,7 +728,7 @@ OutputIterator do_regex_replace(OutputIterator out,
   if(i == j)
   {
      if(!(flags & regex_constants::format_no_copy))
-         out = std::copy(in.first, in.second, out);
+         out = re_detail::copy(in.first, in.second, out);
   }
   else
   {
@ -736,7 +736,7 @@ OutputIterator do_regex_replace(OutputIterator out,
      while(i != j)
      {
         if(!(flags & regex_constants::format_no_copy))
-            out = std::copy(i->prefix().first, i->prefix().second, out); 
+            out = re_detail::copy(i->prefix().first, i->prefix().second, out); 
         out = ::boost::re_detail::regex_format_imp(out, *i, &*f.begin(), &*f.end(), flags, e.get_traits());
         last_m = (*i)[0].second;
         if(flags & regex_constants::format_first_only)
@ -744,7 +744,7 @@ OutputIterator do_regex_replace(OutputIterator out,
         ++i;
      }
      if(!(flags & regex_constants::format_no_copy))
-         out = std::copy(last_m, in.second, out);
+         out = re_detail::copy(last_m, in.second, out);
   }
   return out;
 }
--- a/include/boost/regex/v4/basic_regex_creator.hpp
+++ b/include/boost/regex/v4/basic_regex_creator.hpp
@ -435,10 +435,10 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
         return 0;
      }
      charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s1.size() + s2.size() + 2) ) );
-      std::copy(s1.begin(), s1.end(), p);
+      re_detail::copy(s1.begin(), s1.end(), p);
      p[s1.size()] = charT(0);
      p += s1.size() + 1;
-      std::copy(s2.begin(), s2.end(), p);
+      re_detail::copy(s2.begin(), s2.end(), p);
      p[s2.size()] = charT(0);
   }
   //
@ -459,7 +459,7 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
      if(s.empty())
         return 0;  // invalid or unsupported equivalence class
      charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s.size()+1) ) );
-      std::copy(s.begin(), s.end(), p);
+      re_detail::copy(s.begin(), s.end(), p);
      p[s.size()] = charT(0);
      ++first;
   }
@ -620,7 +620,7 @@ void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT*
   m_pdata->m_expression_len = len;
   charT* ps = static_cast<charT*>(m_pdata->m_data.extend(sizeof(charT) * (1 + (p2 - p1))));
   m_pdata->m_expression = ps;
-   std::copy(p1, p2, ps);
+   re_detail::copy(p1, p2, ps);
   ps[p2 - p1] = 0;
   // fill in our other data...
   // successful parsing implies a zero status:
--- a/include/boost/regex/v4/basic_regex_parser.hpp
+++ b/include/boost/regex/v4/basic_regex_parser.hpp
@ -1422,7 +1422,15 @@ charT basic_regex_parser<charT, traits>::unescape_character()
      // an octal escape sequence, the first character must be a zero
      // followed by up to 3 octal digits:
      std::ptrdiff_t len = (std::min)(::boost::re_detail::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
-      int val = this->m_traits.toi(m_position, m_position + len, 8);
+      const charT* bp = m_position;
+      int val = this->m_traits.toi(bp, bp + 1, 8);
+      if(val != 0)
+      {
+         // Oops not an octal escape after all:
+         fail(regex_constants::error_escape, m_position - m_base);
+         return result;
+      }
+      val = this->m_traits.toi(m_position, m_position + len, 8);
      if(val < 0) 
      {
         fail(regex_constants::error_escape, m_position - m_base);
@ -1477,19 +1485,20 @@ template <class charT, class traits>
 bool basic_regex_parser<charT, traits>::parse_backref()
 {
   BOOST_ASSERT(m_position != m_end);
-   int i = this->m_traits.toi(m_position, m_position + 1, 10);
-   if((i > 0) && (this->m_backrefs & (1u << (i-1))))
-   {
-      re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
-      pb->index = i;
-   }
-   else if(i == 0)
+   const charT* pc = m_position;
+   int i = this->m_traits.toi(pc, pc + 1, 10);
+   if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
   {
      // not a backref at all but an octal escape sequence:
-      --m_position;
      charT c = unescape_character();
      this->append_literal(c);
   }
+   else if((i > 0) && (this->m_backrefs & (1u << (i-1))))
+   {
+      m_position = pc;
+      re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
+      pb->index = i;
+   }
   else
   {
      fail(regex_constants::error_backref, m_position - m_end);
--- a/include/boost/regex/v4/match_results.hpp
+++ b/include/boost/regex/v4/match_results.hpp
@ -300,29 +300,73 @@ void BOOST_REGEX_CALL match_results<BidiIterator, Allocator>::maybe_assign(const
   const_iterator p1, p2;
   p1 = begin();
   p2 = m.begin();
-   BidiIterator base = (*this)[-1].first;
-   std::size_t len1 = 0;
-   std::size_t len2 = 0;
-   std::size_t base1 = 0;
-   std::size_t base2 = 0;
+   //
+   // Distances are measured from the start of *this* match, unless this isn't
+   // a valid match in which case we use the start of the whole sequence.  Note that
+   // no subsequent match-candidate can ever be to the left of the first match found.
+   // This ensures that when we are using bidirectional iterators, that distances 
+   // measured are as short as possible, and therefore as efficient as possible
+   // to compute.  Finally note that we don't use the "matched" data member to test
+   // whether a sub-expression is a valid match, because partial matches set this
+   // to false for sub-expression 0.
+   //
+   BidiIterator end = this->suffix().second;
+   BidiIterator base = (p1->first == end) ? this->prefix().first : (*this)[0].first;
+   difference_type len1 = 0;
+   difference_type len2 = 0;
+   difference_type base1 = 0;
+   difference_type base2 = 0;
   std::size_t i;
-   for(i = 0; i < size(); ++i)
+   for(i = 0; i < size(); ++i, ++p1, ++p2)
   {
      //
-      // leftmost takes priority over longest:
+      // Leftmost takes priority over longest; handle special cases
+      // where distances need not be computed first (an optimisation
+      // for bidirectional iterators: ensure that we don't accidently
+      // compute the length of the whole sequence, as this can be really
+      // expensive).
+      //
+      if(p1->first == end)
+      {
+         if(p2->first != end)
+         {
+            // p2 must be better than p1, and no need to calculate
+            // actual distances:
+            base1 = 1;
+            base2 = 0;
+            break;
+         }
+         else
+         {
+            // *p1 and *p2 are either unmatched or match end-of sequence,
+            // either way no need to calculate distances:
+            if((p1->matched == false) && (p2->matched == true))
+               break;
+            if((p1->matched == true) && (p2->matched == false))
+               return;
+            continue;
+         }
+      }
+      else if(p2->first == end)
+      {
+         // p1 better than p2, and no need to calculate distances:
+         return;
+      }
      base1 = ::boost::re_detail::distance(base, p1->first);
      base2 = ::boost::re_detail::distance(base, p2->first);
+      BOOST_ASSERT(base1 >= 0);
+      BOOST_ASSERT(base2 >= 0);
      if(base1 < base2) return;
      if(base2 < base1) break;

      len1 = ::boost::re_detail::distance((BidiIterator)p1->first, (BidiIterator)p1->second);
      len2 = ::boost::re_detail::distance((BidiIterator)p2->first, (BidiIterator)p2->second);
+      BOOST_ASSERT(len1 >= 0);
+      BOOST_ASSERT(len2 >= 0);
      if((len1 != len2) || ((p1->matched == false) && (p2->matched == true)))
         break;
      if((p1->matched == true) && (p2->matched == false))
         return;
-      ++p1;
-      ++p2;
   }
   if(i == size())
      return;
--- a/include/boost/regex/v4/regex_format.hpp
+++ b/include/boost/regex/v4/regex_format.hpp
@ -524,6 +524,9 @@ void basic_regex_formatter<OutputIterator, Results, traits>::put(const sub_match

 template <class S>
 class string_out_iterator
+#ifndef BOOST_NO_STD_ITERATOR
+   : public std::iterator<std::output_iterator_tag, typename S::value_type>
+#endif
 {
   S* out;
 public:
@ -537,11 +540,13 @@ public:
      return *this; 
   }

+#ifdef BOOST_NO_STD_ITERATOR
   typedef std::ptrdiff_t difference_type;
   typedef typename S::value_type value_type;
   typedef value_type* pointer;
   typedef value_type& reference;
   typedef std::output_iterator_tag iterator_category;
+#endif
 };

 template <class OutputIterator, class Iterator, class Alloc, class charT, class traits>
@ -554,7 +559,7 @@ OutputIterator regex_format_imp(OutputIterator out,
 {
   if(flags & regex_constants::format_literal)
   {
-      return std::copy(p1, p2, out);
+      return re_detail::copy(p1, p2, out);
   }

   re_detail::basic_regex_formatter<
--- a/include/boost/regex/v4/regex_replace.hpp
+++ b/include/boost/regex/v4/regex_replace.hpp
@ -41,7 +41,7 @@ OutputIterator regex_replace(OutputIterator out,
   if(i == j)
   {
      if(!(flags & regex_constants::format_no_copy))
-         out = std::copy(first, last, out);
+         out = re_detail::copy(first, last, out);
   }
   else
   {
@ -49,7 +49,7 @@ OutputIterator regex_replace(OutputIterator out,
      while(i != j)
      {
         if(!(flags & regex_constants::format_no_copy))
-            out = std::copy(i->prefix().first, i->prefix().second, out); 
+            out = re_detail::copy(i->prefix().first, i->prefix().second, out); 
         out = i->format(out, fmt, flags, e);
         last_m = (*i)[0].second;
         if(flags & regex_constants::format_first_only)
@ -57,7 +57,7 @@ OutputIterator regex_replace(OutputIterator out,
         ++i;
      }
      if(!(flags & regex_constants::format_no_copy))
-         out = std::copy(last_m, last, out);
+         out = re_detail::copy(last_m, last, out);
   }
   return out;
 }
--- a/include/boost/regex/v4/regex_workaround.hpp
+++ b/include/boost/regex/v4/regex_workaround.hpp
@ -1,6 +1,6 @@
 /*
 *
- * Copyright (c) 1998-2004
+ * Copyright (c) 1998-2005
 * John Maddock
 *
 * Use, modification and distribution are subject to the 
@ -43,6 +43,12 @@
 #   include <locale>
 #endif

+#if defined(BOOST_NO_STDC_NAMESPACE)
+namespace std{
+   using ::sprintf; using ::strcpy; using ::strcat; using ::strlen;
+}
+#endif
+
 namespace boost{ namespace re_detail{
 #ifdef BOOST_NO_STD_DISTANCE
 template <class T>
@ -112,4 +118,73 @@ inline void pointer_construct(T* p, const T& t)
 }} // namespaces
 #endif

+/*****************************************************************************
+ *
+ *  helper function copy:
+ *
+ ****************************************************************************/
+
+#ifdef __cplusplus
+namespace boost{ namespace re_detail{
+#if BOOST_WORKAROUND(BOOST_MSVC,>=1400)
+   //
+   // MSVC 8 will either emit warnings or else refuse to compile
+   // code that makes perfectly legitimate use of std::copy, when
+   // the OutputIterator type is a user-defined class (apparently all user 
+   // defined iterators are "unsafe").  This code works around that:
+   //
+   template<class InputIterator, class OutputIterator>
+   inline OutputIterator copy(
+      InputIterator first, 
+      InputIterator last, 
+      OutputIterator dest
+   )
+   {
+      return stdext::unchecked_copy(first, last, dest);
+   }
+
+   // use safe versions of strcpy etc:
+   using ::strcpy_s;
+   using ::strcat_s;
+#else
+   using std::copy;
+
+   inline std::size_t strcpy_s(
+      char *strDestination,
+      std::size_t sizeInBytes,
+      const char *strSource 
+   )
+   {
+      if(std::strlen(strSource)+1 > sizeInBytes)
+         return 1;
+      std::strcpy(strDestination, strSource);
+      return 0;
+   }
+   inline std::size_t strcat_s(
+      char *strDestination,
+      std::size_t sizeInBytes,
+      const char *strSource 
+   )
+   {
+      if(std::strlen(strSource) + std::strlen(strDestination) + 1 > sizeInBytes)
+         return 1;
+      std::strcat(strDestination, strSource);
+      return 0;
+   }
+
+#endif
+
+   inline void overflow_error_if_not_zero(std::size_t i)
+   {
+      if(i)
+      {
+         std::overflow_error e("String buffer too small");
+         boost::throw_exception(e);
+      }
+   }
+
+}} // namespaces
+#endif
+
 #endif // include guard
+