1) Disabled recursive implementation for VC8: stack overflows can't be reliably detected unless the whole program is compiled with asynchronous exceptions.

2) Changed std::copy calls on VC8 to avoid "dangerous code" warnings.
3) Moved backreference and octal escape code into line with POSIX-extended requirements.
4) Changed match_results leftmost-longest rules to stop unnecessary std::distance computations (an optimisation for non-random access iterators).
5) Changed C lib calls to use "safe" versions of string API's where available.
6) Added many new POSIX-extended leftmost-longest tests, to verify the above.


[SVN r27880]
This commit is contained in:
John Maddock
2005-03-30 11:38:51 +00:00
parent ca144bb2b3
commit de28eb9b18
17 changed files with 361 additions and 106 deletions

View File

@ -319,7 +319,7 @@ BOOST_REGEX_DECL void BOOST_REGEX_CALL reset_stack_guard_page();
****************************************************************************/
#if !defined(BOOST_REGEX_RECURSIVE) && !defined(BOOST_REGEX_NON_RECURSIVE)
# if defined(BOOST_REGEX_HAS_MS_STACK_GUARD) && !defined(_STLP_DEBUG) && !defined(__STL_DEBUG)
# if defined(BOOST_REGEX_HAS_MS_STACK_GUARD) && !defined(_STLP_DEBUG) && !defined(__STL_DEBUG) && !(defined(BOOST_MSVC) && (BOOST_MSVC >= 1400))
# define BOOST_REGEX_RECURSIVE
# else
# define BOOST_REGEX_NON_RECURSIVE

View File

@ -728,7 +728,7 @@ OutputIterator do_regex_replace(OutputIterator out,
if(i == j)
{
if(!(flags & regex_constants::format_no_copy))
out = std::copy(in.first, in.second, out);
out = re_detail::copy(in.first, in.second, out);
}
else
{
@ -736,7 +736,7 @@ OutputIterator do_regex_replace(OutputIterator out,
while(i != j)
{
if(!(flags & regex_constants::format_no_copy))
out = std::copy(i->prefix().first, i->prefix().second, out);
out = re_detail::copy(i->prefix().first, i->prefix().second, out);
out = ::boost::re_detail::regex_format_imp(out, *i, &*f.begin(), &*f.end(), flags, e.get_traits());
last_m = (*i)[0].second;
if(flags & regex_constants::format_first_only)
@ -744,7 +744,7 @@ OutputIterator do_regex_replace(OutputIterator out,
++i;
}
if(!(flags & regex_constants::format_no_copy))
out = std::copy(last_m, in.second, out);
out = re_detail::copy(last_m, in.second, out);
}
return out;
}

View File

@ -435,10 +435,10 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
return 0;
}
charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s1.size() + s2.size() + 2) ) );
std::copy(s1.begin(), s1.end(), p);
re_detail::copy(s1.begin(), s1.end(), p);
p[s1.size()] = charT(0);
p += s1.size() + 1;
std::copy(s2.begin(), s2.end(), p);
re_detail::copy(s2.begin(), s2.end(), p);
p[s2.size()] = charT(0);
}
//
@ -459,7 +459,7 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
if(s.empty())
return 0; // invalid or unsupported equivalence class
charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s.size()+1) ) );
std::copy(s.begin(), s.end(), p);
re_detail::copy(s.begin(), s.end(), p);
p[s.size()] = charT(0);
++first;
}
@ -620,7 +620,7 @@ void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT*
m_pdata->m_expression_len = len;
charT* ps = static_cast<charT*>(m_pdata->m_data.extend(sizeof(charT) * (1 + (p2 - p1))));
m_pdata->m_expression = ps;
std::copy(p1, p2, ps);
re_detail::copy(p1, p2, ps);
ps[p2 - p1] = 0;
// fill in our other data...
// successful parsing implies a zero status:

View File

@ -1422,7 +1422,15 @@ charT basic_regex_parser<charT, traits>::unescape_character()
// an octal escape sequence, the first character must be a zero
// followed by up to 3 octal digits:
std::ptrdiff_t len = (std::min)(::boost::re_detail::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
int val = this->m_traits.toi(m_position, m_position + len, 8);
const charT* bp = m_position;
int val = this->m_traits.toi(bp, bp + 1, 8);
if(val != 0)
{
// Oops not an octal escape after all:
fail(regex_constants::error_escape, m_position - m_base);
return result;
}
val = this->m_traits.toi(m_position, m_position + len, 8);
if(val < 0)
{
fail(regex_constants::error_escape, m_position - m_base);
@ -1477,19 +1485,20 @@ template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_backref()
{
BOOST_ASSERT(m_position != m_end);
int i = this->m_traits.toi(m_position, m_position + 1, 10);
if((i > 0) && (this->m_backrefs & (1u << (i-1))))
{
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
pb->index = i;
}
else if(i == 0)
const charT* pc = m_position;
int i = this->m_traits.toi(pc, pc + 1, 10);
if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
{
// not a backref at all but an octal escape sequence:
--m_position;
charT c = unescape_character();
this->append_literal(c);
}
else if((i > 0) && (this->m_backrefs & (1u << (i-1))))
{
m_position = pc;
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
pb->index = i;
}
else
{
fail(regex_constants::error_backref, m_position - m_end);

View File

@ -300,29 +300,73 @@ void BOOST_REGEX_CALL match_results<BidiIterator, Allocator>::maybe_assign(const
const_iterator p1, p2;
p1 = begin();
p2 = m.begin();
BidiIterator base = (*this)[-1].first;
std::size_t len1 = 0;
std::size_t len2 = 0;
std::size_t base1 = 0;
std::size_t base2 = 0;
//
// Distances are measured from the start of *this* match, unless this isn't
// a valid match in which case we use the start of the whole sequence. Note that
// no subsequent match-candidate can ever be to the left of the first match found.
// This ensures that when we are using bidirectional iterators, that distances
// measured are as short as possible, and therefore as efficient as possible
// to compute. Finally note that we don't use the "matched" data member to test
// whether a sub-expression is a valid match, because partial matches set this
// to false for sub-expression 0.
//
BidiIterator end = this->suffix().second;
BidiIterator base = (p1->first == end) ? this->prefix().first : (*this)[0].first;
difference_type len1 = 0;
difference_type len2 = 0;
difference_type base1 = 0;
difference_type base2 = 0;
std::size_t i;
for(i = 0; i < size(); ++i)
for(i = 0; i < size(); ++i, ++p1, ++p2)
{
//
// leftmost takes priority over longest:
// Leftmost takes priority over longest; handle special cases
// where distances need not be computed first (an optimisation
// for bidirectional iterators: ensure that we don't accidently
// compute the length of the whole sequence, as this can be really
// expensive).
//
if(p1->first == end)
{
if(p2->first != end)
{
// p2 must be better than p1, and no need to calculate
// actual distances:
base1 = 1;
base2 = 0;
break;
}
else
{
// *p1 and *p2 are either unmatched or match end-of sequence,
// either way no need to calculate distances:
if((p1->matched == false) && (p2->matched == true))
break;
if((p1->matched == true) && (p2->matched == false))
return;
continue;
}
}
else if(p2->first == end)
{
// p1 better than p2, and no need to calculate distances:
return;
}
base1 = ::boost::re_detail::distance(base, p1->first);
base2 = ::boost::re_detail::distance(base, p2->first);
BOOST_ASSERT(base1 >= 0);
BOOST_ASSERT(base2 >= 0);
if(base1 < base2) return;
if(base2 < base1) break;
len1 = ::boost::re_detail::distance((BidiIterator)p1->first, (BidiIterator)p1->second);
len2 = ::boost::re_detail::distance((BidiIterator)p2->first, (BidiIterator)p2->second);
BOOST_ASSERT(len1 >= 0);
BOOST_ASSERT(len2 >= 0);
if((len1 != len2) || ((p1->matched == false) && (p2->matched == true)))
break;
if((p1->matched == true) && (p2->matched == false))
return;
++p1;
++p2;
}
if(i == size())
return;

View File

@ -524,6 +524,9 @@ void basic_regex_formatter<OutputIterator, Results, traits>::put(const sub_match
template <class S>
class string_out_iterator
#ifndef BOOST_NO_STD_ITERATOR
: public std::iterator<std::output_iterator_tag, typename S::value_type>
#endif
{
S* out;
public:
@ -537,11 +540,13 @@ public:
return *this;
}
#ifdef BOOST_NO_STD_ITERATOR
typedef std::ptrdiff_t difference_type;
typedef typename S::value_type value_type;
typedef value_type* pointer;
typedef value_type& reference;
typedef std::output_iterator_tag iterator_category;
#endif
};
template <class OutputIterator, class Iterator, class Alloc, class charT, class traits>
@ -554,7 +559,7 @@ OutputIterator regex_format_imp(OutputIterator out,
{
if(flags & regex_constants::format_literal)
{
return std::copy(p1, p2, out);
return re_detail::copy(p1, p2, out);
}
re_detail::basic_regex_formatter<

View File

@ -41,7 +41,7 @@ OutputIterator regex_replace(OutputIterator out,
if(i == j)
{
if(!(flags & regex_constants::format_no_copy))
out = std::copy(first, last, out);
out = re_detail::copy(first, last, out);
}
else
{
@ -49,7 +49,7 @@ OutputIterator regex_replace(OutputIterator out,
while(i != j)
{
if(!(flags & regex_constants::format_no_copy))
out = std::copy(i->prefix().first, i->prefix().second, out);
out = re_detail::copy(i->prefix().first, i->prefix().second, out);
out = i->format(out, fmt, flags, e);
last_m = (*i)[0].second;
if(flags & regex_constants::format_first_only)
@ -57,7 +57,7 @@ OutputIterator regex_replace(OutputIterator out,
++i;
}
if(!(flags & regex_constants::format_no_copy))
out = std::copy(last_m, last, out);
out = re_detail::copy(last_m, last, out);
}
return out;
}

View File

@ -1,6 +1,6 @@
/*
*
* Copyright (c) 1998-2004
* Copyright (c) 1998-2005
* John Maddock
*
* Use, modification and distribution are subject to the
@ -43,6 +43,12 @@
# include <locale>
#endif
#if defined(BOOST_NO_STDC_NAMESPACE)
namespace std{
using ::sprintf; using ::strcpy; using ::strcat; using ::strlen;
}
#endif
namespace boost{ namespace re_detail{
#ifdef BOOST_NO_STD_DISTANCE
template <class T>
@ -112,4 +118,73 @@ inline void pointer_construct(T* p, const T& t)
}} // namespaces
#endif
/*****************************************************************************
*
* helper function copy:
*
****************************************************************************/
#ifdef __cplusplus
namespace boost{ namespace re_detail{
#if BOOST_WORKAROUND(BOOST_MSVC,>=1400)
//
// MSVC 8 will either emit warnings or else refuse to compile
// code that makes perfectly legitimate use of std::copy, when
// the OutputIterator type is a user-defined class (apparently all user
// defined iterators are "unsafe"). This code works around that:
//
template<class InputIterator, class OutputIterator>
inline OutputIterator copy(
InputIterator first,
InputIterator last,
OutputIterator dest
)
{
return stdext::unchecked_copy(first, last, dest);
}
// use safe versions of strcpy etc:
using ::strcpy_s;
using ::strcat_s;
#else
using std::copy;
inline std::size_t strcpy_s(
char *strDestination,
std::size_t sizeInBytes,
const char *strSource
)
{
if(std::strlen(strSource)+1 > sizeInBytes)
return 1;
std::strcpy(strDestination, strSource);
return 0;
}
inline std::size_t strcat_s(
char *strDestination,
std::size_t sizeInBytes,
const char *strSource
)
{
if(std::strlen(strSource) + std::strlen(strDestination) + 1 > sizeInBytes)
return 1;
std::strcat(strDestination, strSource);
return 0;
}
#endif
inline void overflow_error_if_not_zero(std::size_t i)
{
if(i)
{
std::overflow_error e("String buffer too small");
boost::throw_exception(e);
}
}
}} // namespaces
#endif
#endif // include guard