mirror of
https://github.com/boostorg/regex.git
synced 2025-07-23 09:07:25 +02:00
1) Disabled recursive implementation for VC8: stack overflows can't be reliably detected unless the whole program is compiled with asynchronous exceptions.
2) Changed std::copy calls on VC8 to avoid "dangerous code" warnings. 3) Moved backreference and octal escape code into line with POSIX-extended requirements. 4) Changed match_results leftmost-longest rules to stop unnecessary std::distance computations (an optimisation for non-random access iterators). 5) Changed C lib calls to use "safe" versions of string API's where available. 6) Added many new POSIX-extended leftmost-longest tests, to verify the above. [SVN r27880]
This commit is contained in:
@ -319,7 +319,7 @@ BOOST_REGEX_DECL void BOOST_REGEX_CALL reset_stack_guard_page();
|
||||
****************************************************************************/
|
||||
|
||||
#if !defined(BOOST_REGEX_RECURSIVE) && !defined(BOOST_REGEX_NON_RECURSIVE)
|
||||
# if defined(BOOST_REGEX_HAS_MS_STACK_GUARD) && !defined(_STLP_DEBUG) && !defined(__STL_DEBUG)
|
||||
# if defined(BOOST_REGEX_HAS_MS_STACK_GUARD) && !defined(_STLP_DEBUG) && !defined(__STL_DEBUG) && !(defined(BOOST_MSVC) && (BOOST_MSVC >= 1400))
|
||||
# define BOOST_REGEX_RECURSIVE
|
||||
# else
|
||||
# define BOOST_REGEX_NON_RECURSIVE
|
||||
|
@ -728,7 +728,7 @@ OutputIterator do_regex_replace(OutputIterator out,
|
||||
if(i == j)
|
||||
{
|
||||
if(!(flags & regex_constants::format_no_copy))
|
||||
out = std::copy(in.first, in.second, out);
|
||||
out = re_detail::copy(in.first, in.second, out);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -736,7 +736,7 @@ OutputIterator do_regex_replace(OutputIterator out,
|
||||
while(i != j)
|
||||
{
|
||||
if(!(flags & regex_constants::format_no_copy))
|
||||
out = std::copy(i->prefix().first, i->prefix().second, out);
|
||||
out = re_detail::copy(i->prefix().first, i->prefix().second, out);
|
||||
out = ::boost::re_detail::regex_format_imp(out, *i, &*f.begin(), &*f.end(), flags, e.get_traits());
|
||||
last_m = (*i)[0].second;
|
||||
if(flags & regex_constants::format_first_only)
|
||||
@ -744,7 +744,7 @@ OutputIterator do_regex_replace(OutputIterator out,
|
||||
++i;
|
||||
}
|
||||
if(!(flags & regex_constants::format_no_copy))
|
||||
out = std::copy(last_m, in.second, out);
|
||||
out = re_detail::copy(last_m, in.second, out);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
@ -435,10 +435,10 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
|
||||
return 0;
|
||||
}
|
||||
charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s1.size() + s2.size() + 2) ) );
|
||||
std::copy(s1.begin(), s1.end(), p);
|
||||
re_detail::copy(s1.begin(), s1.end(), p);
|
||||
p[s1.size()] = charT(0);
|
||||
p += s1.size() + 1;
|
||||
std::copy(s2.begin(), s2.end(), p);
|
||||
re_detail::copy(s2.begin(), s2.end(), p);
|
||||
p[s2.size()] = charT(0);
|
||||
}
|
||||
//
|
||||
@ -459,7 +459,7 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
|
||||
if(s.empty())
|
||||
return 0; // invalid or unsupported equivalence class
|
||||
charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s.size()+1) ) );
|
||||
std::copy(s.begin(), s.end(), p);
|
||||
re_detail::copy(s.begin(), s.end(), p);
|
||||
p[s.size()] = charT(0);
|
||||
++first;
|
||||
}
|
||||
@ -620,7 +620,7 @@ void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT*
|
||||
m_pdata->m_expression_len = len;
|
||||
charT* ps = static_cast<charT*>(m_pdata->m_data.extend(sizeof(charT) * (1 + (p2 - p1))));
|
||||
m_pdata->m_expression = ps;
|
||||
std::copy(p1, p2, ps);
|
||||
re_detail::copy(p1, p2, ps);
|
||||
ps[p2 - p1] = 0;
|
||||
// fill in our other data...
|
||||
// successful parsing implies a zero status:
|
||||
|
@ -1422,7 +1422,15 @@ charT basic_regex_parser<charT, traits>::unescape_character()
|
||||
// an octal escape sequence, the first character must be a zero
|
||||
// followed by up to 3 octal digits:
|
||||
std::ptrdiff_t len = (std::min)(::boost::re_detail::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
|
||||
int val = this->m_traits.toi(m_position, m_position + len, 8);
|
||||
const charT* bp = m_position;
|
||||
int val = this->m_traits.toi(bp, bp + 1, 8);
|
||||
if(val != 0)
|
||||
{
|
||||
// Oops not an octal escape after all:
|
||||
fail(regex_constants::error_escape, m_position - m_base);
|
||||
return result;
|
||||
}
|
||||
val = this->m_traits.toi(m_position, m_position + len, 8);
|
||||
if(val < 0)
|
||||
{
|
||||
fail(regex_constants::error_escape, m_position - m_base);
|
||||
@ -1477,19 +1485,20 @@ template <class charT, class traits>
|
||||
bool basic_regex_parser<charT, traits>::parse_backref()
|
||||
{
|
||||
BOOST_ASSERT(m_position != m_end);
|
||||
int i = this->m_traits.toi(m_position, m_position + 1, 10);
|
||||
if((i > 0) && (this->m_backrefs & (1u << (i-1))))
|
||||
{
|
||||
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
|
||||
pb->index = i;
|
||||
}
|
||||
else if(i == 0)
|
||||
const charT* pc = m_position;
|
||||
int i = this->m_traits.toi(pc, pc + 1, 10);
|
||||
if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
|
||||
{
|
||||
// not a backref at all but an octal escape sequence:
|
||||
--m_position;
|
||||
charT c = unescape_character();
|
||||
this->append_literal(c);
|
||||
}
|
||||
else if((i > 0) && (this->m_backrefs & (1u << (i-1))))
|
||||
{
|
||||
m_position = pc;
|
||||
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
|
||||
pb->index = i;
|
||||
}
|
||||
else
|
||||
{
|
||||
fail(regex_constants::error_backref, m_position - m_end);
|
||||
|
@ -300,29 +300,73 @@ void BOOST_REGEX_CALL match_results<BidiIterator, Allocator>::maybe_assign(const
|
||||
const_iterator p1, p2;
|
||||
p1 = begin();
|
||||
p2 = m.begin();
|
||||
BidiIterator base = (*this)[-1].first;
|
||||
std::size_t len1 = 0;
|
||||
std::size_t len2 = 0;
|
||||
std::size_t base1 = 0;
|
||||
std::size_t base2 = 0;
|
||||
//
|
||||
// Distances are measured from the start of *this* match, unless this isn't
|
||||
// a valid match in which case we use the start of the whole sequence. Note that
|
||||
// no subsequent match-candidate can ever be to the left of the first match found.
|
||||
// This ensures that when we are using bidirectional iterators, that distances
|
||||
// measured are as short as possible, and therefore as efficient as possible
|
||||
// to compute. Finally note that we don't use the "matched" data member to test
|
||||
// whether a sub-expression is a valid match, because partial matches set this
|
||||
// to false for sub-expression 0.
|
||||
//
|
||||
BidiIterator end = this->suffix().second;
|
||||
BidiIterator base = (p1->first == end) ? this->prefix().first : (*this)[0].first;
|
||||
difference_type len1 = 0;
|
||||
difference_type len2 = 0;
|
||||
difference_type base1 = 0;
|
||||
difference_type base2 = 0;
|
||||
std::size_t i;
|
||||
for(i = 0; i < size(); ++i)
|
||||
for(i = 0; i < size(); ++i, ++p1, ++p2)
|
||||
{
|
||||
//
|
||||
// leftmost takes priority over longest:
|
||||
// Leftmost takes priority over longest; handle special cases
|
||||
// where distances need not be computed first (an optimisation
|
||||
// for bidirectional iterators: ensure that we don't accidently
|
||||
// compute the length of the whole sequence, as this can be really
|
||||
// expensive).
|
||||
//
|
||||
if(p1->first == end)
|
||||
{
|
||||
if(p2->first != end)
|
||||
{
|
||||
// p2 must be better than p1, and no need to calculate
|
||||
// actual distances:
|
||||
base1 = 1;
|
||||
base2 = 0;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
// *p1 and *p2 are either unmatched or match end-of sequence,
|
||||
// either way no need to calculate distances:
|
||||
if((p1->matched == false) && (p2->matched == true))
|
||||
break;
|
||||
if((p1->matched == true) && (p2->matched == false))
|
||||
return;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else if(p2->first == end)
|
||||
{
|
||||
// p1 better than p2, and no need to calculate distances:
|
||||
return;
|
||||
}
|
||||
base1 = ::boost::re_detail::distance(base, p1->first);
|
||||
base2 = ::boost::re_detail::distance(base, p2->first);
|
||||
BOOST_ASSERT(base1 >= 0);
|
||||
BOOST_ASSERT(base2 >= 0);
|
||||
if(base1 < base2) return;
|
||||
if(base2 < base1) break;
|
||||
|
||||
len1 = ::boost::re_detail::distance((BidiIterator)p1->first, (BidiIterator)p1->second);
|
||||
len2 = ::boost::re_detail::distance((BidiIterator)p2->first, (BidiIterator)p2->second);
|
||||
BOOST_ASSERT(len1 >= 0);
|
||||
BOOST_ASSERT(len2 >= 0);
|
||||
if((len1 != len2) || ((p1->matched == false) && (p2->matched == true)))
|
||||
break;
|
||||
if((p1->matched == true) && (p2->matched == false))
|
||||
return;
|
||||
++p1;
|
||||
++p2;
|
||||
}
|
||||
if(i == size())
|
||||
return;
|
||||
|
@ -524,6 +524,9 @@ void basic_regex_formatter<OutputIterator, Results, traits>::put(const sub_match
|
||||
|
||||
template <class S>
|
||||
class string_out_iterator
|
||||
#ifndef BOOST_NO_STD_ITERATOR
|
||||
: public std::iterator<std::output_iterator_tag, typename S::value_type>
|
||||
#endif
|
||||
{
|
||||
S* out;
|
||||
public:
|
||||
@ -537,11 +540,13 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
#ifdef BOOST_NO_STD_ITERATOR
|
||||
typedef std::ptrdiff_t difference_type;
|
||||
typedef typename S::value_type value_type;
|
||||
typedef value_type* pointer;
|
||||
typedef value_type& reference;
|
||||
typedef std::output_iterator_tag iterator_category;
|
||||
#endif
|
||||
};
|
||||
|
||||
template <class OutputIterator, class Iterator, class Alloc, class charT, class traits>
|
||||
@ -554,7 +559,7 @@ OutputIterator regex_format_imp(OutputIterator out,
|
||||
{
|
||||
if(flags & regex_constants::format_literal)
|
||||
{
|
||||
return std::copy(p1, p2, out);
|
||||
return re_detail::copy(p1, p2, out);
|
||||
}
|
||||
|
||||
re_detail::basic_regex_formatter<
|
||||
|
@ -41,7 +41,7 @@ OutputIterator regex_replace(OutputIterator out,
|
||||
if(i == j)
|
||||
{
|
||||
if(!(flags & regex_constants::format_no_copy))
|
||||
out = std::copy(first, last, out);
|
||||
out = re_detail::copy(first, last, out);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -49,7 +49,7 @@ OutputIterator regex_replace(OutputIterator out,
|
||||
while(i != j)
|
||||
{
|
||||
if(!(flags & regex_constants::format_no_copy))
|
||||
out = std::copy(i->prefix().first, i->prefix().second, out);
|
||||
out = re_detail::copy(i->prefix().first, i->prefix().second, out);
|
||||
out = i->format(out, fmt, flags, e);
|
||||
last_m = (*i)[0].second;
|
||||
if(flags & regex_constants::format_first_only)
|
||||
@ -57,7 +57,7 @@ OutputIterator regex_replace(OutputIterator out,
|
||||
++i;
|
||||
}
|
||||
if(!(flags & regex_constants::format_no_copy))
|
||||
out = std::copy(last_m, last, out);
|
||||
out = re_detail::copy(last_m, last, out);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (c) 1998-2004
|
||||
* Copyright (c) 1998-2005
|
||||
* John Maddock
|
||||
*
|
||||
* Use, modification and distribution are subject to the
|
||||
@ -43,6 +43,12 @@
|
||||
# include <locale>
|
||||
#endif
|
||||
|
||||
#if defined(BOOST_NO_STDC_NAMESPACE)
|
||||
namespace std{
|
||||
using ::sprintf; using ::strcpy; using ::strcat; using ::strlen;
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace boost{ namespace re_detail{
|
||||
#ifdef BOOST_NO_STD_DISTANCE
|
||||
template <class T>
|
||||
@ -112,4 +118,73 @@ inline void pointer_construct(T* p, const T& t)
|
||||
}} // namespaces
|
||||
#endif
|
||||
|
||||
/*****************************************************************************
|
||||
*
|
||||
* helper function copy:
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace boost{ namespace re_detail{
|
||||
#if BOOST_WORKAROUND(BOOST_MSVC,>=1400)
|
||||
//
|
||||
// MSVC 8 will either emit warnings or else refuse to compile
|
||||
// code that makes perfectly legitimate use of std::copy, when
|
||||
// the OutputIterator type is a user-defined class (apparently all user
|
||||
// defined iterators are "unsafe"). This code works around that:
|
||||
//
|
||||
template<class InputIterator, class OutputIterator>
|
||||
inline OutputIterator copy(
|
||||
InputIterator first,
|
||||
InputIterator last,
|
||||
OutputIterator dest
|
||||
)
|
||||
{
|
||||
return stdext::unchecked_copy(first, last, dest);
|
||||
}
|
||||
|
||||
// use safe versions of strcpy etc:
|
||||
using ::strcpy_s;
|
||||
using ::strcat_s;
|
||||
#else
|
||||
using std::copy;
|
||||
|
||||
inline std::size_t strcpy_s(
|
||||
char *strDestination,
|
||||
std::size_t sizeInBytes,
|
||||
const char *strSource
|
||||
)
|
||||
{
|
||||
if(std::strlen(strSource)+1 > sizeInBytes)
|
||||
return 1;
|
||||
std::strcpy(strDestination, strSource);
|
||||
return 0;
|
||||
}
|
||||
inline std::size_t strcat_s(
|
||||
char *strDestination,
|
||||
std::size_t sizeInBytes,
|
||||
const char *strSource
|
||||
)
|
||||
{
|
||||
if(std::strlen(strSource) + std::strlen(strDestination) + 1 > sizeInBytes)
|
||||
return 1;
|
||||
std::strcat(strDestination, strSource);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
inline void overflow_error_if_not_zero(std::size_t i)
|
||||
{
|
||||
if(i)
|
||||
{
|
||||
std::overflow_error e("String buffer too small");
|
||||
boost::throw_exception(e);
|
||||
}
|
||||
}
|
||||
|
||||
}} // namespaces
|
||||
#endif
|
||||
|
||||
#endif // include guard
|
||||
|
||||
|
Reference in New Issue
Block a user