mirror of
https://github.com/boostorg/regex.git
synced 2025-07-15 13:26:38 +02:00
Update heuristics used to determine max state count, following a bug report from SAP.
Updated tests to match. [SVN r35656]
This commit is contained in:
@ -427,9 +427,9 @@ private:
|
||||
// matching flags in use:
|
||||
match_flag_type m_match_flags;
|
||||
// how many states we have examined so far:
|
||||
difference_type state_count;
|
||||
boost::uintmax_t state_count;
|
||||
// max number of states to examine before giving up:
|
||||
difference_type max_state_count;
|
||||
boost::uintmax_t max_state_count;
|
||||
// whether we should ignore case or not:
|
||||
bool icase;
|
||||
// set to true when (position == last), indicates that we may have a partial match:
|
||||
|
@ -77,15 +77,67 @@ void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_r
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(std::random_access_iterator_tag*)
|
||||
{
|
||||
static const difference_type k = 100000;
|
||||
difference_type dist = boost::re_detail::distance(base, last);
|
||||
traits_size_type states = static_cast<traits_size_type>(re.size());
|
||||
//
|
||||
// How many states should we allow our machine to visit before giving up?
|
||||
// This is a heuristic: it takes the greater of O(N^2) and O(NS^2)
|
||||
// where N is the length of the string, and S is the number of states
|
||||
// in the machine. It's tempting to up this to O(N^2S) or even O(N^2S^2)
|
||||
// but these take unreasonable amounts of time to bail out in pathological
|
||||
// cases.
|
||||
//
|
||||
// Calculate NS^2 first:
|
||||
//
|
||||
static const boost::uintmax_t k = 100000;
|
||||
boost::uintmax_t dist = boost::re_detail::distance(base, last);
|
||||
if(dist == 0)
|
||||
dist = 1;
|
||||
boost::uintmax_t states = re.size();
|
||||
if(states == 0)
|
||||
states = 1;
|
||||
states *= states;
|
||||
difference_type lim = ((std::numeric_limits<difference_type>::max)() - k) / states;
|
||||
if(dist >= lim)
|
||||
max_state_count = (std::numeric_limits<difference_type>::max)();
|
||||
else
|
||||
max_state_count = k + states * dist;
|
||||
if((std::numeric_limits<boost::uintmax_t>::max)() / dist < states)
|
||||
{
|
||||
max_state_count = (std::numeric_limits<boost::uintmax_t>::max)() - 2;
|
||||
return;
|
||||
}
|
||||
states *= dist;
|
||||
if((std::numeric_limits<boost::uintmax_t>::max)() - k < states)
|
||||
{
|
||||
max_state_count = (std::numeric_limits<boost::uintmax_t>::max)() - 2;
|
||||
return;
|
||||
}
|
||||
states += k;
|
||||
|
||||
max_state_count = states;
|
||||
|
||||
//
|
||||
// Now calculate N^2:
|
||||
//
|
||||
states = dist;
|
||||
if((std::numeric_limits<boost::uintmax_t>::max)() / dist < states)
|
||||
{
|
||||
max_state_count = (std::numeric_limits<boost::uintmax_t>::max)() - 2;
|
||||
return;
|
||||
}
|
||||
states *= dist;
|
||||
if((std::numeric_limits<boost::uintmax_t>::max)() - k < states)
|
||||
{
|
||||
max_state_count = (std::numeric_limits<boost::uintmax_t>::max)() - 2;
|
||||
return;
|
||||
}
|
||||
states += k;
|
||||
//
|
||||
// N^2 can be a very large number indeed; to prevent things getting out
|
||||
// of control, cap the max states:
|
||||
//
|
||||
if(states > BOOST_REGEX_MAX_STATE_COUNT)
|
||||
states = BOOST_REGEX_MAX_STATE_COUNT;
|
||||
//
|
||||
// If (the possibly capped) N^2 is larger than our first estimate,
|
||||
// use this instead:
|
||||
//
|
||||
if(states > max_state_count)
|
||||
max_state_count = states;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
|
@ -44,6 +44,12 @@ int test_main( int , char* [] )
|
||||
BOOST_CHECK_THROW(boost::regex_search(bad_text, what, e2), std::runtime_error);
|
||||
BOOST_CHECK(boost::regex_search(good_text, what, e2));
|
||||
|
||||
bad_text.assign((std::string::size_type)500000, 'a');
|
||||
e2.assign("aaa*@");
|
||||
BOOST_CHECK_THROW(0 == boost::regex_search(bad_text, what, e2), std::runtime_error);
|
||||
good_text.assign((std::string::size_type)5000, 'a');
|
||||
BOOST_CHECK(0 == boost::regex_search(good_text, what, e2));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user