Update heuristics used to determine max state count, following a bug report from SAP.

Updated tests to match.


[SVN r35656]
This commit is contained in:
John Maddock
2006-10-18 12:56:45 +00:00
parent 67128fa65f
commit b057d0f943
3 changed files with 68 additions and 10 deletions

View File

@ -427,9 +427,9 @@ private:
// matching flags in use:
match_flag_type m_match_flags;
// how many states we have examined so far:
difference_type state_count;
boost::uintmax_t state_count;
// max number of states to examine before giving up:
difference_type max_state_count;
boost::uintmax_t max_state_count;
// whether we should ignore case or not:
bool icase;
// set to true when (position == last), indicates that we may have a partial match:

View File

@ -77,15 +77,67 @@ void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_r
//
// Work out max_state_count -- the number of states the matcher may examine
// before giving up -- for random access iterators.
//
// The estimate is the greater of:
//    * k + N * S^2, and
//    * k + N^2, capped at BOOST_REGEX_MAX_STATE_COUNT,
// where N is distance(base, last) (treated as 1 when zero), S is re.size()
// (treated as 1 when zero), and k is a fixed allowance of 100000.
// If k + N * S^2 cannot be represented in boost::uintmax_t, the limit is set
// to numeric_limits<boost::uintmax_t>::max() - 2 instead.
//
template <class BidiIterator, class Allocator, class traits>
void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(std::random_access_iterator_tag*)
{
   //
   // How many states should we allow our machine to visit before giving up?
   // This is a heuristic: it takes the greater of O(N^2) and O(NS^2)
   // where N is the length of the string, and S is the number of states
   // in the machine.  It's tempting to up this to O(N^2S) or even O(N^2S^2)
   // but these take an unreasonable amount of time to bail out in
   // pathological cases.
   //
   // Calculate NS^2 first:
   //
   static const boost::uintmax_t k = 100000;
   const boost::uintmax_t umax = (std::numeric_limits<boost::uintmax_t>::max)();
   boost::uintmax_t dist = boost::re_detail::distance(base, last);
   if(dist == 0)
      dist = 1;   // keeps the divisions below well defined
   boost::uintmax_t states = re.size();
   if(states == 0)
      states = 1;
   states *= states;
   if(umax / dist < states)
   {
      // N * S^2 does not fit in uintmax_t:
      max_state_count = umax - 2;
      return;
   }
   states *= dist;
   if(umax - k < states)
   {
      // N * S^2 + k does not fit in uintmax_t:
      max_state_count = umax - 2;
      return;
   }
   states += k;
   max_state_count = states;
   //
   // Now calculate N^2:
   //
   // N^2 can be a very large number indeed, to prevent things getting out
   // of control, cap the max states.  Start from the cap and only lower it
   // when N^2 + k is representable and smaller: a value too large to
   // represent is at least as large as the cap, so it must be capped too
   // rather than lifting the limit altogether.
   //
   states = BOOST_REGEX_MAX_STATE_COUNT;
   if(umax / dist >= dist)
   {
      const boost::uintmax_t square = dist * dist;
      if((umax - k >= square) && (square + k < states))
         states = square + k;
   }
   //
   // If (the possibly capped) N^2 is larger than our first estimate,
   // use this instead:
   //
   if(states > max_state_count)
      max_state_count = states;
}
template <class BidiIterator, class Allocator, class traits>

View File

@ -44,6 +44,12 @@ int test_main( int , char* [] )
BOOST_CHECK_THROW(boost::regex_search(bad_text, what, e2), std::runtime_error);
BOOST_CHECK(boost::regex_search(good_text, what, e2));
bad_text.assign((std::string::size_type)500000, 'a');
e2.assign("aaa*@");
BOOST_CHECK_THROW(0 == boost::regex_search(bad_text, what, e2), std::runtime_error);
good_text.assign((std::string::size_type)5000, 'a');
BOOST_CHECK(0 == boost::regex_search(good_text, what, e2));
return 0;
}