mirror of
https://github.com/boostorg/regex.git
synced 2025-07-15 21:32:18 +02:00
Update heuristics used to determine max state count, following a bug report from SAP.
Updated tests to match. [SVN r35656]
This commit is contained in:
@ -427,9 +427,9 @@ private:
|
|||||||
// matching flags in use:
|
// matching flags in use:
|
||||||
match_flag_type m_match_flags;
|
match_flag_type m_match_flags;
|
||||||
// how many states we have examined so far:
|
// how many states we have examined so far:
|
||||||
difference_type state_count;
|
boost::uintmax_t state_count;
|
||||||
// max number of states to examine before giving up:
|
// max number of states to examine before giving up:
|
||||||
difference_type max_state_count;
|
boost::uintmax_t max_state_count;
|
||||||
// whether we should ignore case or not:
|
// whether we should ignore case or not:
|
||||||
bool icase;
|
bool icase;
|
||||||
// set to true when (position == last), indicates that we may have a partial match:
|
// set to true when (position == last), indicates that we may have a partial match:
|
||||||
|
@ -77,15 +77,67 @@ void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_r
|
|||||||
template <class BidiIterator, class Allocator, class traits>
|
template <class BidiIterator, class Allocator, class traits>
|
||||||
void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(std::random_access_iterator_tag*)
|
void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(std::random_access_iterator_tag*)
|
||||||
{
|
{
|
||||||
static const difference_type k = 100000;
|
//
|
||||||
difference_type dist = boost::re_detail::distance(base, last);
|
// How many states should we allow our machine to visit before giving up?
|
||||||
traits_size_type states = static_cast<traits_size_type>(re.size());
|
// This is a heuristic: it takes the greater of O(N^2) and O(NS^2)
|
||||||
|
// where N is the length of the string, and S is the number of states
|
||||||
|
// in the machine. It's tempting to up this to O(N^2S) or even O(N^2S^2)
|
||||||
|
// but these take unreasonable amounts of time to bale out in pathological
|
||||||
|
// cases.
|
||||||
|
//
|
||||||
|
// Calculate NS^2 first:
|
||||||
|
//
|
||||||
|
static const boost::uintmax_t k = 100000;
|
||||||
|
boost::uintmax_t dist = boost::re_detail::distance(base, last);
|
||||||
|
if(dist == 0)
|
||||||
|
dist = 1;
|
||||||
|
boost::uintmax_t states = re.size();
|
||||||
|
if(states == 0)
|
||||||
|
states = 1;
|
||||||
states *= states;
|
states *= states;
|
||||||
difference_type lim = ((std::numeric_limits<difference_type>::max)() - k) / states;
|
if((std::numeric_limits<boost::uintmax_t>::max)() / dist < states)
|
||||||
if(dist >= lim)
|
{
|
||||||
max_state_count = (std::numeric_limits<difference_type>::max)();
|
max_state_count = (std::numeric_limits<boost::uintmax_t>::max)() - 2;
|
||||||
else
|
return;
|
||||||
max_state_count = k + states * dist;
|
}
|
||||||
|
states *= dist;
|
||||||
|
if((std::numeric_limits<boost::uintmax_t>::max)() - k < states)
|
||||||
|
{
|
||||||
|
max_state_count = (std::numeric_limits<boost::uintmax_t>::max)() - 2;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
states += k;
|
||||||
|
|
||||||
|
max_state_count = states;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Now calculate N^2:
|
||||||
|
//
|
||||||
|
states = dist;
|
||||||
|
if((std::numeric_limits<boost::uintmax_t>::max)() / dist < states)
|
||||||
|
{
|
||||||
|
max_state_count = (std::numeric_limits<boost::uintmax_t>::max)() - 2;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
states *= dist;
|
||||||
|
if((std::numeric_limits<boost::uintmax_t>::max)() - k < states)
|
||||||
|
{
|
||||||
|
max_state_count = (std::numeric_limits<boost::uintmax_t>::max)() - 2;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
states += k;
|
||||||
|
//
|
||||||
|
// N^2 can be a very large number indeed; to prevent things getting out
|
||||||
|
// of control, cap the max states:
|
||||||
|
//
|
||||||
|
if(states > BOOST_REGEX_MAX_STATE_COUNT)
|
||||||
|
states = BOOST_REGEX_MAX_STATE_COUNT;
|
||||||
|
//
|
||||||
|
// If (the possibly capped) N^2 is larger than our first estimate,
|
||||||
|
// use this instead:
|
||||||
|
//
|
||||||
|
if(states > max_state_count)
|
||||||
|
max_state_count = states;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class BidiIterator, class Allocator, class traits>
|
template <class BidiIterator, class Allocator, class traits>
|
||||||
|
@ -44,6 +44,12 @@ int test_main( int , char* [] )
|
|||||||
BOOST_CHECK_THROW(boost::regex_search(bad_text, what, e2), std::runtime_error);
|
BOOST_CHECK_THROW(boost::regex_search(bad_text, what, e2), std::runtime_error);
|
||||||
BOOST_CHECK(boost::regex_search(good_text, what, e2));
|
BOOST_CHECK(boost::regex_search(good_text, what, e2));
|
||||||
|
|
||||||
|
bad_text.assign((std::string::size_type)500000, 'a');
|
||||||
|
e2.assign("aaa*@");
|
||||||
|
BOOST_CHECK_THROW(0 == boost::regex_search(bad_text, what, e2), std::runtime_error);
|
||||||
|
good_text.assign((std::string::size_type)5000, 'a');
|
||||||
|
BOOST_CHECK(0 == boost::regex_search(good_text, what, e2));
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user