diff --git a/include/boost/regex/detail/regex_match.hpp b/include/boost/regex/detail/regex_match.hpp index a1b2e992..b84436e3 100644 --- a/include/boost/regex/detail/regex_match.hpp +++ b/include/boost/regex/detail/regex_match.hpp @@ -26,6 +26,12 @@ #ifndef BOOST_REGEX_MATCH_HPP #define BOOST_REGEX_MATCH_HPP +#ifndef BOOST_REGEX_MAX_STATE_COUNT +# define BOOST_REGEX_MAX_STATE_COUNT 100000000 +#endif + +#include + namespace boost{ namespace re_detail{ @@ -175,6 +181,7 @@ class _priv_match_data public: typedef typename boost::detail::rebind_allocator::type i_alloc; typedef typename boost::detail::rebind_allocator::type it_alloc; + typedef typename regex_iterator_traits::difference_type difference_type; match_results_base temp_match; // failure stacks: @@ -184,9 +191,11 @@ public: jstack prev_acc; int* accumulators; unsigned int caccumulators; + difference_type state_count; + difference_type max_state_count; iterator* loop_starts; - _priv_match_data(const match_results_base&); + _priv_match_data(const match_results_base&, iterator, iterator, unsigned); ~_priv_match_data() { @@ -202,15 +211,32 @@ public: { return loop_starts; } + void estimate_max_state_count(iterator a, iterator b, unsigned states, std::random_access_iterator_tag*) + { + difference_type dist = std::distance(a,b); + if(dist > (std::numeric_limits::max() / (states * 3))) + max_state_count = std::numeric_limits::max(); + else + max_state_count = states * 3 * dist; + if(max_state_count < 1000) + max_state_count = 1000; + } + void estimate_max_state_count(iterator a, iterator b, unsigned states, void*) + { + // we don't know how long the sequence is: + max_state_count = BOOST_REGEX_MAX_STATE_COUNT; + } }; template -_priv_match_data::_priv_match_data(const match_results_base& m) +_priv_match_data::_priv_match_data(const match_results_base& m, iterator a, iterator b, unsigned states) : temp_match(m), matches(64, m.allocator()), prev_pos(64, m.allocator()), prev_record(64, m.allocator()) { accumulators = 0; caccumulators = 0; loop_starts = 0; + state_count = 0; + estimate_max_state_count(a, b, states, static_cast::iterator_category*>(0)); } template @@ -297,6 +323,7 @@ bool query_match_aux(iterator first, typedef typename traits::size_type traits_size_type; typedef typename traits::uchar_type traits_uchar_type; typedef typename is_byte::width_type width_type; + typedef typename re_detail::regex_iterator_traits::difference_type difference_type; // declare some local aliases to reduce pointer loads // good optimising compilers should make this unnecessary!! @@ -306,6 +333,7 @@ bool query_match_aux(iterator first, jstack& prev_acc = pd.prev_acc; match_results_base& temp_match = pd.temp_match; temp_match.set_first(first); + difference_type& state_count = pd.state_count; const re_syntax_base* ptr = access::first(e); bool match_found = false; @@ -337,6 +365,7 @@ bool query_match_aux(iterator first, while(first != last) { jm_assert(ptr); + ++state_count; switch(ptr->type) { case syntax_element_match: @@ -816,6 +845,7 @@ bool query_match_aux(iterator first, while(true) { jm_assert(ptr); + ++state_count; switch(ptr->type) { case syntax_element_match: @@ -929,6 +959,14 @@ bool query_match_aux(iterator first, failure: + // + // check to see if we've been searching too many states: + // + if(state_count >= pd.max_state_count) + { + throw std::runtime_error("Max regex search depth exceeded."); + } + // // check for possible partial match: // @@ -1177,7 +1215,7 @@ unsigned int reg_grep2(Predicate foo, I first, I last, const reg_expression pd(m); + _priv_match_data pd(m, first, last, e.size()); const unsigned char* _map = access::get_map(e); unsigned int type; @@ -1648,7 +1686,7 @@ bool regex_match(iterator first, iterator last, match_results pd(m); + re_detail::_priv_match_data pd(m, first, last, e.size()); iterator restart; bool result = re_detail::query_match_aux(first, last, m, e, flags, pd, &restart); return result;