Fix infinite recursion in bad recursive expressions.

Fix bug that allows invalid regex to go unnoticed and crash later.
Fixes #5613.
Fixes #5612.

[SVN r72612]
This commit is contained in:
John Maddock
2011-06-16 11:27:13 +00:00
parent 6c181b02ba
commit fbf5c7d62c
4 changed files with 24 additions and 6 deletions

View File

@ -241,6 +241,7 @@ protected:
unsigned m_backrefs; // bitmask of permitted backrefs unsigned m_backrefs; // bitmask of permitted backrefs
boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for; boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for;
bool m_has_recursions; // set when we have recursive expresisons to fixup bool m_has_recursions; // set when we have recursive expresisons to fixup
std::vector<bool> m_recursion_checks; // notes which recursions we've followed while analysing this expression
typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character
typename traits::char_class_type m_lower_mask; // mask used to determine if a character is a lowercase character typename traits::char_class_type m_lower_mask; // mask used to determine if a character is a lowercase character
@ -712,6 +713,8 @@ void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT*
m_pdata->m_can_be_null = 0; m_pdata->m_can_be_null = 0;
m_bad_repeats = 0; m_bad_repeats = 0;
if(m_has_recursions)
m_recursion_checks.assign(1 + m_pdata->m_mark_count, false);
create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all); create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all);
// get the restart type: // get the restart type:
m_pdata->m_restart_type = get_restart_type(m_pdata->m_first_state); m_pdata->m_restart_type = get_restart_type(m_pdata->m_first_state);
@ -948,9 +951,14 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
state = state->next.p; state = state->next.p;
} }
} }
// now work through our list, building all the maps as we go: // now work through our list, building all the maps as we go:
while(v.size()) while(v.size())
{ {
// Initialize m_recursion_checks if we need it:
if(m_has_recursions)
m_recursion_checks.assign(1 + m_pdata->m_mark_count, false);
const std::pair<bool, re_syntax_base*>& p = v.back(); const std::pair<bool, re_syntax_base*>& p = v.back();
m_icase = p.first; m_icase = p.first;
state = p.second; state = p.second;
@ -960,6 +968,9 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
m_bad_repeats = 0; m_bad_repeats = 0;
create_startmap(state->next.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_take); create_startmap(state->next.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_take);
m_bad_repeats = 0; m_bad_repeats = 0;
if(m_has_recursions)
m_recursion_checks.assign(1 + m_pdata->m_mark_count, false);
create_startmap(static_cast<re_alt*>(state)->alt.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_skip); create_startmap(static_cast<re_alt*>(state)->alt.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_skip);
// adjust the type of the state to allow for faster matching: // adjust the type of the state to allow for faster matching:
state->type = this->get_repeat_type(state); state->type = this->get_repeat_type(state);
@ -1114,7 +1125,11 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
} }
case syntax_element_recurse: case syntax_element_recurse:
{ {
if(recursion_start == state) if(state->type == syntax_element_startmark)
recursion_sub = static_cast<re_brace*>(state)->index;
else
recursion_sub = 0;
if(m_recursion_checks[recursion_sub])
{ {
// Infinite recursion!! // Infinite recursion!!
if(0 == this->m_pdata->m_status) // update the error code if not already set if(0 == this->m_pdata->m_status) // update the error code if not already set
@ -1139,12 +1154,10 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
recursion_start = state; recursion_start = state;
recursion_restart = state->next.p; recursion_restart = state->next.p;
state = static_cast<re_jump*>(state)->alt.p; state = static_cast<re_jump*>(state)->alt.p;
if(state->type == syntax_element_startmark) m_recursion_checks[recursion_sub] = true;
recursion_sub = static_cast<re_brace*>(state)->index;
else
recursion_sub = 0;
break; break;
} }
m_recursion_checks[recursion_sub] = true;
// fall through, can't handle nested recursion here... // fall through, can't handle nested recursion here...
} }
case syntax_element_backref: case syntax_element_backref:

View File

@ -1026,13 +1026,14 @@ bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_
{ {
// //
// Check for illegal following quantifier, we have to do this here, because // Check for illegal following quantifier, we have to do this here, because
// the extra states we insert below circumvents are usual error checking :-( // the extra states we insert below circumvents our usual error checking :-(
// //
switch(this->m_traits.syntax_type(*m_position)) switch(this->m_traits.syntax_type(*m_position))
{ {
case regex_constants::syntax_star: case regex_constants::syntax_star:
case regex_constants::syntax_plus: case regex_constants::syntax_plus:
case regex_constants::syntax_question: case regex_constants::syntax_question:
case regex_constants::syntax_open_brace:
fail(regex_constants::error_badrepeat, m_position - m_base); fail(regex_constants::error_badrepeat, m_position - m_base);
return false; return false;
} }

View File

@ -908,6 +908,7 @@ void test_recursion()
// Bugs: // Bugs:
TEST_REGEX_SEARCH("namespace\\s+(\\w+)\\s+(\\{(?:[^{}]*(?:(?2)[^{}]*)*)?\\})", perl, "namespace one { namespace two { int foo(); } }", match_default, make_array(0, 46, 10, 13, 14, 46, -2, -2)); TEST_REGEX_SEARCH("namespace\\s+(\\w+)\\s+(\\{(?:[^{}]*(?:(?2)[^{}]*)*)?\\})", perl, "namespace one { namespace two { int foo(); } }", match_default, make_array(0, 46, 10, 13, 14, 46, -2, -2));
TEST_REGEX_SEARCH("namespace\\s+(\\w+)\\s+(\\{(?:[^{}]*(?:(?2)[^{}]*)*)?\\})", perl, "namespace one { namespace two { int foo(){} } { {{{ } } } } {}}", match_default, make_array(0, 64, 10, 13, 14, 64, -2, -2)); TEST_REGEX_SEARCH("namespace\\s+(\\w+)\\s+(\\{(?:[^{}]*(?:(?2)[^{}]*)*)?\\})", perl, "namespace one { namespace two { int foo(){} } { {{{ } } } } {}}", match_default, make_array(0, 64, 10, 13, 14, 64, -2, -2));
TEST_INVALID_REGEX("((?1)|a)", perl);
// Recursion to a named sub with a name that is used multiple times: // Recursion to a named sub with a name that is used multiple times:
TEST_REGEX_SEARCH("(?:(?<A>a+)|(?<A>b+))\\.(?&A)", perl, "aaaa.aa", match_default, make_array(0, 7, 0, 4, -1, -1, -2, -2)); TEST_REGEX_SEARCH("(?:(?<A>a+)|(?<A>b+))\\.(?&A)", perl, "aaaa.aa", match_default, make_array(0, 7, 0, 4, -1, -1, -2, -2));

View File

@ -477,6 +477,9 @@ void test_pocessive_repeats()
TEST_REGEX_SEARCH("x{1,4}+\\w", perl, "xxxxxa", match_default, make_array(0, 5, -2, -2)); TEST_REGEX_SEARCH("x{1,4}+\\w", perl, "xxxxxa", match_default, make_array(0, 5, -2, -2));
TEST_REGEX_SEARCH("x{1,3}+\\w", perl, "xxxxxa", match_default, make_array(0, 4, -2, 4, 6, -2, -2)); TEST_REGEX_SEARCH("x{1,3}+\\w", perl, "xxxxxa", match_default, make_array(0, 4, -2, 4, 6, -2, -2));
TEST_INVALID_REGEX("\\d+++", perl); TEST_INVALID_REGEX("\\d+++", perl);
TEST_INVALID_REGEX("\\d++*", perl);
TEST_INVALID_REGEX("\\d++?", perl);
TEST_INVALID_REGEX("\\d++{3}", perl);
TEST_INVALID_REGEX("\\d*++", perl); TEST_INVALID_REGEX("\\d*++", perl);
TEST_INVALID_REGEX("\\d?++", perl); TEST_INVALID_REGEX("\\d?++", perl);
TEST_INVALID_REGEX("\\d{1,2}++", perl); TEST_INVALID_REGEX("\\d{1,2}++", perl);