From fbf5c7d62c09e49a0c0efcdaf3ddad81bb213596 Mon Sep 17 00:00:00 2001 From: John Maddock Date: Thu, 16 Jun 2011 11:27:13 +0000 Subject: [PATCH] Fix infinite recursion in bad recursive expressions. Fix bug that allows invalid regex to go unnoticed and crash later. Fixes #5613. Fixes #5612. [SVN r72612] --- .../boost/regex/v4/basic_regex_creator.hpp | 23 +++++++++++++++---- include/boost/regex/v4/basic_regex_parser.hpp | 3 ++- test/regress/test_perl_ex.cpp | 1 + test/regress/test_simple_repeats.cpp | 3 +++ 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/include/boost/regex/v4/basic_regex_creator.hpp b/include/boost/regex/v4/basic_regex_creator.hpp index efa9f7dd..6c1acd78 100644 --- a/include/boost/regex/v4/basic_regex_creator.hpp +++ b/include/boost/regex/v4/basic_regex_creator.hpp @@ -241,6 +241,7 @@ protected: unsigned m_backrefs; // bitmask of permitted backrefs boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for; bool m_has_recursions; // set when we have recursive expresisons to fixup + std::vector m_recursion_checks; // notes which recursions we've followed while analysing this expression typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character typename traits::char_class_type m_lower_mask; // mask used to determine if a character is a lowercase character @@ -712,6 +713,8 @@ void basic_regex_creator::finalize(const charT* p1, const charT* m_pdata->m_can_be_null = 0; m_bad_repeats = 0; + if(m_has_recursions) + m_recursion_checks.assign(1 + m_pdata->m_mark_count, false); create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all); // get the restart type: m_pdata->m_restart_type = get_restart_type(m_pdata->m_first_state); @@ -948,9 +951,14 @@ void basic_regex_creator::create_startmaps(re_syntax_base* state) state = state->next.p; } } + // now work through our list, building all the maps as we go: while(v.size()) { + // Initialize m_recursion_checks if we need it: + if(m_has_recursions) + m_recursion_checks.assign(1 + m_pdata->m_mark_count, false); + const std::pair& p = v.back(); m_icase = p.first; state = p.second; @@ -960,6 +968,9 @@ void basic_regex_creator::create_startmaps(re_syntax_base* state) m_bad_repeats = 0; create_startmap(state->next.p, static_cast(state)->_map, &static_cast(state)->can_be_null, mask_take); m_bad_repeats = 0; + + if(m_has_recursions) + m_recursion_checks.assign(1 + m_pdata->m_mark_count, false); create_startmap(static_cast(state)->alt.p, static_cast(state)->_map, &static_cast(state)->can_be_null, mask_skip); // adjust the type of the state to allow for faster matching: state->type = this->get_repeat_type(state); @@ -1114,7 +1125,11 @@ void basic_regex_creator::create_startmap(re_syntax_base* state, } case syntax_element_recurse: { - if(recursion_start == state) + if(state->type == syntax_element_startmark) + recursion_sub = static_cast(state)->index; + else + recursion_sub = 0; + if(m_recursion_checks[recursion_sub]) { // Infinite recursion!! if(0 == this->m_pdata->m_status) // update the error code if not already set @@ -1139,12 +1154,10 @@ void basic_regex_creator::create_startmap(re_syntax_base* state, recursion_start = state; recursion_restart = state->next.p; state = static_cast(state)->alt.p; - if(state->type == syntax_element_startmark) - recursion_sub = static_cast(state)->index; - else - recursion_sub = 0; + m_recursion_checks[recursion_sub] = true; break; } + m_recursion_checks[recursion_sub] = true; // fall through, can't handle nested recursion here... } case syntax_element_backref: diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp index f9d4399b..87ff6e20 100644 --- a/include/boost/regex/v4/basic_regex_parser.hpp +++ b/include/boost/regex/v4/basic_regex_parser.hpp @@ -1026,13 +1026,14 @@ bool basic_regex_parser::parse_repeat(std::size_t low, std::size_ { // // Check for illegal following quantifier, we have to do this here, because - // the extra states we insert below circumvents are usual error checking :-( + // the extra states we insert below circumvents our usual error checking :-( // switch(this->m_traits.syntax_type(*m_position)) { case regex_constants::syntax_star: case regex_constants::syntax_plus: case regex_constants::syntax_question: + case regex_constants::syntax_open_brace: fail(regex_constants::error_badrepeat, m_position - m_base); return false; } diff --git a/test/regress/test_perl_ex.cpp b/test/regress/test_perl_ex.cpp index 3307d314..8a14b826 100644 --- a/test/regress/test_perl_ex.cpp +++ b/test/regress/test_perl_ex.cpp @@ -908,6 +908,7 @@ void test_recursion() // Bugs: TEST_REGEX_SEARCH("namespace\\s+(\\w+)\\s+(\\{(?:[^{}]*(?:(?2)[^{}]*)*)?\\})", perl, "namespace one { namespace two { int foo(); } }", match_default, make_array(0, 46, 10, 13, 14, 46, -2, -2)); TEST_REGEX_SEARCH("namespace\\s+(\\w+)\\s+(\\{(?:[^{}]*(?:(?2)[^{}]*)*)?\\})", perl, "namespace one { namespace two { int foo(){} } { {{{ } } } } {}}", match_default, make_array(0, 64, 10, 13, 14, 64, -2, -2)); + TEST_INVALID_REGEX("((?1)|a)", perl); // Recursion to a named sub with a name that is used multiple times: TEST_REGEX_SEARCH("(?:(?a+)|(?b+))\\.(?&A)", perl, "aaaa.aa", match_default, make_array(0, 7, 0, 4, -1, -1, -2, -2)); diff --git a/test/regress/test_simple_repeats.cpp b/test/regress/test_simple_repeats.cpp index 30bcae5a..044ca2fd 100644 --- a/test/regress/test_simple_repeats.cpp +++ b/test/regress/test_simple_repeats.cpp @@ -477,6 +477,9 @@ void test_pocessive_repeats() TEST_REGEX_SEARCH("x{1,4}+\\w", perl, "xxxxxa", match_default, make_array(0, 5, -2, -2)); TEST_REGEX_SEARCH("x{1,3}+\\w", perl, "xxxxxa", match_default, make_array(0, 4, -2, 4, 6, -2, -2)); TEST_INVALID_REGEX("\\d+++", perl); + TEST_INVALID_REGEX("\\d++*", perl); + TEST_INVALID_REGEX("\\d++?", perl); + TEST_INVALID_REGEX("\\d++{3}", perl); TEST_INVALID_REGEX("\\d*++", perl); TEST_INVALID_REGEX("\\d?++", perl); TEST_INVALID_REGEX("\\d{1,2}++", perl);