diff --git a/include/boost/regex/v4/basic_regex_creator.hpp b/include/boost/regex/v4/basic_regex_creator.hpp index d847b96f..0782e456 100644 --- a/include/boost/regex/v4/basic_regex_creator.hpp +++ b/include/boost/regex/v4/basic_regex_creator.hpp @@ -677,6 +677,8 @@ re_syntax_base* basic_regex_creator::append_set( template void basic_regex_creator::finalize(const charT* p1, const charT* p2) { + if(this->m_pdata->m_status) + return; // we've added all the states we need, now finish things off. // start by adding a terminating state: append_state(syntax_element_match); @@ -698,6 +700,8 @@ void basic_regex_creator::finalize(const charT* p1, const charT* { m_pdata->m_has_recursions = true; fixup_recursions(m_pdata->m_first_state); + if(this->m_pdata->m_status) + return; } else m_pdata->m_has_recursions = false; @@ -1012,6 +1016,9 @@ template void basic_regex_creator::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask) { int not_last_jump = 1; + re_syntax_base* recursion_start = 0; + int recursion_sub = 0; + re_syntax_base* recursion_restart = 0; // track case sensitivity: bool l_icase = m_icase; @@ -1057,6 +1064,40 @@ void basic_regex_creator::create_startmap(re_syntax_base* state, return; } case syntax_element_recurse: + { + if(recursion_start == state) + { + // Infinite recursion!! + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = boost::regex_constants::error_bad_pattern; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = "Encountered an infinite recursion."; + boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0); + e.raise(); + } + } + else if(recursion_start == 0) + { + recursion_start = state; + recursion_restart = state->next.p; + state = static_cast(state)->alt.p; + if(state->type == syntax_element_startmark) + recursion_sub = static_cast(state)->index; + else + recursion_sub = 0; + break; + } + // fall through, can't handle nested recursion here... + } case syntax_element_backref: // can be null, and any character can match: if(pnull) @@ -1215,12 +1256,45 @@ void basic_regex_creator::create_startmap(re_syntax_base* state, *pnull |= mask; return; } - else + else if(recursion_start && (recursion_sub != 0) && (recursion_sub == static_cast(state)->index)) { - state = state->next.p; + // recursion termination: + recursion_start = 0; + state = recursion_restart; break; } + // + // Normally we just go to the next state... but if this sub-expression is + // the target of a recursion, then we might be ending a recursion, in which + // case we should check whatever follows that recursion, as well as whatever + // follows this state: + // + if(m_pdata->m_has_recursions && static_cast(state)->index) + { + bool ok = false; + re_syntax_base* p = m_pdata->m_first_state; + while(p) + { + if((p->type == syntax_element_recurse)) + { + re_brace* p2 = static_cast(static_cast(p)->alt.p); + if((p2->type == syntax_element_startmark) && (p2->index == static_cast(state)->index)) + { + ok = true; + break; + } + } + p = p->next.p; + } + if(ok) + { + create_startmap(p->next.p, l_map, pnull, mask); + } + } + state = state->next.p; + break; + case syntax_element_startmark: // need to handle independent subs as a special case: if(static_cast(state)->index == -3)