mirror of
https://github.com/boostorg/regex.git
synced 2025-07-25 18:17:27 +02:00
Improve recursion branch-prediction.
[SVN r58466]
This commit is contained in:
@ -677,6 +677,8 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
|
|||||||
template <class charT, class traits>
|
template <class charT, class traits>
|
||||||
void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT* p2)
|
void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT* p2)
|
||||||
{
|
{
|
||||||
|
if(this->m_pdata->m_status)
|
||||||
|
return;
|
||||||
// we've added all the states we need, now finish things off.
|
// we've added all the states we need, now finish things off.
|
||||||
// start by adding a terminating state:
|
// start by adding a terminating state:
|
||||||
append_state(syntax_element_match);
|
append_state(syntax_element_match);
|
||||||
@ -698,6 +700,8 @@ void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT*
|
|||||||
{
|
{
|
||||||
m_pdata->m_has_recursions = true;
|
m_pdata->m_has_recursions = true;
|
||||||
fixup_recursions(m_pdata->m_first_state);
|
fixup_recursions(m_pdata->m_first_state);
|
||||||
|
if(this->m_pdata->m_status)
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
m_pdata->m_has_recursions = false;
|
m_pdata->m_has_recursions = false;
|
||||||
@ -1012,6 +1016,9 @@ template <class charT, class traits>
|
|||||||
void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask)
|
void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask)
|
||||||
{
|
{
|
||||||
int not_last_jump = 1;
|
int not_last_jump = 1;
|
||||||
|
re_syntax_base* recursion_start = 0;
|
||||||
|
int recursion_sub = 0;
|
||||||
|
re_syntax_base* recursion_restart = 0;
|
||||||
|
|
||||||
// track case sensitivity:
|
// track case sensitivity:
|
||||||
bool l_icase = m_icase;
|
bool l_icase = m_icase;
|
||||||
@ -1057,6 +1064,40 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
case syntax_element_recurse:
|
case syntax_element_recurse:
|
||||||
|
{
|
||||||
|
if(recursion_start == state)
|
||||||
|
{
|
||||||
|
// Infinite recursion!!
|
||||||
|
if(0 == this->m_pdata->m_status) // update the error code if not already set
|
||||||
|
this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
|
||||||
|
//
|
||||||
|
// clear the expression, we should be empty:
|
||||||
|
//
|
||||||
|
this->m_pdata->m_expression = 0;
|
||||||
|
this->m_pdata->m_expression_len = 0;
|
||||||
|
//
|
||||||
|
// and throw if required:
|
||||||
|
//
|
||||||
|
if(0 == (this->flags() & regex_constants::no_except))
|
||||||
|
{
|
||||||
|
std::string message = "Encountered an infinite recursion.";
|
||||||
|
boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
|
||||||
|
e.raise();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if(recursion_start == 0)
|
||||||
|
{
|
||||||
|
recursion_start = state;
|
||||||
|
recursion_restart = state->next.p;
|
||||||
|
state = static_cast<re_jump*>(state)->alt.p;
|
||||||
|
if(state->type == syntax_element_startmark)
|
||||||
|
recursion_sub = static_cast<re_brace*>(state)->index;
|
||||||
|
else
|
||||||
|
recursion_sub = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// fall through, can't handle nested recursion here...
|
||||||
|
}
|
||||||
case syntax_element_backref:
|
case syntax_element_backref:
|
||||||
// can be null, and any character can match:
|
// can be null, and any character can match:
|
||||||
if(pnull)
|
if(pnull)
|
||||||
@ -1215,12 +1256,45 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
|
|||||||
*pnull |= mask;
|
*pnull |= mask;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
else
|
else if(recursion_start && (recursion_sub != 0) && (recursion_sub == static_cast<re_brace*>(state)->index))
|
||||||
{
|
{
|
||||||
state = state->next.p;
|
// recursion termination:
|
||||||
|
recursion_start = 0;
|
||||||
|
state = recursion_restart;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Normally we just go to the next state... but if this sub-expression is
|
||||||
|
// the target of a recursion, then we might be ending a recursion, in which
|
||||||
|
// case we should check whatever follows that recursion, as well as whatever
|
||||||
|
// follows this state:
|
||||||
|
//
|
||||||
|
if(m_pdata->m_has_recursions && static_cast<re_brace*>(state)->index)
|
||||||
|
{
|
||||||
|
bool ok = false;
|
||||||
|
re_syntax_base* p = m_pdata->m_first_state;
|
||||||
|
while(p)
|
||||||
|
{
|
||||||
|
if((p->type == syntax_element_recurse))
|
||||||
|
{
|
||||||
|
re_brace* p2 = static_cast<re_brace*>(static_cast<re_jump*>(p)->alt.p);
|
||||||
|
if((p2->type == syntax_element_startmark) && (p2->index == static_cast<re_brace*>(state)->index))
|
||||||
|
{
|
||||||
|
ok = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
p = p->next.p;
|
||||||
|
}
|
||||||
|
if(ok)
|
||||||
|
{
|
||||||
|
create_startmap(p->next.p, l_map, pnull, mask);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
state = state->next.p;
|
||||||
|
break;
|
||||||
|
|
||||||
case syntax_element_startmark:
|
case syntax_element_startmark:
|
||||||
// need to handle independent subs as a special case:
|
// need to handle independent subs as a special case:
|
||||||
if(static_cast<re_brace*>(state)->index == -3)
|
if(static_cast<re_brace*>(state)->index == -3)
|
||||||
|
Reference in New Issue
Block a user