Merge recursive expressions performance update from Trunk.

Merge Makefile update from Trunk.
Merge history update.

[SVN r58542]
This commit is contained in:
John Maddock
2009-12-28 13:07:04 +00:00
parent ae79f29895
commit e41acbc33f
47 changed files with 3593 additions and 3511 deletions

View File

@ -677,6 +677,8 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
template <class charT, class traits>
void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT* p2)
{
if(this->m_pdata->m_status)
return;
// we've added all the states we need, now finish things off.
// start by adding a terminating state:
append_state(syntax_element_match);
@ -698,6 +700,8 @@ void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT*
{
m_pdata->m_has_recursions = true;
fixup_recursions(m_pdata->m_first_state);
if(this->m_pdata->m_status)
return;
}
else
m_pdata->m_has_recursions = false;
@ -1012,6 +1016,9 @@ template <class charT, class traits>
void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask)
{
int not_last_jump = 1;
re_syntax_base* recursion_start = 0;
int recursion_sub = 0;
re_syntax_base* recursion_restart = 0;
// track case sensitivity:
bool l_icase = m_icase;
@ -1057,6 +1064,40 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
return;
}
case syntax_element_recurse:
{
if(recursion_start == state)
{
// Infinite recursion!!
if(0 == this->m_pdata->m_status) // update the error code if not already set
this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
//
// clear the expression, we should be empty:
//
this->m_pdata->m_expression = 0;
this->m_pdata->m_expression_len = 0;
//
// and throw if required:
//
if(0 == (this->flags() & regex_constants::no_except))
{
std::string message = "Encountered an infinite recursion.";
boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
e.raise();
}
}
else if(recursion_start == 0)
{
recursion_start = state;
recursion_restart = state->next.p;
state = static_cast<re_jump*>(state)->alt.p;
if(state->type == syntax_element_startmark)
recursion_sub = static_cast<re_brace*>(state)->index;
else
recursion_sub = 0;
break;
}
// fall through, can't handle nested recursion here...
}
case syntax_element_backref:
// can be null, and any character can match:
if(pnull)
@ -1215,12 +1256,45 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
*pnull |= mask;
return;
}
else
else if(recursion_start && (recursion_sub != 0) && (recursion_sub == static_cast<re_brace*>(state)->index))
{
state = state->next.p;
// recursion termination:
recursion_start = 0;
state = recursion_restart;
break;
}
//
// Normally we just go to the next state... but if this sub-expression is
// the target of a recursion, then we might be ending a recursion, in which
// case we should check whatever follows that recursion, as well as whatever
// follows this state:
//
if(m_pdata->m_has_recursions && static_cast<re_brace*>(state)->index)
{
bool ok = false;
re_syntax_base* p = m_pdata->m_first_state;
while(p)
{
if((p->type == syntax_element_recurse))
{
re_brace* p2 = static_cast<re_brace*>(static_cast<re_jump*>(p)->alt.p);
if((p2->type == syntax_element_startmark) && (p2->index == static_cast<re_brace*>(state)->index))
{
ok = true;
break;
}
}
p = p->next.p;
}
if(ok)
{
create_startmap(p->next.p, l_map, pnull, mask);
}
}
state = state->next.p;
break;
case syntax_element_startmark:
// need to handle independent subs as a special case:
if(static_cast<re_brace*>(state)->index == -3)

View File

@ -195,8 +195,8 @@ void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_c
// Augment error message with the regular expression text:
//
if(start_pos == position)
start_pos = (std::max)(static_cast<std::ptrdiff_t>(0), position - 10);
std::ptrdiff_t end_pos = (std::min)(position + 10, m_end - m_base);
start_pos = (std::max)(static_cast<std::ptrdiff_t>(0), position - static_cast<std::ptrdiff_t>(10));
std::ptrdiff_t end_pos = (std::min)(position + static_cast<std::ptrdiff_t>(10), static_cast<std::ptrdiff_t>(m_end - m_base));
if(error_code != regex_constants::error_empty)
{
if((start_pos != 0) || (end_pos != (m_end - m_base)))
@ -1683,7 +1683,7 @@ charT basic_regex_parser<charT, traits>::unescape_character()
}
else
{
std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), m_end - m_position);
std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position));
int i = this->m_traits.toi(m_position, m_position + len, 16);
if((i < 0)
|| !valid_value(charT(0), i))

View File

@ -61,7 +61,7 @@ inline bool can_start(unsigned short c, const unsigned char* map, unsigned char
{
return ((c >= (1 << CHAR_BIT)) ? true : map[c] & mask);
}
#if !defined(__hpux) // WCHAR_MIN not usable in pp-directives.
#if !defined(__hpux) && !defined(__WINSCW__)// WCHAR_MIN not usable in pp-directives.
#if defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T)
inline bool can_start(wchar_t c, const unsigned char* map, unsigned char mask)
{

View File

@ -84,7 +84,7 @@ inline bool is_combining<unsigned char>(unsigned char)
{
return false;
}
#ifndef __hpux // can't use WCHAR_MAX/MIN in pp-directives
#if !defined(__hpux) && !defined(__WINSCW__) // can't use WCHAR_MAX/MIN in pp-directives
#ifdef _MSC_VER
template<>
inline bool is_combining<wchar_t>(wchar_t c)