Add COMMIT support plus lots of tests.

This commit is contained in:
jzmaddock
2015-09-29 17:40:43 +01:00
parent 2580fb035f
commit c281c9cc40
10 changed files with 308 additions and 19 deletions

View File

@ -164,9 +164,9 @@ struct regex_data : public named_subexpressions
regex_data(const ::boost::shared_ptr<
::boost::regex_traits_wrapper<traits> >& t)
: m_ptraits(t), m_expression(0), m_expression_len(0) {}
: m_ptraits(t), m_expression(0), m_expression_len(0), m_disable_match_any(false) {}
regex_data()
: m_ptraits(new ::boost::regex_traits_wrapper<traits>()), m_expression(0), m_expression_len(0) {}
: m_ptraits(new ::boost::regex_traits_wrapper<traits>()), m_expression(0), m_expression_len(0), m_disable_match_any(false) {}
::boost::shared_ptr<
::boost::regex_traits_wrapper<traits>
@ -186,6 +186,7 @@ struct regex_data : public named_subexpressions
std::pair<
std::size_t, std::size_t> > m_subs; // Position of sub-expressions within the *string*.
bool m_has_recursions; // whether we have recursive expressions;
bool m_disable_match_any; // when set we need to disable the match_any flag as it causes different/buggy behaviour.
};
//
// class basic_regex_implementation

View File

@ -1149,6 +1149,7 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
set_all_masks(l_map, mask);
return;
}
case syntax_element_accept:
case syntax_element_match:
{
// must be null, any character can match:
@ -1335,6 +1336,11 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
state = state->next.p;
break;
case syntax_element_commit:
set_all_masks(l_map, mask);
// Continue scanning so we can figure out whether we can be null:
state = state->next.p;
break;
case syntax_element_startmark:
// need to handle independent subs as a special case:
if(static_cast<re_brace*>(state)->index == -3)

View File

@ -2740,6 +2740,31 @@ bool basic_regex_parser<charT, traits>::parse_perl_verb()
return true;
}
break;
case 'C':
if(++m_position == m_end)
{
// Rewind to start of (* sequence:
--m_position;
while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
fail(regex_constants::error_perl_extension, m_position - m_base);
return false;
}
if(match_verb("OMMIT"))
{
if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
{
// Rewind to start of (* sequence:
--m_position;
while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
fail(regex_constants::error_perl_extension, m_position - m_base);
return false;
}
++m_position;
this->append_state(syntax_element_commit);
this->m_pdata->m_disable_match_any = true;
return true;
}
break;
}
return false;
}

View File

@ -445,6 +445,8 @@ private:
bool match_recursion();
bool match_fail();
bool match_accept();
bool match_commit();
bool skip_until_paren(int index, bool match = true);
// find procs stored in s_find_vtable:
bool find_restart_any();
@ -527,6 +529,7 @@ private:
bool unwind_non_greedy_repeat(bool);
bool unwind_recursion(bool);
bool unwind_recursion_pop(bool);
bool unwind_commit(bool);
void destroy_single_repeat();
void push_matched_paren(int index, const sub_match<BidiIterator>& sub);
void push_recursion_stopper();

View File

@ -85,6 +85,9 @@ void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_r
m_word_mask = re.get_data().m_word_mask;
// find bitmask to use for matching '.':
match_any_mask = static_cast<unsigned char>((f & match_not_dot_newline) ? BOOST_REGEX_DETAIL_NS::test_not_newline : BOOST_REGEX_DETAIL_NS::test_newline);
// Disable match_any if requested in the state machine:
if(e.get_data().m_disable_match_any)
m_match_flags &= ~regex_constants::match_any;
}
template <class BidiIterator, class Allocator, class traits>
@ -800,21 +803,6 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_fail()
return false;
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_accept()
{
// Almost the same as match_match, but we need to close any half-open capturing groups:
for(unsigned i = 1; i < m_result.size(); ++i)
{
if((m_result[i].matched == false) && (m_result[i].first != last))
{
m_result.set_second(position, i);
}
}
return match_match();
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_any()
{

View File

@ -141,7 +141,7 @@ struct saved_recursion : public saved_state
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
{
static matcher_proc_type const s_match_vtable[32] =
static matcher_proc_type const s_match_vtable[33] =
{
(&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
&perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
@ -179,6 +179,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
&perl_matcher<BidiIterator, Allocator, traits>::match_recursion,
&perl_matcher<BidiIterator, Allocator, traits>::match_fail,
&perl_matcher<BidiIterator, Allocator, traits>::match_accept,
&perl_matcher<BidiIterator, Allocator, traits>::match_commit,
};
push_recursion_stopper();
@ -1006,6 +1007,116 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_match()
return true;
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_commit()
{
// Ideally we would just junk all the states that are on the stack,
// however we might not unwind correctly in that case, so for now,
// just mark that we don't backtrack into whatever is left (or rather
// we'll unwind it unconditionally without pausing to try other matches).
saved_state* pmp = m_backup_state;
--pmp;
if(pmp < m_stack_base)
{
extend_stack();
pmp = m_backup_state;
--pmp;
}
(void) new (pmp)saved_state(16);
m_backup_state = pmp;
pstate = pstate->next.p;
// If we don't find a match we don't want to search further either:
restart = last;
return true;
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::skip_until_paren(int index, bool match)
{
while(pstate)
{
if(pstate->type == syntax_element_endmark)
{
if(static_cast<const re_brace*>(pstate)->index == index)
{
if(match)
return this->match_endmark();
pstate = pstate->next.p;
return true;
}
else
{
// Unenclosed closing ), occurs when (*ACCEPT) is inside some other
// parenthesis which may or may not have other side effects associated with it.
match_endmark();
if(!pstate)
{
unwind(true);
}
}
continue;
}
else if(pstate->type == syntax_element_match)
return true;
else if(pstate->type == syntax_element_startmark)
{
int index = static_cast<const re_brace*>(pstate)->index;
pstate = pstate->next.p;
skip_until_paren(index, false);
continue;
}
pstate = pstate->next.p;
}
return true;
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_accept()
{
#if 0
// Almost the same as match_match, but we need to close any half-open capturing groups:
for(unsigned i = 1; i < m_result.size(); ++i)
{
if((m_result[i].matched == false) && (m_result[i].first != last))
{
m_result.set_second(position, i);
}
}
if(!recursion_stack.empty())
{
// Skip forward to the end of this recursion:
while(pstate)
{
if(pstate->type == syntax_element_endmark)
if(static_cast<const re_brace*>(pstate)->index == recursion_stack.back().idx)
break;
pstate = pstate->next.p;
}
return true;
/*
int index = recursion_stack.back().idx;
pstate = recursion_stack.back().preturn_address;
*m_presult = recursion_stack.back().results;
push_recursion(index, recursion_stack.back().preturn_address, &recursion_stack.back().results);
recursion_stack.pop_back();
push_repeater_count(-(2 + index), &next_count);
return true;
*/
}
else
return match_match();
#endif
if(!recursion_stack.empty())
{
skip_until_paren(recursion_stack.back().idx);
}
else
{
skip_until_paren(INT_MAX);
}
return true;
}
/****************************************************************************
Unwind and associated proceedures follow, these perform what normal stack
@ -1034,6 +1145,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind(bool have_match)
&perl_matcher<BidiIterator, Allocator, traits>::unwind_non_greedy_repeat,
&perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion,
&perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion_pop,
&perl_matcher<BidiIterator, Allocator, traits>::unwind_commit,
};
m_recursive_result = have_match;
@ -1583,6 +1695,15 @@ void perl_matcher<BidiIterator, Allocator, traits>::push_recursion_pop()
(void) new (pmp)saved_state(15);
m_backup_state = pmp;
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::unwind_commit(bool b)
{
boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(m_backup_state++);
while(unwind(b)) {}
return false;
}
/*
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::unwind_parenthesis_pop(bool r)

View File

@ -60,7 +60,7 @@ public:
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
{
static matcher_proc_type const s_match_vtable[32] =
static matcher_proc_type const s_match_vtable[33] =
{
(&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
&perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
@ -98,6 +98,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
&perl_matcher<BidiIterator, Allocator, traits>::match_recursion,
&perl_matcher<BidiIterator, Allocator, traits>::match_fail,
&perl_matcher<BidiIterator, Allocator, traits>::match_accept,
&perl_matcher<BidiIterator, Allocator, traits>::match_commit,
};
if(state_count > max_state_count)

View File

@ -124,6 +124,7 @@ enum syntax_element_type
// Verbs:
syntax_element_fail = syntax_element_recurse + 1,
syntax_element_accept = syntax_element_fail + 1,
syntax_element_commit = syntax_element_accept + 1,
};
#ifdef BOOST_REGEX_DEBUG