mirror of
https://github.com/boostorg/regex.git
synced 2025-07-16 22:02:08 +02:00
Add COMMIT support plus lots of tests.
This commit is contained in:
@ -164,9 +164,9 @@ struct regex_data : public named_subexpressions
|
||||
|
||||
regex_data(const ::boost::shared_ptr<
|
||||
::boost::regex_traits_wrapper<traits> >& t)
|
||||
: m_ptraits(t), m_expression(0), m_expression_len(0) {}
|
||||
: m_ptraits(t), m_expression(0), m_expression_len(0), m_disable_match_any(false) {}
|
||||
regex_data()
|
||||
: m_ptraits(new ::boost::regex_traits_wrapper<traits>()), m_expression(0), m_expression_len(0) {}
|
||||
: m_ptraits(new ::boost::regex_traits_wrapper<traits>()), m_expression(0), m_expression_len(0), m_disable_match_any(false) {}
|
||||
|
||||
::boost::shared_ptr<
|
||||
::boost::regex_traits_wrapper<traits>
|
||||
@ -186,6 +186,7 @@ struct regex_data : public named_subexpressions
|
||||
std::pair<
|
||||
std::size_t, std::size_t> > m_subs; // Position of sub-expressions within the *string*.
|
||||
bool m_has_recursions; // whether we have recursive expressions;
|
||||
bool m_disable_match_any; // when set we need to disable the match_any flag as it causes different/buggy behaviour.
|
||||
};
|
||||
//
|
||||
// class basic_regex_implementation
|
||||
|
@ -1149,6 +1149,7 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
|
||||
set_all_masks(l_map, mask);
|
||||
return;
|
||||
}
|
||||
case syntax_element_accept:
|
||||
case syntax_element_match:
|
||||
{
|
||||
// must be null, any character can match:
|
||||
@ -1335,6 +1336,11 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
|
||||
state = state->next.p;
|
||||
break;
|
||||
|
||||
case syntax_element_commit:
|
||||
set_all_masks(l_map, mask);
|
||||
// Continue scanning so we can figure out whether we can be null:
|
||||
state = state->next.p;
|
||||
break;
|
||||
case syntax_element_startmark:
|
||||
// need to handle independent subs as a special case:
|
||||
if(static_cast<re_brace*>(state)->index == -3)
|
||||
|
@ -2740,6 +2740,31 @@ bool basic_regex_parser<charT, traits>::parse_perl_verb()
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
case 'C':
|
||||
if(++m_position == m_end)
|
||||
{
|
||||
// Rewind to start of (* sequence:
|
||||
--m_position;
|
||||
while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
||||
fail(regex_constants::error_perl_extension, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
if(match_verb("OMMIT"))
|
||||
{
|
||||
if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
|
||||
{
|
||||
// Rewind to start of (* sequence:
|
||||
--m_position;
|
||||
while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
||||
fail(regex_constants::error_perl_extension, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
++m_position;
|
||||
this->append_state(syntax_element_commit);
|
||||
this->m_pdata->m_disable_match_any = true;
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -445,6 +445,8 @@ private:
|
||||
bool match_recursion();
|
||||
bool match_fail();
|
||||
bool match_accept();
|
||||
bool match_commit();
|
||||
bool skip_until_paren(int index, bool match = true);
|
||||
|
||||
// find procs stored in s_find_vtable:
|
||||
bool find_restart_any();
|
||||
@ -527,6 +529,7 @@ private:
|
||||
bool unwind_non_greedy_repeat(bool);
|
||||
bool unwind_recursion(bool);
|
||||
bool unwind_recursion_pop(bool);
|
||||
bool unwind_commit(bool);
|
||||
void destroy_single_repeat();
|
||||
void push_matched_paren(int index, const sub_match<BidiIterator>& sub);
|
||||
void push_recursion_stopper();
|
||||
|
@ -85,6 +85,9 @@ void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_r
|
||||
m_word_mask = re.get_data().m_word_mask;
|
||||
// find bitmask to use for matching '.':
|
||||
match_any_mask = static_cast<unsigned char>((f & match_not_dot_newline) ? BOOST_REGEX_DETAIL_NS::test_not_newline : BOOST_REGEX_DETAIL_NS::test_newline);
|
||||
// Disable match_any if requested in the state machine:
|
||||
if(e.get_data().m_disable_match_any)
|
||||
m_match_flags &= ~regex_constants::match_any;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
@ -800,21 +803,6 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_fail()
|
||||
return false;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_accept()
|
||||
{
|
||||
// Almost the same as match_match, but we need to close any half-open capturing groups:
|
||||
for(unsigned i = 1; i < m_result.size(); ++i)
|
||||
{
|
||||
if((m_result[i].matched == false) && (m_result[i].first != last))
|
||||
{
|
||||
m_result.set_second(position, i);
|
||||
}
|
||||
}
|
||||
return match_match();
|
||||
}
|
||||
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_any()
|
||||
{
|
||||
|
@ -141,7 +141,7 @@ struct saved_recursion : public saved_state
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
|
||||
{
|
||||
static matcher_proc_type const s_match_vtable[32] =
|
||||
static matcher_proc_type const s_match_vtable[33] =
|
||||
{
|
||||
(&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
|
||||
@ -179,6 +179,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_recursion,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_fail,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_accept,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_commit,
|
||||
};
|
||||
|
||||
push_recursion_stopper();
|
||||
@ -1006,6 +1007,116 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_match()
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_commit()
|
||||
{
|
||||
// Ideally we would just junk all the states that are on the stack,
|
||||
// however we might not unwind correctly in that case, so for now,
|
||||
// just mark that we don't backtrack into whatever is left (or rather
|
||||
// we'll unwind it unconditionally without pausing to try other matches).
|
||||
saved_state* pmp = m_backup_state;
|
||||
--pmp;
|
||||
if(pmp < m_stack_base)
|
||||
{
|
||||
extend_stack();
|
||||
pmp = m_backup_state;
|
||||
--pmp;
|
||||
}
|
||||
(void) new (pmp)saved_state(16);
|
||||
m_backup_state = pmp;
|
||||
pstate = pstate->next.p;
|
||||
// If we don't find a match we don't want to search further either:
|
||||
restart = last;
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::skip_until_paren(int index, bool match)
|
||||
{
|
||||
while(pstate)
|
||||
{
|
||||
if(pstate->type == syntax_element_endmark)
|
||||
{
|
||||
if(static_cast<const re_brace*>(pstate)->index == index)
|
||||
{
|
||||
if(match)
|
||||
return this->match_endmark();
|
||||
pstate = pstate->next.p;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Unenclosed closing ), occurs when (*ACCEPT) is inside some other
|
||||
// parenthesis which may or may not have other side effects associated with it.
|
||||
match_endmark();
|
||||
if(!pstate)
|
||||
{
|
||||
unwind(true);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
else if(pstate->type == syntax_element_match)
|
||||
return true;
|
||||
else if(pstate->type == syntax_element_startmark)
|
||||
{
|
||||
int index = static_cast<const re_brace*>(pstate)->index;
|
||||
pstate = pstate->next.p;
|
||||
skip_until_paren(index, false);
|
||||
continue;
|
||||
}
|
||||
pstate = pstate->next.p;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_accept()
|
||||
{
|
||||
#if 0
|
||||
// Almost the same as match_match, but we need to close any half-open capturing groups:
|
||||
for(unsigned i = 1; i < m_result.size(); ++i)
|
||||
{
|
||||
if((m_result[i].matched == false) && (m_result[i].first != last))
|
||||
{
|
||||
m_result.set_second(position, i);
|
||||
}
|
||||
}
|
||||
if(!recursion_stack.empty())
|
||||
{
|
||||
// Skip forward to the end of this recursion:
|
||||
while(pstate)
|
||||
{
|
||||
if(pstate->type == syntax_element_endmark)
|
||||
if(static_cast<const re_brace*>(pstate)->index == recursion_stack.back().idx)
|
||||
break;
|
||||
pstate = pstate->next.p;
|
||||
}
|
||||
return true;
|
||||
/*
|
||||
int index = recursion_stack.back().idx;
|
||||
pstate = recursion_stack.back().preturn_address;
|
||||
*m_presult = recursion_stack.back().results;
|
||||
push_recursion(index, recursion_stack.back().preturn_address, &recursion_stack.back().results);
|
||||
recursion_stack.pop_back();
|
||||
push_repeater_count(-(2 + index), &next_count);
|
||||
return true;
|
||||
*/
|
||||
}
|
||||
else
|
||||
return match_match();
|
||||
#endif
|
||||
if(!recursion_stack.empty())
|
||||
{
|
||||
skip_until_paren(recursion_stack.back().idx);
|
||||
}
|
||||
else
|
||||
{
|
||||
skip_until_paren(INT_MAX);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
|
||||
Unwind and associated procedures follow, these perform what normal stack
|
||||
@ -1034,6 +1145,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind(bool have_match)
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::unwind_non_greedy_repeat,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion_pop,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::unwind_commit,
|
||||
};
|
||||
|
||||
m_recursive_result = have_match;
|
||||
@ -1583,6 +1695,15 @@ void perl_matcher<BidiIterator, Allocator, traits>::push_recursion_pop()
|
||||
(void) new (pmp)saved_state(15);
|
||||
m_backup_state = pmp;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::unwind_commit(bool b)
|
||||
{
|
||||
boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(m_backup_state++);
|
||||
while(unwind(b)) {}
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::unwind_parenthesis_pop(bool r)
|
||||
|
@ -60,7 +60,7 @@ public:
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
|
||||
{
|
||||
static matcher_proc_type const s_match_vtable[32] =
|
||||
static matcher_proc_type const s_match_vtable[33] =
|
||||
{
|
||||
(&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
|
||||
@ -98,6 +98,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_recursion,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_fail,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_accept,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_commit,
|
||||
};
|
||||
|
||||
if(state_count > max_state_count)
|
||||
|
@ -124,6 +124,7 @@ enum syntax_element_type
|
||||
// Verbs:
|
||||
syntax_element_fail = syntax_element_recurse + 1,
|
||||
syntax_element_accept = syntax_element_fail + 1,
|
||||
syntax_element_commit = syntax_element_accept + 1,
|
||||
};
|
||||
|
||||
#ifdef BOOST_REGEX_DEBUG
|
||||
|
Reference in New Issue
Block a user