Add support for PRUNE and SKIP (no MARKs though).

This commit is contained in:
jzmaddock
2015-10-01 18:34:59 +01:00
parent 83140ddbed
commit 9a36e035f2
7 changed files with 132 additions and 20 deletions

View File

@ -2760,7 +2760,57 @@ bool basic_regex_parser<charT, traits>::parse_perl_verb()
return false; return false;
} }
++m_position; ++m_position;
this->append_state(syntax_element_commit); static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_commit;
this->m_pdata->m_disable_match_any = true;
return true;
}
break;
case 'P':
if(++m_position == m_end)
{
// Rewind to start of (* sequence:
--m_position;
while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
fail(regex_constants::error_perl_extension, m_position - m_base);
return false;
}
if(match_verb("RUNE"))
{
if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
{
// Rewind to start of (* sequence:
--m_position;
while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
fail(regex_constants::error_perl_extension, m_position - m_base);
return false;
}
++m_position;
static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_prune;
this->m_pdata->m_disable_match_any = true;
return true;
}
break;
case 'S':
if(++m_position == m_end)
{
// Rewind to start of (* sequence:
--m_position;
while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
fail(regex_constants::error_perl_extension, m_position - m_base);
return false;
}
if(match_verb("KIP"))
{
if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
{
// Rewind to start of (* sequence:
--m_position;
while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
fail(regex_constants::error_perl_extension, m_position - m_base);
return false;
}
++m_position;
static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_skip;
this->m_pdata->m_disable_match_any = true; this->m_pdata->m_disable_match_any = true;
return true; return true;
} }

View File

@ -554,6 +554,8 @@ private:
bool m_recursive_result; bool m_recursive_result;
// how many memory blocks have we used up?: // how many memory blocks have we used up?:
unsigned used_block_count; unsigned used_block_count;
// We have unwound to a lookahead/lookbehind, used by COMMIT/PRUNE/SKIP:
bool m_unwound_lookahead;
#endif #endif
// these operations aren't allowed, so are declared private, // these operations aren't allowed, so are declared private,

View File

@ -354,6 +354,9 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_prefix()
#endif #endif
if(!m_has_found_match) if(!m_has_found_match)
position = restart; // reset search postion position = restart; // reset search postion
#ifdef BOOST_REGEX_RECURSIVE
m_can_backtrack = true; // reset for further searches
#endif
return m_has_found_match; return m_has_found_match;
} }

View File

@ -1014,6 +1014,27 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_commit()
// however we might not unwind correctly in that case, so for now, // however we might not unwind correctly in that case, so for now,
// just mark that we don't backtrack into whatever is left (or rather // just mark that we don't backtrack into whatever is left (or rather
// we'll unwind it unconditionally without pausing to try other matches). // we'll unwind it unconditionally without pausing to try other matches).
switch(static_cast<const re_commit*>(pstate)->action)
{
case commit_commit:
if(base != last)
{
restart = last;
--restart;
}
break;
case commit_skip:
if(position != base)
{
restart = position;
--restart;
}
break;
case commit_prune:
break;
}
saved_state* pmp = m_backup_state; saved_state* pmp = m_backup_state;
--pmp; --pmp;
if(pmp < m_stack_base) if(pmp < m_stack_base)
@ -1025,8 +1046,6 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_commit()
(void) new (pmp)saved_state(16); (void) new (pmp)saved_state(16);
m_backup_state = pmp; m_backup_state = pmp;
pstate = pstate->next.p; pstate = pstate->next.p;
// If we don't find a match we don't want to search further either:
restart = last;
return true; return true;
} }
@ -1102,6 +1121,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind(bool have_match)
}; };
m_recursive_result = have_match; m_recursive_result = have_match;
m_unwound_lookahead = false;
unwind_proc_type unwinder; unwind_proc_type unwinder;
bool cont; bool cont;
// //
@ -1166,6 +1186,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_assertion(bool r)
m_recursive_result = pmp->positive ? r : !r; m_recursive_result = pmp->positive ? r : !r;
boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++); boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++);
m_backup_state = pmp; m_backup_state = pmp;
m_unwound_lookahead = true;
return !result; // return false if the assertion was matched to stop search. return !result; // return false if the assertion was matched to stop search.
} }
@ -1653,7 +1674,24 @@ template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::unwind_commit(bool b) bool perl_matcher<BidiIterator, Allocator, traits>::unwind_commit(bool b)
{ {
boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(m_backup_state++); boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(m_backup_state++);
while(unwind(b)) {} while(unwind(b) && !m_unwound_lookahead){}
if(m_unwound_lookahead && pstate)
{
//
// If we stop because we just unwound an assertion, put the
// commit state back on the stack again:
//
saved_state* pmp = m_backup_state;
--pmp;
if(pmp < m_stack_base)
{
extend_stack();
pmp = m_backup_state;
--pmp;
}
(void) new (pmp)saved_state(16);
m_backup_state = pmp;
}
return false; return false;
} }

View File

@ -1001,7 +1001,16 @@ template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_commit() bool perl_matcher<BidiIterator, Allocator, traits>::match_commit()
{ {
m_can_backtrack = false; m_can_backtrack = false;
int action = static_cast<const re_commit*>(pstate)->action;
switch(action)
{
case commit_commit:
restart = last; restart = last;
break;
case commit_skip:
restart = position;
break;
}
pstate = pstate->next.p; pstate = pstate->next.p;
return true; return true;
} }

View File

@ -260,6 +260,21 @@ struct re_recurse : public re_jump
int state_id; // identifier of first nested repeat within the recursion. int state_id; // identifier of first nested repeat within the recursion.
}; };
/*** struct re_commit *************************************************
Used for the PRUNE, SKIP and COMMIT verbs which basically differ only in what happens
if no match is found and we start searching forward.
**********************************************************************/
// Selects which backtracking-control verb a re_commit state represents:
// the parser stores commit_prune for (*PRUNE), commit_skip for (*SKIP) and
// commit_commit for (*COMMIT), and the matcher switches on the value.
// Enumerator values are implicit (0, 1, 2 in declaration order).
// No trailing comma after the last enumerator: that is only well-formed
// from C++11 onwards and triggers pedantic warnings on older compilers.
enum commit_type
{
   commit_prune,
   commit_skip,
   commit_commit
};
// State-machine node appended by the parser for the (*COMMIT), (*PRUNE)
// and (*SKIP) verbs; all three share the syntax_element_commit state type
// and are told apart only by the action member.
struct re_commit : public re_syntax_base
{
// Which verb this node stands for; match_commit() switches on it to decide
// where (if anywhere) the search restart position is moved.
commit_type action;
};
/*** enum re_jump_size_type ******************************************* /*** enum re_jump_size_type *******************************************
Provides compiled size of re_jump structure (allowing for trailing alignment). Provides compiled size of re_jump structure (allowing for trailing alignment).
We provide this so we know how many bytes to insert when constructing the machine We provide this so we know how many bytes to insert when constructing the machine

View File

@ -969,16 +969,19 @@ void test_verbs()
TEST_REGEX_SEARCH("(\\w+)(?>b(*COMMIT))\\w{2}", perl, "abbb", match_default, make_array(0, 4, 0, 1, -2, -2)); TEST_REGEX_SEARCH("(\\w+)(?>b(*COMMIT))\\w{2}", perl, "abbb", match_default, make_array(0, 4, 0, 1, -2, -2));
TEST_REGEX_SEARCH("(\\w+)b(*COMMIT)\\w{2}", perl, "abbb", match_default, make_array(-2, -2)); TEST_REGEX_SEARCH("(\\w+)b(*COMMIT)\\w{2}", perl, "abbb", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("a+b?(*PRUNE)c+(*FAIL)", perl, "aaabccc", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("a+b?(*SKIP)c+(*FAIL)", perl, "aaabcccaaabccc", match_default, make_array(-2, -2));
// //
TEST_REGEX_SEARCH("^(?=a(*SKIP)b|ac)", perl, "ac", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("^(?=a(*PRUNE)b)", perl, "ab", match_default, make_array(0, 0, -2, -2));
TEST_REGEX_SEARCH("^(?=a(*PRUNE)b)", perl, "ac", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("AA+(*PRUNE)(B|Z)|AC", perl, "AAAC", match_default, make_array(2, 4, -1, -1, -2, -2));
TEST_REGEX_SEARCH("AA+(*SKIP)(B|Z)|AC", perl, "AAAC", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("AA+(*SKIP)(B|Z)|C", perl, "AAAC", match_default, make_array(3, 4, -1, -1, -2, -2));
TEST_REGEX_SEARCH("AA+(*SKIP)(B|Z)|AC", perl, "AAAC", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("AA+(*SKIP)B|C", perl, "AAAC", match_default, make_array(3, 4, -2, -2));
#if 0 #if 0
/a+b?(*PRUNE)c+(*FAIL)/
aaabccc
/a+b?(*SKIP)c+(*FAIL)/
aaabcccaaabccc
/^(?:aaa(*THEN)\w{6}|bbb(*THEN)\w{5}|ccc(*THEN)\w{4}|\w{3})/ /^(?:aaa(*THEN)\w{6}|bbb(*THEN)\w{5}|ccc(*THEN)\w{4}|\w{3})/
aaaxxxxxx aaaxxxxxx
aaa++++++ aaa++++++
@ -1000,14 +1003,6 @@ void test_verbs()
/a+b?(*THEN)c+(*FAIL)/ /a+b?(*THEN)c+(*FAIL)/
aaabccc aaabccc
/^(?=a(*SKIP)b|ac)/
** Failers
ac
/^(?=a(*PRUNE)b)/
ab
** Failers
ac
~~~~~ ~~~~~