Get THEN working for non-recursive implementation.

Fix various bugs in the other verbs exposed by new tests.
This commit is contained in:
jzmaddock
2015-10-04 19:28:25 +01:00
parent 9a36e035f2
commit 7a4e883675
5 changed files with 105 additions and 49 deletions

View File

@ -2815,6 +2815,31 @@ bool basic_regex_parser<charT, traits>::parse_perl_verb()
return true; return true;
} }
break; break;
case 'T':
if(++m_position == m_end)
{
// Rewind to start of (* sequence:
--m_position;
while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
fail(regex_constants::error_perl_extension, m_position - m_base);
return false;
}
if(match_verb("HEN"))
{
if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
{
// Rewind to start of (* sequence:
--m_position;
while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
fail(regex_constants::error_perl_extension, m_position - m_base);
return false;
}
++m_position;
this->append_state(syntax_element_then);
this->m_pdata->m_disable_match_any = true;
return true;
}
break;
} }
return false; return false;
} }

View File

@ -446,6 +446,7 @@ private:
bool match_fail(); bool match_fail();
bool match_accept(); bool match_accept();
bool match_commit(); bool match_commit();
bool match_then();
bool skip_until_paren(int index, bool match = true); bool skip_until_paren(int index, bool match = true);
// find procs stored in s_find_vtable: // find procs stored in s_find_vtable:
@ -534,6 +535,7 @@ private:
bool unwind_recursion(bool); bool unwind_recursion(bool);
bool unwind_recursion_pop(bool); bool unwind_recursion_pop(bool);
bool unwind_commit(bool); bool unwind_commit(bool);
bool unwind_then(bool);
void destroy_single_repeat(); void destroy_single_repeat();
void push_matched_paren(int index, const sub_match<BidiIterator>& sub); void push_matched_paren(int index, const sub_match<BidiIterator>& sub);
void push_recursion_stopper(); void push_recursion_stopper();
@ -549,13 +551,17 @@ private:
saved_state* m_stack_base; saved_state* m_stack_base;
// pointer to current stack position: // pointer to current stack position:
saved_state* m_backup_state; saved_state* m_backup_state;
// how many memory blocks have we used up?:
unsigned used_block_count;
// determines what value to return when unwinding from recursion, // determines what value to return when unwinding from recursion,
// allows for mixed recursive/non-recursive algorithm: // allows for mixed recursive/non-recursive algorithm:
bool m_recursive_result; bool m_recursive_result;
// how many memory blocks have we used up?:
unsigned used_block_count;
// We have unwound to a lookahead/lookbehind, used by COMMIT/PRUNE/SKIP: // We have unwound to a lookahead/lookbehind, used by COMMIT/PRUNE/SKIP:
bool m_unwound_lookahead; bool m_unwound_lookahead;
// We have unwound to an alternative, used by THEN:
bool m_unwound_alt;
// We are unwinding a commit - used by independent subs to determine whether to stop there or carry on unwinding:
//bool m_unwind_commit;
#endif #endif
// these operations aren't allowed, so are declared private, // these operations aren't allowed, so are declared private,

View File

@ -141,7 +141,7 @@ struct saved_recursion : public saved_state
template <class BidiIterator, class Allocator, class traits> template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states() bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
{ {
static matcher_proc_type const s_match_vtable[33] = static matcher_proc_type const s_match_vtable[34] =
{ {
(&perl_matcher<BidiIterator, Allocator, traits>::match_startmark), (&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
&perl_matcher<BidiIterator, Allocator, traits>::match_endmark, &perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
@ -180,6 +180,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
&perl_matcher<BidiIterator, Allocator, traits>::match_fail, &perl_matcher<BidiIterator, Allocator, traits>::match_fail,
&perl_matcher<BidiIterator, Allocator, traits>::match_accept, &perl_matcher<BidiIterator, Allocator, traits>::match_accept,
&perl_matcher<BidiIterator, Allocator, traits>::match_commit, &perl_matcher<BidiIterator, Allocator, traits>::match_commit,
&perl_matcher<BidiIterator, Allocator, traits>::match_then,
}; };
push_recursion_stopper(); push_recursion_stopper();
@ -373,6 +374,13 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p; const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
pstate = pstate->next.p->next.p; pstate = pstate->next.p->next.p;
bool r = match_all_states(); bool r = match_all_states();
if(!r && !m_independent)
{
// Must be unwinding from a COMMIT/SKIP/PRUNE and the independent
// sub failed, need to unwind everything else:
while(unwind(false));
return false;
}
pstate = next_pstate; pstate = next_pstate;
m_independent = old_independent; m_independent = old_independent;
#ifdef BOOST_REGEX_MATCH_EXTRA #ifdef BOOST_REGEX_MATCH_EXTRA
@ -1018,16 +1026,13 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_commit()
switch(static_cast<const re_commit*>(pstate)->action) switch(static_cast<const re_commit*>(pstate)->action)
{ {
case commit_commit: case commit_commit:
if(base != last)
{
restart = last; restart = last;
--restart;
}
break; break;
case commit_skip: case commit_skip:
if(position != base) if(base != position)
{ {
restart = position; restart = position;
// Have to decrement restart since it will get incremented again later:
--restart; --restart;
} }
break; break;
@ -1049,6 +1054,24 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_commit()
return true; return true;
} }
// Matcher for the (*THEN) state in the non-recursive implementation.
//
// Consumes no input: it only records a marker on the backup-state stack and
// then moves on to the following state. Always returns true.
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_then()
{
// Just leave a mark that we need to skip to next alternative:
// Claim the slot immediately below the current backup-stack position.
saved_state* pmp = m_backup_state;
--pmp;
if(pmp < m_stack_base)
{
// The claimed slot falls below the stack base, so ask for more room and
// recompute the slot from the (presumably updated) m_backup_state.
// NOTE(review): extend_stack() is not visible here - confirm it resets
// m_backup_state to point into the new storage.
extend_stack();
pmp = m_backup_state;
--pmp;
}
// Construct the marker in place. NOTE(review): 17 is presumably the state id
// that unwind() dispatches to unwind_then - confirm against s_unwind_table.
(void) new (pmp)saved_state(17);
m_backup_state = pmp;
// Continue matching with the state that follows the verb.
pstate = pstate->next.p;
return true;
}
template <class BidiIterator, class Allocator, class traits> template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::skip_until_paren(int index, bool match) bool perl_matcher<BidiIterator, Allocator, traits>::skip_until_paren(int index, bool match)
{ {
@ -1099,7 +1122,7 @@ unwinding does in the recursive implementation.
template <class BidiIterator, class Allocator, class traits> template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::unwind(bool have_match) bool perl_matcher<BidiIterator, Allocator, traits>::unwind(bool have_match)
{ {
static unwind_proc_type const s_unwind_table[18] = static unwind_proc_type const s_unwind_table[19] =
{ {
&perl_matcher<BidiIterator, Allocator, traits>::unwind_end, &perl_matcher<BidiIterator, Allocator, traits>::unwind_end,
&perl_matcher<BidiIterator, Allocator, traits>::unwind_paren, &perl_matcher<BidiIterator, Allocator, traits>::unwind_paren,
@ -1118,10 +1141,12 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind(bool have_match)
&perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion, &perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion,
&perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion_pop, &perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion_pop,
&perl_matcher<BidiIterator, Allocator, traits>::unwind_commit, &perl_matcher<BidiIterator, Allocator, traits>::unwind_commit,
&perl_matcher<BidiIterator, Allocator, traits>::unwind_then,
}; };
m_recursive_result = have_match; m_recursive_result = have_match;
m_unwound_lookahead = false; m_unwound_lookahead = false;
m_unwound_alt = false;
unwind_proc_type unwinder; unwind_proc_type unwinder;
bool cont; bool cont;
// //
@ -1201,6 +1226,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_alt(bool r)
} }
boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++); boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++);
m_backup_state = pmp; m_backup_state = pmp;
m_unwound_alt = !r;
return r; return r;
} }
@ -1692,6 +1718,22 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_commit(bool b)
(void) new (pmp)saved_state(16); (void) new (pmp)saved_state(16);
m_backup_state = pmp; m_backup_state = pmp;
} }
// This prevents us from stopping when we exit from an independent sub-expression:
m_independent = false;
return false;
}
// Unwind handler for the marker left on the backup stack by a (*THEN) verb.
//
// b is forwarded unchanged to every nested unwind() call made here.
// Always returns false.
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::unwind_then(bool b)
{
// Unwind everything till we hit an alternative:
// First discard the THEN marker itself and step past it on the backup stack.
boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(m_backup_state++);
bool result = false;
// Keep unwinding while unwind() reports success and has not flagged an
// alternative. unwind() clears m_unwound_alt on entry and unwind_alt sets it,
// so the flag reflects only the most recent unwind() call.
while((result = unwind(b)) && !m_unwound_alt){}
// We're now pointing at the next alternative, need one more backtrack
// since *all* the other alternatives must fail once we've reached a THEN clause:
if(result && m_unwound_alt)
unwind(b);
return false; return false;
} }

View File

@ -125,6 +125,7 @@ enum syntax_element_type
syntax_element_fail = syntax_element_recurse + 1, syntax_element_fail = syntax_element_recurse + 1,
syntax_element_accept = syntax_element_fail + 1, syntax_element_accept = syntax_element_fail + 1,
syntax_element_commit = syntax_element_accept + 1, syntax_element_commit = syntax_element_accept + 1,
syntax_element_then = syntax_element_commit + 1,
}; };
#ifdef BOOST_REGEX_DEBUG #ifdef BOOST_REGEX_DEBUG

View File

@ -981,30 +981,29 @@ void test_verbs()
TEST_REGEX_SEARCH("AA+(*SKIP)(B|Z)|AC", perl, "AAAC", match_default, make_array(-2, -2)); TEST_REGEX_SEARCH("AA+(*SKIP)(B|Z)|AC", perl, "AAAC", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("AA+(*SKIP)B|C", perl, "AAAC", match_default, make_array(3, 4, -2, -2)); TEST_REGEX_SEARCH("AA+(*SKIP)B|C", perl, "AAAC", match_default, make_array(3, 4, -2, -2));
TEST_REGEX_SEARCH("^(?:aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "aaaxxxxxx", match_default, make_array(0, 9, -2, -2));
TEST_REGEX_SEARCH("^(?:aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "aaa++++++", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("^(?:aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "bbbxxxxx", match_default, make_array(0, 8, -2, -2));
TEST_REGEX_SEARCH("^(?:aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "bbb+++++", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("^(?:aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "cccxxxx", match_default, make_array(0, 7, -2, -2));
TEST_REGEX_SEARCH("^(?:aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "ccc++++", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("^(?:aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "dddddddd", match_default, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("^(aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "aaaxxxxxx", match_default, make_array(0, 9, 0, 9, -2, -2));
TEST_REGEX_SEARCH("^(aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "aaa++++++", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("^(aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "bbbxxxxx", match_default, make_array(0, 8, 0, 8, -2, -2));
TEST_REGEX_SEARCH("^(aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "bbb+++++", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("^(aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "cccxxxx", match_default, make_array(0, 7, 0, 7, -2, -2));
TEST_REGEX_SEARCH("^(aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "ccc++++", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("^(aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "dddddddd", match_default, make_array(0, 3, 0, 3, -2, -2));
TEST_REGEX_SEARCH("(?:a+(*THEN)\\w{6}|x\\w{3})", perl, "aaaxxxxx", match_default, make_array(3, 7, -2, -2));
TEST_REGEX_SEARCH("(?>(*COMMIT)(?>yes|no)(*THEN)(*F))?", perl, "yes", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("(?>(*COMMIT)(yes|no)(*THEN)(*F))?", perl, "yes", match_default, make_array(-2, -2));
#if 0 #if 0
/^(?:aaa(*THEN)\w{6}|bbb(*THEN)\w{5}|ccc(*THEN)\w{4}|\w{3})/
aaaxxxxxx
aaa++++++
bbbxxxxx
bbb+++++
cccxxxx
ccc++++
dddddddd
/^(aaa(*THEN)\w{6}|bbb(*THEN)\w{5}|ccc(*THEN)\w{4}|\w{3})/
aaaxxxxxx
aaa++++++
bbbxxxxx
bbb+++++
cccxxxx
ccc++++
dddddddd
/a+b?(*THEN)c+(*FAIL)/
aaabccc
~~~~~
# Check the use of names for failure # Check the use of names for failure
@ -1056,23 +1055,6 @@ void test_verbs()
/A(*MARK:A)A+(*SKIP:B)(B|Z) | AC(*:B)/x,mark /A(*MARK:A)A+(*SKIP:B)(B|Z) | AC(*:B)/x,mark
AAAC AAAC
# COMMIT should override THEN.
/(?>(*COMMIT)(?>yes|no)(*THEN)(*F))?/
yes
/(?>(*COMMIT)(yes|no)(*THEN)(*F))?/
yes
/b?(*SKIP)c/
bc
abc
/(*SKIP)bc/
a
/(*SKIP)b/
a
#endif #endif