diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp
index 43205c79..4a80ab9c 100644
--- a/include/boost/regex/v4/basic_regex_parser.hpp
+++ b/include/boost/regex/v4/basic_regex_parser.hpp
@@ -2815,6 +2815,31 @@ bool basic_regex_parser::parse_perl_verb()
          return true;
       }
       break;
+   case 'T':
+      if(++m_position == m_end)
+      {
+         // Input ended right after the 'T': rewind to the open mark that starts the (* sequence so the error is reported there:
+         --m_position;
+         while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
+         fail(regex_constants::error_perl_extension, m_position - m_base);
+         return false;
+      }
+      if(match_verb("HEN")) // the leading 'T' has already been stepped over above
+      {
+         if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
+         {
+            // The verb is not followed by a close mark: rewind to the start of the (* sequence before failing:
+            --m_position;
+            while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
+            fail(regex_constants::error_perl_extension, m_position - m_base);
+            return false;
+         }
+         ++m_position; // step over the close mark
+         this->append_state(syntax_element_then);
+         this->m_pdata->m_disable_match_any = true;
+         return true;
+      }
+      break;
    }
    return false;
 }
diff --git a/include/boost/regex/v4/perl_matcher.hpp b/include/boost/regex/v4/perl_matcher.hpp
index 09199d6f..768fd31d 100644
--- a/include/boost/regex/v4/perl_matcher.hpp
+++ b/include/boost/regex/v4/perl_matcher.hpp
@@ -446,6 +446,7 @@ private:
    bool match_fail();
    bool match_accept();
    bool match_commit();
+   bool match_then();
    bool skip_until_paren(int index, bool match = true);
 
    // find procs stored in s_find_vtable:
@@ -534,6 +535,7 @@ private:
    bool unwind_recursion(bool);
    bool unwind_recursion_pop(bool);
    bool unwind_commit(bool);
+   bool unwind_then(bool);
    void destroy_single_repeat();
    void push_matched_paren(int index, const sub_match& sub);
    void push_recursion_stopper();
@@ -549,13 +551,17 @@ private:
    saved_state* m_stack_base;
    // pointer to current stack position:
    saved_state* m_backup_state;
+   // how many memory blocks have we used up?:
+   unsigned used_block_count;
    // determines what value to return when unwinding from recursion,
    // allows for mixed recursive/non-recursive algorithm:
    bool m_recursive_result;
-   // how many memory blocks have we used up?:
-   unsigned used_block_count;
    // We have unwound to a lookahead/lookbehind, used by COMMIT/PRUNE/SKIP:
    bool m_unwound_lookahead;
+   // We have unwound to an alternative, used by THEN (set in unwind_alt, polled by unwind_then):
+   bool m_unwound_alt;
+   // We are unwinding a commit - used by independent subs to determine whether to stop there or carry on unwinding:
+   //bool m_unwind_commit;
 #endif
 
    // these operations aren't allowed, so are declared private,
diff --git a/include/boost/regex/v4/perl_matcher_non_recursive.hpp b/include/boost/regex/v4/perl_matcher_non_recursive.hpp
index 5e90bb08..112dfa1c 100644
--- a/include/boost/regex/v4/perl_matcher_non_recursive.hpp
+++ b/include/boost/regex/v4/perl_matcher_non_recursive.hpp
@@ -141,7 +141,7 @@ struct saved_recursion : public saved_state
 template
 bool perl_matcher::match_all_states()
 {
-   static matcher_proc_type const s_match_vtable[33] =
+   static matcher_proc_type const s_match_vtable[34] =
    {
       (&perl_matcher::match_startmark),
       &perl_matcher::match_endmark,
@@ -180,6 +180,7 @@ bool perl_matcher::match_all_states()
       &perl_matcher::match_fail,
       &perl_matcher::match_accept,
       &perl_matcher::match_commit,
+      &perl_matcher::match_then,
    };
 
    push_recursion_stopper();
@@ -373,6 +374,13 @@ bool perl_matcher::match_startmark()
          const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p;
          pstate = pstate->next.p->next.p;
          bool r = match_all_states();
+         if(!r && !m_independent)
+         {
+            // Must be unwinding from a COMMIT/SKIP/PRUNE (unwind_commit clears m_independent) and the
+            // independent sub failed, need to unwind everything else:
+            while(unwind(false));
+            return false;
+         }
          pstate = next_pstate;
          m_independent = old_independent;
 #ifdef BOOST_REGEX_MATCH_EXTRA
@@ -1018,16 +1026,13 @@ bool perl_matcher::match_commit()
    switch(static_cast(pstate)->action)
    {
    case commit_commit:
-      if(base != last)
-      {
-         restart = last;
-         --restart;
-      }
+      restart = last;
       break;
    case commit_skip:
-      if(position != base)
+      if(base != position)
       {
          restart = position;
+         // Have to decrement restart since it will get incremented again later:
          --restart;
       }
       break;
@@ -1049,6 +1054,24 @@ bool perl_matcher::match_commit()
    return true;
 }
 
+template
+bool perl_matcher::match_then()
+{
+   // Just leave a mark on the backup stack that we need to skip to the next alternative, then carry on matching:
+   saved_state* pmp = m_backup_state;
+   --pmp;
+   if(pmp < m_stack_base) // the new slot would fall below the stack base: extend the stack and recompute it
+   {
+      extend_stack();
+      pmp = m_backup_state;
+      --pmp;
+   }
+   (void) new (pmp)saved_state(17); // NOTE(review): 17 looks like unwind_then's slot in s_unwind_table (unwind_commit re-pushes id 16) - confirm
+   m_backup_state = pmp;
+   pstate = pstate->next.p;
+   return true;
+}
+
 template
 bool perl_matcher::skip_until_paren(int index, bool match)
 {
@@ -1099,7 +1122,7 @@ unwinding does in the recursive implementation.
 template
 bool perl_matcher::unwind(bool have_match)
 {
-   static unwind_proc_type const s_unwind_table[18] =
+   static unwind_proc_type const s_unwind_table[19] =
    {
       &perl_matcher::unwind_end,
       &perl_matcher::unwind_paren,
@@ -1118,10 +1141,12 @@ bool perl_matcher::unwind(bool have_match)
       &perl_matcher::unwind_recursion,
       &perl_matcher::unwind_recursion_pop,
       &perl_matcher::unwind_commit,
+      &perl_matcher::unwind_then,
    };
 
    m_recursive_result = have_match;
    m_unwound_lookahead = false;
+   m_unwound_alt = false; // cleared on every call; only unwind_alt sets it
    unwind_proc_type unwinder;
    bool cont;
    //
@@ -1201,6 +1226,7 @@ bool perl_matcher::unwind_alt(bool r)
    }
    boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++);
    m_backup_state = pmp;
+   m_unwound_alt = !r; // polled by unwind_then to detect that an alternative was reached
    return r;
 }
 
@@ -1692,6 +1718,22 @@ bool perl_matcher::unwind_commit(bool b)
       (void) new (pmp)saved_state(16);
       m_backup_state = pmp;
    }
+   // This prevents us from stopping when we exit from an independent sub-expression (match_startmark tests this flag):
+   m_independent = false;
+   return false;
+}
+
+template
+bool perl_matcher::unwind_then(bool b)
+{
+   // Unwind everything till we hit an alternative:
+   boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(m_backup_state++); // first pop the saved state on top of the stack
+   bool result = false;
+   while((result = unwind(b)) && !m_unwound_alt){} // empty body: unwind() does all the work
+   // We're now pointing at the next alternative, need one more backtrack
+   // since *all* the other alternatives must fail once we've reached a THEN clause:
+   if(result && m_unwound_alt)
+      unwind(b);
    return false;
 }
 
diff --git a/include/boost/regex/v4/states.hpp b/include/boost/regex/v4/states.hpp
index f0b36179..a621420f 100644
--- a/include/boost/regex/v4/states.hpp
+++ b/include/boost/regex/v4/states.hpp
@@ -125,6 +125,7 @@ enum syntax_element_type
    syntax_element_fail = syntax_element_recurse + 1,
    syntax_element_accept = syntax_element_fail + 1,
    syntax_element_commit = syntax_element_accept + 1,
+   syntax_element_then = syntax_element_commit + 1,
 };
 
 #ifdef BOOST_REGEX_DEBUG
diff --git a/test/regress/test_perl_ex.cpp b/test/regress/test_perl_ex.cpp
index 9090931f..a5132d09 100644
--- a/test/regress/test_perl_ex.cpp
+++ b/test/regress/test_perl_ex.cpp
@@ -981,30 +981,29 @@ void test_verbs()
    TEST_REGEX_SEARCH("AA+(*SKIP)(B|Z)|AC", perl, "AAAC", match_default, make_array(-2, -2));
    TEST_REGEX_SEARCH("AA+(*SKIP)B|C", perl, "AAAC", match_default, make_array(3, 4, -2, -2));
 
+   TEST_REGEX_SEARCH("^(?:aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "aaaxxxxxx", match_default, make_array(0, 9, -2, -2));
+   TEST_REGEX_SEARCH("^(?:aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "aaa++++++", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("^(?:aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "bbbxxxxx", match_default, make_array(0, 8, -2, -2));
+   TEST_REGEX_SEARCH("^(?:aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "bbb+++++", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("^(?:aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "cccxxxx", match_default, make_array(0, 7, -2, -2));
+   TEST_REGEX_SEARCH("^(?:aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "ccc++++", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("^(?:aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "dddddddd", match_default, make_array(0, 3, -2, -2));
+
+   TEST_REGEX_SEARCH("^(aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "aaaxxxxxx", match_default, make_array(0, 9, 0, 9, -2, -2));
+   TEST_REGEX_SEARCH("^(aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "aaa++++++", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("^(aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "bbbxxxxx", match_default, make_array(0, 8, 0, 8, -2, -2));
+   TEST_REGEX_SEARCH("^(aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "bbb+++++", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("^(aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "cccxxxx", match_default, make_array(0, 7, 0, 7, -2, -2));
+   TEST_REGEX_SEARCH("^(aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "ccc++++", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("^(aaa(*THEN)\\w{6}|bbb(*THEN)\\w{5}|ccc(*THEN)\\w{4}|\\w{3})", perl, "dddddddd", match_default, make_array(0, 3, 0, 3, -2, -2));
+
+   TEST_REGEX_SEARCH("(?:a+(*THEN)\\w{6}|x\\w{3})", perl, "aaaxxxxx", match_default, make_array(3, 7, -2, -2));
+   TEST_REGEX_SEARCH("(?>(*COMMIT)(?>yes|no)(*THEN)(*F))?", perl, "yes", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("(?>(*COMMIT)(yes|no)(*THEN)(*F))?", perl, "yes", match_default, make_array(-2, -2));
+
+
 #if 0
 
-/^(?:aaa(*THEN)\w{6}|bbb(*THEN)\w{5}|ccc(*THEN)\w{4}|\w{3})/
-    aaaxxxxxx
-    aaa++++++
-    bbbxxxxx
-    bbb+++++
-    cccxxxx
-    ccc++++
-    dddddddd
-/^(aaa(*THEN)\w{6}|bbb(*THEN)\w{5}|ccc(*THEN)\w{4}|\w{3})/
-    aaaxxxxxx
-    aaa++++++
-    bbbxxxxx
-    bbb+++++
-    cccxxxx
-    ccc++++
-    dddddddd
-
-/a+b?(*THEN)c+(*FAIL)/
-    aaabccc
-
-
-~~~~~
 
 # Check the use of names for failure
 
@@ -1056,23 +1055,6 @@ void test_verbs()
 /A(*MARK:A)A+(*SKIP:B)(B|Z) | AC(*:B)/x,mark
     AAAC
 
-# COMMIT should override THEN.
-
-/(?>(*COMMIT)(?>yes|no)(*THEN)(*F))?/
-    yes
-
-/(?>(*COMMIT)(yes|no)(*THEN)(*F))?/
-    yes
-
-/b?(*SKIP)c/
-    bc
-    abc
-
-/(*SKIP)bc/
-    a
-
-/(*SKIP)b/
-    a
 
 #endif
 