From 83140ddbed489daa7e5c02a7d389c1f016ef6f4e Mon Sep 17 00:00:00 2001 From: jzmaddock Date: Wed, 30 Sep 2015 18:47:59 +0100 Subject: [PATCH] Fix up recursive implementation of ACCEPT and COMMIT. --- include/boost/regex/v4/perl_matcher.hpp | 3 +- .../boost/regex/v4/perl_matcher_common.hpp | 13 ++++++ .../regex/v4/perl_matcher_non_recursive.hpp | 14 ------- .../boost/regex/v4/perl_matcher_recursive.hpp | 40 ++++++++++++++++++- test/regress/main.cpp | 2 - test/regress/test_perl_ex.cpp | 10 ++--- 6 files changed, 58 insertions(+), 24 deletions(-) diff --git a/include/boost/regex/v4/perl_matcher.hpp b/include/boost/regex/v4/perl_matcher.hpp index 3de39be4..7ae73bc1 100644 --- a/include/boost/regex/v4/perl_matcher.hpp +++ b/include/boost/regex/v4/perl_matcher.hpp @@ -446,9 +446,7 @@ private: bool match_fail(); bool match_accept(); bool match_commit(); -#ifdef BOOST_REGEX_NON_RECURSIVE bool skip_until_paren(int index, bool match = true); -#endif // find procs stored in s_find_vtable: bool find_restart_any(); @@ -509,6 +507,7 @@ private: #ifdef BOOST_REGEX_RECURSIVE // Set to false by a (*COMMIT): bool m_can_backtrack; + bool m_have_accept; #endif #ifdef BOOST_REGEX_NON_RECURSIVE // diff --git a/include/boost/regex/v4/perl_matcher_common.hpp b/include/boost/regex/v4/perl_matcher_common.hpp index 00bcc191..5e79f6bb 100644 --- a/include/boost/regex/v4/perl_matcher_common.hpp +++ b/include/boost/regex/v4/perl_matcher_common.hpp @@ -82,6 +82,7 @@ void perl_matcher::construct_init(const basic_r m_backup_state = 0; #elif defined(BOOST_REGEX_RECURSIVE) m_can_backtrack = true; + m_have_accept = false; #endif // find the value to use for matching word boundaries: m_word_mask = re.get_data().m_word_mask; @@ -805,6 +806,18 @@ bool perl_matcher::match_fail() return false; } +template +bool perl_matcher::match_accept() +{ + if(!recursion_stack.empty()) + { + return skip_until_paren(recursion_stack.back().idx); + } + else + { + return skip_until_paren(INT_MAX); + } +} template bool perl_matcher::find_restart_any() diff --git a/include/boost/regex/v4/perl_matcher_non_recursive.hpp b/include/boost/regex/v4/perl_matcher_non_recursive.hpp index b83f6aee..3c77e7e3 100644 --- a/include/boost/regex/v4/perl_matcher_non_recursive.hpp +++ b/include/boost/regex/v4/perl_matcher_non_recursive.hpp @@ -1070,20 +1070,6 @@ bool perl_matcher::skip_until_paren(int index, return true; } -template -bool perl_matcher::match_accept() -{ - if(!recursion_stack.empty()) - { - skip_until_paren(recursion_stack.back().idx); - } - else - { - skip_until_paren(INT_MAX); - } - return true; -} - /**************************************************************************** Unwind and associated proceedures follow, these perform what normal stack diff --git a/include/boost/regex/v4/perl_matcher_recursive.hpp b/include/boost/regex/v4/perl_matcher_recursive.hpp index af3eb2c8..e5c15f03 100644 --- a/include/boost/regex/v4/perl_matcher_recursive.hpp +++ b/include/boost/regex/v4/perl_matcher_recursive.hpp @@ -142,6 +142,8 @@ bool perl_matcher::match_startmark() r = false; else r = true; + if(r && m_have_accept) + r = skip_until_paren(INT_MAX); break; } case -3: @@ -183,6 +185,8 @@ bool perl_matcher::match_startmark() } } #endif + if(r && m_have_accept) + r = skip_until_paren(INT_MAX); break; } case -4: @@ -1003,11 +1007,45 @@ bool perl_matcher::match_commit() } template -bool perl_matcher::match_accept() +bool perl_matcher::skip_until_paren(int index, bool match) { + while(pstate) + { + if(pstate->type == syntax_element_endmark) + { + if(static_cast(pstate)->index == index) + { + if(match) + return this->match_endmark(); + pstate = pstate->next.p; + return true; + } + else + { + // Unenclosed closing ), occurs when (*ACCEPT) is inside some other + // parenthesis which may or may not have other side effects associated with it. + bool r = match_endmark(); + m_have_accept = true; + if(!pstate) + return r; + } + continue; + } + else if(pstate->type == syntax_element_match) + return true; + else if(pstate->type == syntax_element_startmark) + { + int idx = static_cast(pstate)->index; + pstate = pstate->next.p; + skip_until_paren(idx, false); + continue; + } + pstate = pstate->next.p; + } return true; } + } // namespace BOOST_REGEX_DETAIL_NS } // namespace boost #ifdef BOOST_MSVC diff --git a/test/regress/main.cpp b/test/regress/main.cpp index a469f0a2..d71b9658 100644 --- a/test/regress/main.cpp +++ b/test/regress/main.cpp @@ -49,7 +49,6 @@ int error_count = 0; void run_tests() { -#if 0 RUN_TESTS(basic_tests); RUN_TESTS(test_simple_repeats); RUN_TESTS(test_alt); @@ -83,7 +82,6 @@ void run_tests() RUN_TESTS(test_pocessive_repeats); RUN_TESTS(test_mark_resets); RUN_TESTS(test_recursion); -#endif RUN_TESTS(test_verbs); } diff --git a/test/regress/test_perl_ex.cpp b/test/regress/test_perl_ex.cpp index 9c03e488..7764a510 100644 --- a/test/regress/test_perl_ex.cpp +++ b/test/regress/test_perl_ex.cpp @@ -939,13 +939,13 @@ void test_verbs() TEST_INVALID_REGEX("a+(*", perl); TEST_INVALID_REGEX("a+(*FX)", perl); TEST_REGEX_SEARCH("a+(*FAIL)b", perl, "aaaab", match_default, make_array(-2, -2)); - //TEST_REGEX_SEARCH("(A(A|B(*ACCEPT)|C)D)(E)", perl, "AB", match_default, make_array(0, 2, 0, 2, 1, 2, -1, -1, -2, -2)); - //TEST_REGEX_SEARCH("(A(A|B(*ACCEPT)|C)D)(E)", perl, "ACDE", match_default, make_array(0, 4, 0, 3, 1, 2, 3, 4, -2, -2)); + TEST_REGEX_SEARCH("(A(A|B(*ACCEPT)|C)D)(E)", perl, "AB", match_default, make_array(0, 2, 0, 2, 1, 2, -1, -1, -2, -2)); + TEST_REGEX_SEARCH("(A(A|B(*ACCEPT)|C)D)(E)", perl, "ACDE", match_default, make_array(0, 4, 0, 3, 1, 2, 3, 4, -2, -2)); TEST_REGEX_SEARCH("^a+(*FAIL)", perl, "aaaaaa", match_default, make_array(-2, -2)); TEST_REGEX_SEARCH("a+b?c+(*FAIL)", perl, "aaabccc", match_default, make_array(-2, -2)); TEST_REGEX_SEARCH("a+b?(*COMMIT)c+(*FAIL)", perl, "aaabccc", match_default, make_array(-2, -2)); - /* + TEST_REGEX_SEARCH("(A(A|B(*ACCEPT)|C)D)(E)", perl, "AB", match_default, make_array(0, 2, 0, 2, 1, 2, -1, -1, -2, -2)); TEST_REGEX_SEARCH("(A(A|B(*ACCEPT)|C)D)(E)", perl, "ABX", match_default, make_array(0, 2, 0, 2, 1, 2, -1, -1, -2, -2)); TEST_REGEX_SEARCH("(A(A|B(*ACCEPT)|C)D)(E)", perl, "AADE", match_default, make_array(0, 4, 0, 3, 1, 2, 3, 4, -2, -2)); @@ -959,8 +959,8 @@ void test_verbs() TEST_REGEX_SEARCH("(?:(?1)|B)(A(*ACCEPT)XX|C)D", perl, "BAX", match_default, make_array(0, 2, 1, 2, -2, -2)); TEST_REGEX_SEARCH("(?:(?1)|B)(A(*ACCEPT)XX|C)D", perl, "ACX", match_default, make_array(-2, -2)); TEST_REGEX_SEARCH("(?:(?1)|B)(A(*ACCEPT)XX|C)D", perl, "ABC", match_default, make_array(-2, -2)); - */ - //TEST_REGEX_SEARCH("^(?=a(*ACCEPT)b)", perl, "ac", match_default, make_array(0, 0, -2, -2)); + + TEST_REGEX_SEARCH("^(?=a(*ACCEPT)b)", perl, "ac", match_default, make_array(0, 0, -2, -2)); TEST_REGEX_SEARCH("A(*COMMIT)(B|D)", perl, "ACABX", match_default, make_array(-2, -2)); TEST_REGEX_SEARCH("(*COMMIT)(A|P)(B|P)(C|P)", perl, "ABCDEFG", match_default, make_array(0, 3, 0, 1, 1, 2, 2, 3, -2, -2));