From 9a36e035f2cdb034a88a0d9512e240293d29da08 Mon Sep 17 00:00:00 2001 From: jzmaddock Date: Thu, 1 Oct 2015 18:34:59 +0100 Subject: [PATCH] Add support for PRUNE and SKIP (no MARK's though). --- include/boost/regex/v4/basic_regex_parser.hpp | 52 ++++++++++++++++++- include/boost/regex/v4/perl_matcher.hpp | 2 + .../boost/regex/v4/perl_matcher_common.hpp | 3 ++ .../regex/v4/perl_matcher_non_recursive.hpp | 44 ++++++++++++++-- .../boost/regex/v4/perl_matcher_recursive.hpp | 11 +++- include/boost/regex/v4/states.hpp | 15 ++++++ test/regress/test_perl_ex.cpp | 25 ++++----- 7 files changed, 132 insertions(+), 20 deletions(-) diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp index 7a2e9745..43205c79 100644 --- a/include/boost/regex/v4/basic_regex_parser.hpp +++ b/include/boost/regex/v4/basic_regex_parser.hpp @@ -2760,7 +2760,57 @@ bool basic_regex_parser::parse_perl_verb() return false; } ++m_position; - this->append_state(syntax_element_commit); + static_cast(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_commit; + this->m_pdata->m_disable_match_any = true; + return true; + } + break; + case 'P': + if(++m_position == m_end) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(match_verb("RUNE")) + { + if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + ++m_position; + static_cast(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_prune; + this->m_pdata->m_disable_match_any = true; + return true; + } + break; + case 'S': + if(++m_position == m_end) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(match_verb("KIP")) + { + if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + ++m_position; + static_cast(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_skip; this->m_pdata->m_disable_match_any = true; return true; } diff --git a/include/boost/regex/v4/perl_matcher.hpp b/include/boost/regex/v4/perl_matcher.hpp index 7ae73bc1..09199d6f 100644 --- a/include/boost/regex/v4/perl_matcher.hpp +++ b/include/boost/regex/v4/perl_matcher.hpp @@ -554,6 +554,8 @@ private: bool m_recursive_result; // how many memory blocks have we used up?: unsigned used_block_count; + // We have unwound to a lookahead/lookbehind, used by COMMIT/PRUNE/SKIP: + bool m_unwound_lookahead; #endif // these operations aren't allowed, so are declared private, diff --git a/include/boost/regex/v4/perl_matcher_common.hpp b/include/boost/regex/v4/perl_matcher_common.hpp index 5e79f6bb..f3949ccf 100644 --- a/include/boost/regex/v4/perl_matcher_common.hpp +++ b/include/boost/regex/v4/perl_matcher_common.hpp @@ -354,6 +354,9 @@ bool perl_matcher::match_prefix() #endif if(!m_has_found_match) position = restart; // reset search postion +#ifdef BOOST_REGEX_RECURSIVE + m_can_backtrack = true; // reset for further searches +#endif return m_has_found_match; } diff --git a/include/boost/regex/v4/perl_matcher_non_recursive.hpp b/include/boost/regex/v4/perl_matcher_non_recursive.hpp index 3c77e7e3..5e90bb08 100644 --- a/include/boost/regex/v4/perl_matcher_non_recursive.hpp +++ b/include/boost/regex/v4/perl_matcher_non_recursive.hpp @@ -1014,6 +1014,27 @@ bool perl_matcher::match_commit() // however we might not unwind correctly in that case, so for now, // just mark that we don't backtrack into whatever is left (or rather // we'll unwind it unconditionally without pausing to try other matches). + + switch(static_cast(pstate)->action) + { + case commit_commit: + if(base != last) + { + restart = last; + --restart; + } + break; + case commit_skip: + if(position != base) + { + restart = position; + --restart; + } + break; + case commit_prune: + break; + } + saved_state* pmp = m_backup_state; --pmp; if(pmp < m_stack_base) @@ -1025,8 +1046,6 @@ bool perl_matcher::match_commit() (void) new (pmp)saved_state(16); m_backup_state = pmp; pstate = pstate->next.p; - // If we don't find a match we don't want to search further either: - restart = last; return true; } @@ -1102,6 +1121,7 @@ bool perl_matcher::unwind(bool have_match) }; m_recursive_result = have_match; + m_unwound_lookahead = false; unwind_proc_type unwinder; bool cont; // @@ -1166,6 +1186,7 @@ bool perl_matcher::unwind_assertion(bool r) m_recursive_result = pmp->positive ? r : !r; boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++); m_backup_state = pmp; + m_unwound_lookahead = true; return !result; // return false if the assertion was matched to stop search. } @@ -1653,7 +1674,24 @@ template bool perl_matcher::unwind_commit(bool b) { boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(m_backup_state++); - while(unwind(b)) {} + while(unwind(b) && !m_unwound_lookahead){} + if(m_unwound_lookahead && pstate) + { + // + // If we stop because we just unwound an assertion, put the + // commit state back on the stack again: + // + saved_state* pmp = m_backup_state; + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = m_backup_state; + --pmp; + } + (void) new (pmp)saved_state(16); + m_backup_state = pmp; + } return false; } diff --git a/include/boost/regex/v4/perl_matcher_recursive.hpp b/include/boost/regex/v4/perl_matcher_recursive.hpp index e5c15f03..da9fd61a 100644 --- a/include/boost/regex/v4/perl_matcher_recursive.hpp +++ b/include/boost/regex/v4/perl_matcher_recursive.hpp @@ -1001,7 +1001,16 @@ template bool perl_matcher::match_commit() { m_can_backtrack = false; - restart = last; + int action = static_cast(pstate)->action; + switch(action) + { + case commit_commit: + restart = last; + break; + case commit_skip: + restart = position; + break; + } pstate = pstate->next.p; return true; } diff --git a/include/boost/regex/v4/states.hpp b/include/boost/regex/v4/states.hpp index aba08180..f0b36179 100644 --- a/include/boost/regex/v4/states.hpp +++ b/include/boost/regex/v4/states.hpp @@ -260,6 +260,21 @@ struct re_recurse : public re_jump int state_id; // identifier of first nested repeat within the recursion. }; +/*** struct re_commit ************************************************* +Used for the PRUNE, SKIP and COMMIT verbs which basically differ only in what happens +if no match is found and we start searching forward. +**********************************************************************/ +enum commit_type +{ + commit_prune, + commit_skip, + commit_commit, +}; +struct re_commit : public re_syntax_base +{ + commit_type action; +}; + /*** enum re_jump_size_type ******************************************* Provides compiled size of re_jump structure (allowing for trailing alignment). We provide this so we know how manybytes to insert when constructing the machine diff --git a/test/regress/test_perl_ex.cpp b/test/regress/test_perl_ex.cpp index 7764a510..9090931f 100644 --- a/test/regress/test_perl_ex.cpp +++ b/test/regress/test_perl_ex.cpp @@ -969,16 +969,19 @@ void test_verbs() TEST_REGEX_SEARCH("(\\w+)(?>b(*COMMIT))\\w{2}", perl, "abbb", match_default, make_array(0, 4, 0, 1, -2, -2)); TEST_REGEX_SEARCH("(\\w+)b(*COMMIT)\\w{2}", perl, "abbb", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("a+b?(*PRUNE)c+(*FAIL)", perl, "aaabccc", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("a+b?(*SKIP)c+(*FAIL)", perl, "aaabcccaaabccc", match_default, make_array(-2, -2)); // - + TEST_REGEX_SEARCH("^(?=a(*SKIP)b|ac)", perl, "ac", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("^(?=a(*PRUNE)b)", perl, "ab", match_default, make_array(0, 0, -2, -2)); + TEST_REGEX_SEARCH("^(?=a(*PRUNE)b)", perl, "ac", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("AA+(*PRUNE)(B|Z)|AC", perl, "AAAC", match_default, make_array(2, 4, -1, -1, -2, -2)); + TEST_REGEX_SEARCH("AA+(*SKIP)(B|Z)|AC", perl, "AAAC", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("AA+(*SKIP)(B|Z)|C", perl, "AAAC", match_default, make_array(3, 4, -1, -1, -2, -2)); + TEST_REGEX_SEARCH("AA+(*SKIP)(B|Z)|AC", perl, "AAAC", match_default, make_array(-2, -2)); + TEST_REGEX_SEARCH("AA+(*SKIP)B|C", perl, "AAAC", match_default, make_array(3, 4, -2, -2)); #if 0 -/a+b?(*PRUNE)c+(*FAIL)/ - aaabccc - -/a+b?(*SKIP)c+(*FAIL)/ - aaabcccaaabccc - /^(?:aaa(*THEN)\w{6}|bbb(*THEN)\w{5}|ccc(*THEN)\w{4}|\w{3})/ aaaxxxxxx aaa++++++ @@ -1000,14 +1003,6 @@ void test_verbs() /a+b?(*THEN)c+(*FAIL)/ aaabccc -/^(?=a(*SKIP)b|ac)/ - ** Failers - ac - -/^(?=a(*PRUNE)b)/ - ab - ** Failers - ac ~~~~~