diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp index 574f075d..c7a86ac5 100644 --- a/include/boost/regex/v4/basic_regex_parser.hpp +++ b/include/boost/regex/v4/basic_regex_parser.hpp @@ -68,6 +68,8 @@ public: bool parse_inner_set(basic_char_set& char_set); bool parse_QE(); bool parse_perl_extension(); + bool parse_perl_verb(); + bool match_verb(const char*); bool add_emacs_code(bool negate); bool unwind_alts(std::ptrdiff_t last_paren_start); digraph get_next_set_literal(basic_char_set& char_set); @@ -421,6 +423,8 @@ bool basic_regex_parser::parse_open_paren() { if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question) return parse_perl_extension(); + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_star) + return parse_perl_verb(); } // // update our mark count, and append the required state: @@ -2652,6 +2656,70 @@ option_group_jump: return true; } +template +bool basic_regex_parser::match_verb(const char* verb) +{ + while(*verb) + { + if(static_cast(*verb) != *m_position) + { + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + ++verb; + } + return true; +} + +template +bool basic_regex_parser::parse_perl_verb() +{ + if(++m_position == m_end) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + switch(*m_position) + { + case 'F': + if(++m_position == m_end) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if((this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) || match_verb("AIL")) + { + if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + // Rewind to start of (* sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + ++m_position; + this->append_state(syntax_element_fail); + return true; + } + return false; + } + return false; +} + template bool basic_regex_parser::add_emacs_code(bool negate) { diff --git a/include/boost/regex/v4/perl_matcher.hpp b/include/boost/regex/v4/perl_matcher.hpp index d90418f5..b6588978 100644 --- a/include/boost/regex/v4/perl_matcher.hpp +++ b/include/boost/regex/v4/perl_matcher.hpp @@ -443,6 +443,7 @@ private: bool backtrack_till_match(std::size_t count); #endif bool match_recursion(); + bool match_fail(); // find procs stored in s_find_vtable: bool find_restart_any(); diff --git a/include/boost/regex/v4/perl_matcher_common.hpp b/include/boost/regex/v4/perl_matcher_common.hpp index 2f4109d6..ddb8fadd 100644 --- a/include/boost/regex/v4/perl_matcher_common.hpp +++ b/include/boost/regex/v4/perl_matcher_common.hpp @@ -793,6 +793,13 @@ bool perl_matcher::match_toggle_case() return true; } +template +bool perl_matcher::match_fail() +{ + // Just force a backtrack: + return false; +} + template bool perl_matcher::find_restart_any() diff --git a/include/boost/regex/v4/perl_matcher_non_recursive.hpp b/include/boost/regex/v4/perl_matcher_non_recursive.hpp index be3d53a9..1b3363f8 100644 --- a/include/boost/regex/v4/perl_matcher_non_recursive.hpp +++ b/include/boost/regex/v4/perl_matcher_non_recursive.hpp @@ -141,7 +141,7 @@ struct saved_recursion : public saved_state template bool perl_matcher::match_all_states() { - static matcher_proc_type const s_match_vtable[30] = + static matcher_proc_type const s_match_vtable[31] = { (&perl_matcher::match_startmark), &perl_matcher::match_endmark, @@ -177,6 +177,7 @@ bool perl_matcher::match_all_states() &perl_matcher::match_assert_backref, &perl_matcher::match_toggle_case, &perl_matcher::match_recursion, + &perl_matcher::match_fail, }; push_recursion_stopper(); diff --git a/include/boost/regex/v4/perl_matcher_recursive.hpp b/include/boost/regex/v4/perl_matcher_recursive.hpp index ea912402..f1d2e636 100644 --- a/include/boost/regex/v4/perl_matcher_recursive.hpp +++ b/include/boost/regex/v4/perl_matcher_recursive.hpp @@ -60,7 +60,7 @@ public: template bool perl_matcher::match_all_states() { - static matcher_proc_type const s_match_vtable[30] = + static matcher_proc_type const s_match_vtable[31] = { (&perl_matcher::match_startmark), &perl_matcher::match_endmark, @@ -96,6 +96,7 @@ bool perl_matcher::match_all_states() &perl_matcher::match_assert_backref, &perl_matcher::match_toggle_case, &perl_matcher::match_recursion, + &perl_matcher::match_fail, }; if(state_count > max_state_count) diff --git a/include/boost/regex/v4/states.hpp b/include/boost/regex/v4/states.hpp index 5e50f4a8..2caf2eaf 100644 --- a/include/boost/regex/v4/states.hpp +++ b/include/boost/regex/v4/states.hpp @@ -120,7 +120,9 @@ enum syntax_element_type syntax_element_assert_backref = syntax_element_backstep + 1, syntax_element_toggle_case = syntax_element_assert_backref + 1, // a recursive expression: - syntax_element_recurse = syntax_element_toggle_case + 1 + syntax_element_recurse = syntax_element_toggle_case + 1, + // Verbs: + syntax_element_fail = syntax_element_recurse + 1, }; #ifdef BOOST_REGEX_DEBUG diff --git a/test/regress/main.cpp b/test/regress/main.cpp index f9527e41..a97c48a0 100644 --- a/test/regress/main.cpp +++ b/test/regress/main.cpp @@ -82,6 +82,7 @@ void run_tests() RUN_TESTS(test_pocessive_repeats); RUN_TESTS(test_mark_resets); RUN_TESTS(test_recursion); + RUN_TESTS(test_verbs); } int cpp_main(int /*argc*/, char * /*argv*/[]) diff --git a/test/regress/test.hpp b/test/regress/test.hpp index e93ac5ec..d9224e8e 100644 --- a/test/regress/test.hpp +++ b/test/regress/test.hpp @@ -288,5 +288,6 @@ void test_unicode(); void test_pocessive_repeats(); void test_mark_resets(); void test_recursion(); +void test_verbs(); #endif diff --git a/test/regress/test_perl_ex.cpp b/test/regress/test_perl_ex.cpp index 5ff48e1d..c90a1d1e 100644 --- a/test/regress/test_perl_ex.cpp +++ b/test/regress/test_perl_ex.cpp @@ -932,3 +932,11 @@ void test_recursion() TEST_REGEX_SEARCH("(?:(?a+)|(?b+)|c+)\\.\\k", perl, "cccc.cccc", match_default, make_array(-2, -2)); } +void test_verbs() +{ + using namespace boost::regex_constants; + + TEST_INVALID_REGEX("a+(*", perl); + TEST_INVALID_REGEX("a+(*FX)", perl); + TEST_REGEX_SEARCH("a+(*FAIL)b", perl, "aaaab", match_default, make_array(-2, -2)); +} \ No newline at end of file