From 0e1e9804dab4a247a933bb1ebd393e7d5a596f54 Mon Sep 17 00:00:00 2001 From: John Maddock Date: Tue, 5 Jan 2010 18:04:08 +0000 Subject: [PATCH] Fix bug that affects recursive expressions combined with repeats. [SVN r58722] --- .../boost/regex/v4/basic_regex_creator.hpp | 35 +++++++++++++++++++ include/boost/regex/v4/basic_regex_parser.hpp | 4 ++- include/boost/regex/v4/perl_matcher.hpp | 11 ++++-- .../regex/v4/perl_matcher_non_recursive.hpp | 5 +++ include/boost/regex/v4/states.hpp | 8 +++++ test/regress/test_perl_ex.cpp | 4 +++ 6 files changed, 63 insertions(+), 4 deletions(-) diff --git a/include/boost/regex/v4/basic_regex_creator.hpp b/include/boost/regex/v4/basic_regex_creator.hpp index 0782e456..ee207d08 100644 --- a/include/boost/regex/v4/basic_regex_creator.hpp +++ b/include/boost/regex/v4/basic_regex_creator.hpp @@ -811,8 +811,43 @@ void basic_regex_creator::fixup_recursions(re_syntax_base* state) { if((p->type == syntax_element_startmark) && (static_cast(p)->index == id)) { + // + // We've found the target of the recursion, set the jump target: + // static_cast(state)->alt.p = p; ok = true; + // + // Now scan the target for nested repeats: + // + p = p->next.p; + int next_rep_id = 0; + while(p) + { + switch(p->type) + { + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + next_rep_id = static_cast(p)->state_id; + break; + case syntax_element_endmark: + if(static_cast(p)->index == id) + next_rep_id = -1; + break; + default: + break; + } + if(next_rep_id) + break; + p = p->next.p; + } + if(next_rep_id > 0) + { + static_cast(state)->state_id = next_rep_id - 1; + } + break; } p = p->next.p; diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp index c85ad9a5..3ea4d645 100644 --- a/include/boost/regex/v4/basic_regex_parser.hpp +++ b/include/boost/regex/v4/basic_regex_parser.hpp @@ -1939,7 +1939,9 @@ 
bool basic_regex_parser::parse_perl_extension() } insert_recursion: pb->index = markid = 0; - static_cast(this->append_state(syntax_element_recurse, sizeof(re_jump)))->alt.i = v; + re_recurse* pr = static_cast(this->append_state(syntax_element_recurse, sizeof(re_recurse))); + pr->alt.i = v; + pr->state_id = 0; static_cast( this->append_state(syntax_element_toggle_case, sizeof(re_case)) )->icase = this->flags() & regbase::icase; diff --git a/include/boost/regex/v4/perl_matcher.hpp b/include/boost/regex/v4/perl_matcher.hpp index 5bac21c0..52e0bcec 100644 --- a/include/boost/regex/v4/perl_matcher.hpp +++ b/include/boost/regex/v4/perl_matcher.hpp @@ -277,10 +277,15 @@ public: else { repeater_count* p = next; - while(p->state_id != state_id) + while(p && (p->state_id != state_id)) p = p->next; - count = p->count; - start_pos = p->start_pos; + if(p) + { + count = p->count; + start_pos = p->start_pos; + } + else + count = 0; } } ~repeater_count() diff --git a/include/boost/regex/v4/perl_matcher_non_recursive.hpp b/include/boost/regex/v4/perl_matcher_non_recursive.hpp index 343afc02..0fcd4548 100644 --- a/include/boost/regex/v4/perl_matcher_non_recursive.hpp +++ b/include/boost/regex/v4/perl_matcher_non_recursive.hpp @@ -904,10 +904,15 @@ bool perl_matcher::match_recursion() } recursion_stack[recursion_stack_position].preturn_address = pstate->next.p; recursion_stack[recursion_stack_position].results = *m_presult; + if(static_cast(pstate)->state_id > 0) + { + push_repeater_count(static_cast(pstate)->state_id, &next_count); + } pstate = static_cast(pstate)->alt.p; recursion_stack[recursion_stack_position].id = static_cast(pstate)->index; ++recursion_stack_position; //BOOST_ASSERT(recursion_stack[recursion_stack_position-1].id); + return true; } diff --git a/include/boost/regex/v4/states.hpp b/include/boost/regex/v4/states.hpp index efdebbe5..38690b22 100644 --- a/include/boost/regex/v4/states.hpp +++ b/include/boost/regex/v4/states.hpp @@ -248,6 +248,14 @@ struct re_repeat 
: public re_alt bool greedy; // True if this is a greedy repeat }; +/*** struct re_recurse ************************************************ +Recurse to a particular subexpression. +**********************************************************************/ +struct re_recurse : public re_jump +{ + int state_id; // identifier of first nested repeat within the recursion. +}; + /*** enum re_jump_size_type ******************************************* Provides compiled size of re_jump structure (allowing for trailing alignment). We provide this so we know how manybytes to insert when constructing the machine diff --git a/test/regress/test_perl_ex.cpp b/test/regress/test_perl_ex.cpp index 3c58fa42..d58da2a7 100644 --- a/test/regress/test_perl_ex.cpp +++ b/test/regress/test_perl_ex.cpp @@ -892,5 +892,9 @@ void test_recursion() TEST_REGEX_SEARCH("\\b(?&byte)(\\.(?&byte)){3}(?(DEFINE)(?2[0-4]\\d|25[0-5]|1\\d\\d|[1-9]?\\d))", perl|mod_x, "10.0.0.0", match_default, make_array(0, 8, 6, 8, -1, -1, -2, -2)); TEST_REGEX_SEARCH("\\b(?&byte)(\\.(?&byte)){3}(?(DEFINE)(?2[0-4]\\d|25[0-5]|1\\d\\d|[1-9]?\\d))", perl|mod_x, "10.6", match_default, make_array(-2, -2)); TEST_REGEX_SEARCH("\\b(?&byte)(\\.(?&byte)){3}(?(DEFINE)(?2[0-4]\\d|25[0-5]|1\\d\\d|[1-9]?\\d))", perl|mod_x, "455.3.4.5", match_default, make_array(-2, -2)); + + // Bugs: + TEST_REGEX_SEARCH("namespace\\s+(\\w+)\\s+(\\{(?:[^{}]*(?:(?2)[^{}]*)*)?\\})", perl, "namespace one { namespace two { int foo(); } }", match_default, make_array(0, 46, 10, 13, 14, 46, -2, -2)); + TEST_REGEX_SEARCH("namespace\\s+(\\w+)\\s+(\\{(?:[^{}]*(?:(?2)[^{}]*)*)?\\})", perl, "namespace one { namespace two { int foo(){} } { {{{ } } } } {}}", match_default, make_array(0, 64, 10, 13, 14, 64, -2, -2)); }