From fc6408426d15f62c6857b932af1f479e90ca5a9b Mon Sep 17 00:00:00 2001 From: John Maddock Date: Mon, 11 Jan 2010 09:55:40 +0000 Subject: [PATCH] Merge changes in Trunk: Simplify regex_timer to eliminate regression failure. Rename extern "C" function to have a "boost_" prefix. Fix recursive-expression related bug, and update tests to match. [SVN r58885] --- example/timer/regex_timer.cpp | 20 +++++------ include/boost/regex/pending/static_mutex.hpp | 2 +- .../boost/regex/v4/basic_regex_creator.hpp | 35 +++++++++++++++++++ include/boost/regex/v4/basic_regex_parser.hpp | 4 ++- include/boost/regex/v4/perl_matcher.hpp | 11 ++++-- .../regex/v4/perl_matcher_non_recursive.hpp | 5 +++ include/boost/regex/v4/states.hpp | 8 +++++ src/static_mutex.cpp | 4 +-- test/regress/test_perl_ex.cpp | 4 +++ 9 files changed, 74 insertions(+), 19 deletions(-) diff --git a/example/timer/regex_timer.cpp b/example/timer/regex_timer.cpp index ad94e7ff..630eddd2 100644 --- a/example/timer/regex_timer.cpp +++ b/example/timer/regex_timer.cpp @@ -103,6 +103,7 @@ istream& getline(istream& is, std::string& s) char c = static_cast(is.get()); while(c != '\n') { + BOOST_ASSERT(is.good()); s.append(1, c); c = static_cast(is.get()); } @@ -123,16 +124,17 @@ istream& getline(istream& is, std::string& s) int main(int argc, char**argv) { ifstream ifs; - streambuf* pbuf = 0; + std::istream* p_in = &std::cin; if(argc == 2) { ifs.open(argv[1]); - if(ifs.bad()) + ifs.peek(); + if(!ifs.good()) { cout << "Bad filename: " << argv[1] << endl; return -1; } - pbuf = cin.rdbuf(ifs.rdbuf()); + p_in = &ifs; } boost::regex ex; @@ -152,12 +154,12 @@ int main(int argc, char**argv) double tim; int result = 0; unsigned iters = 100; - double wait_time = (std::min)(t.elapsed_min() * 1000, 1.0); + double wait_time = (std::min)(t.elapsed_min() * 1000, 0.5); while(true) { cout << "Enter expression (or \"quit\" to exit): "; - boost::getline(cin, s1); + boost::getline(*p_in, s1); if(argc == 2) cout << endl << s1 << endl; if(s1 == 
"quit") @@ -191,7 +193,7 @@ int main(int argc, char**argv) while(true) { cout << "Enter string to search (or \"quit\" to exit): "; - boost::getline(cin, s2); + boost::getline(*p_in, s2); if(argc == 2) cout << endl << s2 << endl; if(s2 == "quit") @@ -365,12 +367,6 @@ int main(int argc, char**argv) regfreeA(&r); } - if(pbuf) - { - cin.rdbuf(pbuf); - ifs.close(); - } - return 0; } diff --git a/include/boost/regex/pending/static_mutex.hpp b/include/boost/regex/pending/static_mutex.hpp index 218169c3..334ef27d 100644 --- a/include/boost/regex/pending/static_mutex.hpp +++ b/include/boost/regex/pending/static_mutex.hpp @@ -140,7 +140,7 @@ inline bool scoped_static_mutex_lock::locked()const namespace boost{ class BOOST_REGEX_DECL scoped_static_mutex_lock; -extern "C" BOOST_REGEX_DECL void free_static_mutex(); +extern "C" BOOST_REGEX_DECL void boost_regex_free_static_mutex(); class BOOST_REGEX_DECL static_mutex { diff --git a/include/boost/regex/v4/basic_regex_creator.hpp b/include/boost/regex/v4/basic_regex_creator.hpp index 0782e456..ee207d08 100644 --- a/include/boost/regex/v4/basic_regex_creator.hpp +++ b/include/boost/regex/v4/basic_regex_creator.hpp @@ -811,8 +811,43 @@ void basic_regex_creator::fixup_recursions(re_syntax_base* state) { if((p->type == syntax_element_startmark) && (static_cast(p)->index == id)) { + // + // We've found the target of the recursion, set the jump target: + // static_cast(state)->alt.p = p; ok = true; + // + // Now scan the target for nested repeats: + // + p = p->next.p; + int next_rep_id = 0; + while(p) + { + switch(p->type) + { + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + next_rep_id = static_cast(p)->state_id; + break; + case syntax_element_endmark: + if(static_cast(p)->index == id) + next_rep_id = -1; + break; + default: + break; + } + if(next_rep_id) + break; + p = p->next.p; + } + if(next_rep_id > 0) + { + 
static_cast(state)->state_id = next_rep_id - 1; + } + break; } p = p->next.p; diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp index c85ad9a5..3ea4d645 100644 --- a/include/boost/regex/v4/basic_regex_parser.hpp +++ b/include/boost/regex/v4/basic_regex_parser.hpp @@ -1939,7 +1939,9 @@ bool basic_regex_parser::parse_perl_extension() } insert_recursion: pb->index = markid = 0; - static_cast(this->append_state(syntax_element_recurse, sizeof(re_jump)))->alt.i = v; + re_recurse* pr = static_cast(this->append_state(syntax_element_recurse, sizeof(re_recurse))); + pr->alt.i = v; + pr->state_id = 0; static_cast( this->append_state(syntax_element_toggle_case, sizeof(re_case)) )->icase = this->flags() & regbase::icase; diff --git a/include/boost/regex/v4/perl_matcher.hpp b/include/boost/regex/v4/perl_matcher.hpp index 5bac21c0..52e0bcec 100644 --- a/include/boost/regex/v4/perl_matcher.hpp +++ b/include/boost/regex/v4/perl_matcher.hpp @@ -277,10 +277,15 @@ public: else { repeater_count* p = next; - while(p->state_id != state_id) + while(p && (p->state_id != state_id)) p = p->next; - count = p->count; - start_pos = p->start_pos; + if(p) + { + count = p->count; + start_pos = p->start_pos; + } + else + count = 0; } } ~repeater_count() diff --git a/include/boost/regex/v4/perl_matcher_non_recursive.hpp b/include/boost/regex/v4/perl_matcher_non_recursive.hpp index 343afc02..0fcd4548 100644 --- a/include/boost/regex/v4/perl_matcher_non_recursive.hpp +++ b/include/boost/regex/v4/perl_matcher_non_recursive.hpp @@ -904,10 +904,15 @@ bool perl_matcher::match_recursion() } recursion_stack[recursion_stack_position].preturn_address = pstate->next.p; recursion_stack[recursion_stack_position].results = *m_presult; + if(static_cast(pstate)->state_id > 0) + { + push_repeater_count(static_cast(pstate)->state_id, &next_count); + } pstate = static_cast(pstate)->alt.p; recursion_stack[recursion_stack_position].id = static_cast(pstate)->index; 
++recursion_stack_position; //BOOST_ASSERT(recursion_stack[recursion_stack_position-1].id); + return true; } diff --git a/include/boost/regex/v4/states.hpp b/include/boost/regex/v4/states.hpp index efdebbe5..38690b22 100644 --- a/include/boost/regex/v4/states.hpp +++ b/include/boost/regex/v4/states.hpp @@ -248,6 +248,14 @@ struct re_repeat : public re_alt bool greedy; // True if this is a greedy repeat }; +/*** struct re_recurse ************************************************ +Recurse to a particular subexpression. +**********************************************************************/ +struct re_recurse : public re_jump +{ + int state_id; // identifier of first nested repeat within the recursion. +}; + /*** enum re_jump_size_type ******************************************* Provides compiled size of re_jump structure (allowing for trailing alignment). We provide this so we know how manybytes to insert when constructing the machine diff --git a/src/static_mutex.cpp b/src/static_mutex.cpp index 040b52dd..d14feb1e 100644 --- a/src/static_mutex.cpp +++ b/src/static_mutex.cpp @@ -124,7 +124,7 @@ void scoped_static_mutex_lock::unlock() boost::recursive_mutex* static_mutex::m_pmutex = 0; boost::once_flag static_mutex::m_once = BOOST_ONCE_INIT; -extern "C" BOOST_REGEX_DECL void free_static_mutex() +extern "C" BOOST_REGEX_DECL void boost_regex_free_static_mutex() { delete static_mutex::m_pmutex; static_mutex::m_pmutex = 0; @@ -133,7 +133,7 @@ extern "C" BOOST_REGEX_DECL void free_static_mutex() void static_mutex::init() { m_pmutex = new boost::recursive_mutex(); - int r = atexit(free_static_mutex); + int r = atexit(boost_regex_free_static_mutex); BOOST_ASSERT(0 == r); } diff --git a/test/regress/test_perl_ex.cpp b/test/regress/test_perl_ex.cpp index 3c58fa42..d58da2a7 100644 --- a/test/regress/test_perl_ex.cpp +++ b/test/regress/test_perl_ex.cpp @@ -892,5 +892,9 @@ void test_recursion() 
TEST_REGEX_SEARCH("\\b(?&byte)(\\.(?&byte)){3}(?(DEFINE)(?<byte>2[0-4]\\d|25[0-5]|1\\d\\d|[1-9]?\\d))", perl|mod_x, "10.0.0.0", match_default, make_array(0, 8, 6, 8, -1, -1, -2, -2)); TEST_REGEX_SEARCH("\\b(?&byte)(\\.(?&byte)){3}(?(DEFINE)(?<byte>2[0-4]\\d|25[0-5]|1\\d\\d|[1-9]?\\d))", perl|mod_x, "10.6", match_default, make_array(-2, -2)); TEST_REGEX_SEARCH("\\b(?&byte)(\\.(?&byte)){3}(?(DEFINE)(?<byte>2[0-4]\\d|25[0-5]|1\\d\\d|[1-9]?\\d))", perl|mod_x, "455.3.4.5", match_default, make_array(-2, -2)); + + // Bugs: + TEST_REGEX_SEARCH("namespace\\s+(\\w+)\\s+(\\{(?:[^{}]*(?:(?2)[^{}]*)*)?\\})", perl, "namespace one { namespace two { int foo(); } }", match_default, make_array(0, 46, 10, 13, 14, 46, -2, -2)); + TEST_REGEX_SEARCH("namespace\\s+(\\w+)\\s+(\\{(?:[^{}]*(?:(?2)[^{}]*)*)?\\})", perl, "namespace one { namespace two { int foo(){} } { {{{ } } } } {}}", match_default, make_array(0, 64, 10, 13, 14, 64, -2, -2)); }