diff --git a/include/boost/regex.hpp b/include/boost/regex.hpp index 619e2583..bd22b18d 100644 --- a/include/boost/regex.hpp +++ b/include/boost/regex.hpp @@ -216,7 +216,7 @@ struct re_syntax_base // marks start or end of (...) struct re_brace : public re_syntax_base { - unsigned int index; + int index; }; // diff --git a/include/boost/regex/detail/regex_compile.hpp b/include/boost/regex/detail/regex_compile.hpp index b56c9e49..c903f341 100644 --- a/include/boost/regex/detail/regex_compile.hpp +++ b/include/boost/regex/detail/regex_compile.hpp @@ -444,6 +444,12 @@ bool BOOST_REGEX_CALL reg_expression::probe_start( switch(node->type) { case re_detail::syntax_element_startmark: + if(static_cast(node)->index == -1) + { + return probe_start(node->next.p->next.p, cc, terminal) + && probe_start(static_cast(node->next.p)->alt.p, cc, terminal); + } + // fall through: case re_detail::syntax_element_endmark: case re_detail::syntax_element_start_line: case re_detail::syntax_element_word_boundary: @@ -1207,7 +1213,8 @@ void BOOST_REGEX_CALL reg_expression::fixup_apply(re_d } goto rebase; case re_detail::syntax_element_endmark: - pb[((re_detail::re_brace*)ptr)->index] = true; + if(((re_detail::re_brace*)ptr)->index > 0) + pb[((re_detail::re_brace*)ptr)->index] = true; goto rebase; default: rebase: @@ -1261,7 +1268,7 @@ unsigned int BOOST_REGEX_CALL reg_expression::set_expr const charT* ptr = p; marks = 0; re_detail::jstack mark(64, data.allocator()); - re_detail::jstack markid(64, data.allocator()); + re_detail::jstack markid(64, data.allocator()); unsigned int last_mark_popped = 0; register traits_size_type c; register re_detail::re_syntax_base* dat; @@ -1325,6 +1332,28 @@ unsigned int BOOST_REGEX_CALL reg_expression::set_expr markid.push(0); ++ptr; continue; + case traits_type::syntax_equal: + ((re_detail::re_brace*)dat)->index = -1; + markid.pop(); + markid.push(-1); + common_forward_assert: + --marks; + ++ptr; + // extend: + dat = add_simple(dat, re_detail::syntax_element_jump, re_detail::re_jump_size); + data.align(); + // + // we don't know what value to put here yet, + // use an arbitrarily large value for now + // and check it later: + ((re_detail::re_jump*)dat)->alt.i = INT_MAX/2; + mark.push(data.size() - re_detail::re_jump_size); + continue; + case traits_type::syntax_not: + ((re_detail::re_brace*)dat)->index = -2; + markid.pop(); + markid.push(-2); + goto common_forward_assert; case traits_type::syntax_hash: // comment just skip it: ((re_detail::re_brace*)dat)->index = 0; diff --git a/include/boost/regex/detail/regex_match.hpp b/include/boost/regex/detail/regex_match.hpp index 026cc33c..3c385ad3 100644 --- a/include/boost/regex/detail/regex_match.hpp +++ b/include/boost/regex/detail/regex_match.hpp @@ -302,6 +302,7 @@ bool query_match_aux(iterator first, const re_syntax_base* ptr = access::first(e); bool match_found = false; bool have_partial_match = false; + bool unwind_stack = false; bool need_push_match = (e.mark_count() > 1); int cur_acc = -1; // no active accumulator pd.set_accumulator_size(access::repeat_count(e)); @@ -357,13 +358,46 @@ bool query_match_aux(iterator first, } goto failure; case syntax_element_startmark: + start_mark_jump: if(((re_brace*)ptr)->index > 0) + { temp_match.set_first(first, ((re_brace*)ptr)->index); + } + else if( + (((re_brace*)ptr)->index == -1) + || (((re_brace*)ptr)->index == -2) + ) + { + matches.push(temp_match); + for(k = 0; k <= cur_acc; ++k) + prev_pos.push(start_loop[k]); + prev_pos.push(first); + prev_record.push(ptr); + for(k = 0; k <= cur_acc; ++k) + prev_acc.push(accumulators[k]); + prev_acc.push(cur_acc); + prev_acc.push(match_found); + match_found = false; + // skip next jump and fall through: + ptr = ptr->next.p; + } ptr = ptr->next.p; break; case syntax_element_endmark: + end_mark_jump: if(((re_brace*)ptr)->index > 0) + { temp_match.set_second(first, ((re_brace*)ptr)->index); + } + else if( + (((re_brace*)ptr)->index == -1) + || (((re_brace*)ptr)->index == -2) + ) + { + match_found = true; + unwind_stack = true; + goto failure; + } ptr = ptr->next.p; break; case syntax_element_literal: @@ -773,13 +807,9 @@ bool query_match_aux(iterator first, case syntax_element_match: goto match_jump; case syntax_element_startmark: - temp_match.set_first(first, ((re_brace*)ptr)->index); - ptr = ptr->next.p; - break; + goto start_mark_jump; case syntax_element_endmark: - temp_match.set_second(first, ((re_brace*)ptr)->index); - ptr = ptr->next.p; - break; + goto end_mark_jump; case syntax_element_start_line: goto outer_line_check; case syntax_element_end_line: @@ -915,6 +945,7 @@ bool query_match_aux(iterator first, for(k = cur_acc; k >= 0; --k) prev_pos.pop(start_loop[k]); prev_record.pop(); + if(unwind_stack) goto failure; // unwinding forward assert goto retry; case syntax_element_rep: { @@ -933,6 +964,7 @@ bool query_match_aux(iterator first, for(k = cur_acc; k >= 0; --k) prev_acc.pop(accumulators[k]); prev_record.pop(); + if(unwind_stack) goto failure; // unwinding forward assert if((unsigned int)++accumulators[cur_acc] > ((re_repeat*)ptr)->max) goto failure; // repetions exhausted. // @@ -947,11 +979,42 @@ bool query_match_aux(iterator first, start_loop[cur_acc] = first; goto retry; } + case syntax_element_startmark: + { + bool saved_matched = match_found; + matches.pop(temp_match); + match_found = prev_acc.peek(); + prev_acc.pop(); + prev_acc.pop(cur_acc); + for(k = cur_acc; k >= 0; --k) + prev_acc.pop(accumulators[k]); + prev_pos.pop(first); + for(k = cur_acc; k >= 0; --k) + prev_pos.pop(start_loop[k]); + prev_record.pop(); + unwind_stack = false; + if(static_cast(ptr)->index == -1) + { + if (saved_matched == false) + goto failure; + ptr = static_cast(ptr->next.p)->alt.p->next.p; + goto retry; + } + if(static_cast(ptr)->index == -2) + { + if (saved_matched == true) + goto failure; + ptr = static_cast(ptr->next.p)->alt.p->next.p; + goto retry; + } + else goto failure; + } case syntax_element_match: if(need_push_match) matches.pop(temp_match); prev_pos.pop(first); prev_record.pop(); + if(unwind_stack) goto failure; // unwinding forward assert goto retry; default: jm_assert(0); diff --git a/include/boost/regex/regex_traits.hpp b/include/boost/regex/regex_traits.hpp index b9a508d8..d3267700 100644 --- a/include/boost/regex/regex_traits.hpp +++ b/include/boost/regex/regex_traits.hpp @@ -118,7 +118,10 @@ struct BOOST_REGEX_DECL regex_traits_base syntax_Z = 51, // for \Z syntax_G = 52, // for \G - syntax_max = 53 + // new extentions: + syntax_not = 53, // for (?!...) + + syntax_max = 54 }; }; diff --git a/src/c_regex_traits_common.cpp b/src/c_regex_traits_common.cpp index a7947f17..e14ba618 100644 --- a/src/c_regex_traits_common.cpp +++ b/src/c_regex_traits_common.cpp @@ -194,6 +194,8 @@ const mss default_messages[] = { { 100 + c_regex_traits::syntax_Z, "Z", }, { 100 + c_regex_traits::syntax_G, "G", }, + { 100 + c_regex_traits::syntax_not, "!", }, + { 0, "", }, }; diff --git a/src/regex_debug.cpp b/src/regex_debug.cpp index 9286e1cc..b8688d81 100644 --- a/src/regex_debug.cpp +++ b/src/regex_debug.cpp @@ -24,14 +24,16 @@ #define BOOST_REGEX_SOURCE #include -#include -#include + +#ifdef BOOST_REGEX_DEBUG #ifdef BOOST_MSVC #include #endif -#ifdef BOOST_REGEX_DEBUG +#include +#include + #ifndef BOOST_RE_OLD_IOSTREAM #include #else diff --git a/test/regress/tests.txt b/test/regress/tests.txt index 10c61b35..60f7e460 100644 --- a/test/regress/tests.txt +++ b/test/regress/tests.txt @@ -869,17 +869,19 @@ a+(?#b+)b+ xaaabbba 1 7 (xyz)(.*)abc xyz -1 -1 0 3 3 3 (xyz)(.*)abc xy -1 -1 -1 -1 -1 -1 +; +; forward lookahead asserts added 21/01/02 +- match_default normal REG_EXTENDED +((?:(?!a|b)\w)+)(\w+) " xxxabaxxx " 2 11 2 5 5 11 +/\*(?:(?!\*/).)*\*/ " /**/ " 2 6 +/\*(?:(?!\*/).)*\*/ " /***/ " 2 7 +/\*(?:(?!\*/).)*\*/ " /********/ " 2 12 +/\*(?:(?!\*/).)*\*/ " /* comment */ " 2 15 +<\s*a[^>]*>((?:(?!<\s*/\s*a\s*>).)*)<\s*/\s*a\s*> " here " 1 24 16 20 +<\s*a[^>]*>((?:(?!<\s*/\s*a\s*>).)*)<\s*/\s*a\s*> " here< / a > " 1 28 16 20 - - - - - - - - - - +<\s*a[^>]*>((?:(?!<\s*/\s*a\s*>).)*)(?=<\s*/\s*a\s*>) " here " 1 20 16 20 +<\s*a[^>]*>((?:(?!<\s*/\s*a\s*>).)*)(?=<\s*/\s*a\s*>) " here< / a > " 1 20 16 20