From c3869beffeac332e4159e3d782d5cf9c48ccc465 Mon Sep 17 00:00:00 2001 From: John Maddock Date: Tue, 8 Apr 2003 10:49:43 +0000 Subject: [PATCH] Added support for independent subs Almost complete implementation now [SVN r18205] --- .../regex/v4/perl_matcher_non_recursive.hpp | 11 ++++++++- .../boost/regex/v4/perl_matcher_recursive.hpp | 12 +++++++++- include/boost/regex/v4/regex_compile.hpp | 23 ++++++++++++++++++- include/boost/regex/v4/regex_match.hpp | 2 +- 4 files changed, 44 insertions(+), 4 deletions(-) diff --git a/include/boost/regex/v4/perl_matcher_non_recursive.hpp b/include/boost/regex/v4/perl_matcher_non_recursive.hpp index 8b18300c..1659a1b0 100644 --- a/include/boost/regex/v4/perl_matcher_non_recursive.hpp +++ b/include/boost/regex/v4/perl_matcher_non_recursive.hpp @@ -129,7 +129,7 @@ struct saved_single_repeat : public saved_state const re_repeat* rep; BidiIterator last_position; saved_single_repeat(unsigned c, const re_repeat* r, BidiIterator lp, int id) - : saved_state(id), count(c), last_position(lp), rep(r){} + : saved_state(id), count(c), rep(r), last_position(lp){} }; template @@ -299,6 +299,15 @@ bool perl_matcher::match_startmark( push_assertion(next_pstate, index == -1); break; } + case -3: + { + // independent sub-expression: + const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; + pstate = pstate->next.p->next.p; + bool r = match_all_states(); + pstate = next_pstate; + return r; + } default: { assert(index > 0); diff --git a/include/boost/regex/v4/perl_matcher_recursive.hpp b/include/boost/regex/v4/perl_matcher_recursive.hpp index 7a2a6b20..d7dfcfdf 100644 --- a/include/boost/regex/v4/perl_matcher_recursive.hpp +++ b/include/boost/regex/v4/perl_matcher_recursive.hpp @@ -93,6 +93,15 @@ bool perl_matcher::match_startmark( r = true; break; } + case -3: + { + // independent sub-expression: + const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; + pstate = pstate->next.p->next.p; + r = match_all_states(); + pstate = next_pstate; + break; + } default: { assert(index > 0); @@ -625,9 +634,10 @@ bool perl_matcher::backtrack_till_m #endif } -} +} // namespace re_detail } // namespace boost + #ifdef __BORLANDC__ # pragma option pop #endif diff --git a/include/boost/regex/v4/regex_compile.hpp b/include/boost/regex/v4/regex_compile.hpp index 2286f713..8dc3f209 100644 --- a/include/boost/regex/v4/regex_compile.hpp +++ b/include/boost/regex/v4/regex_compile.hpp @@ -447,6 +447,8 @@ void BOOST_REGEX_CALL reg_expression::compile_maps() if(static_cast(rep->next.p)->singleton) rep->type = re_detail::syntax_element_long_set_rep; break; + default: + break; } } } @@ -475,8 +477,18 @@ bool BOOST_REGEX_CALL reg_expression::probe_start( return probe_start(node->next.p->next.p, cc, terminal) && probe_start(static_cast(node->next.p)->alt.p, cc, terminal); } - // fall through: + else if(static_cast(node)->index == -3) + { + return probe_start(node->next.p->next.p, cc, terminal); + } + // doesn't tell us anything about the next character, so: + return probe_start(node->next.p, cc, terminal); case re_detail::syntax_element_endmark: + if(static_cast(node)->index == -3) + { + return true; + } + // fall through: case re_detail::syntax_element_start_line: case re_detail::syntax_element_word_boundary: case re_detail::syntax_element_buffer_start: @@ -561,6 +573,10 @@ bool BOOST_REGEX_CALL reg_expression::probe_start( // we need to take the OR of the two alternatives: return probe_start(static_cast(node)->alt.p, cc, terminal) || probe_start(node->next.p, cc, terminal); case re_detail::syntax_element_rep: + case re_detail::syntax_element_char_rep: + case re_detail::syntax_element_dot_rep: + case re_detail::syntax_element_long_set_rep: + case re_detail::syntax_element_short_set_rep: // we need to take the OR of the two alternatives if(static_cast(node)->min == 0) return probe_start(node->next.p, cc, static_cast(node)->alt.p) || probe_start(static_cast(node)->alt.p, cc, terminal); @@ -1403,6 +1419,11 @@ unsigned int BOOST_REGEX_CALL reg_expression::set_expr static_cast(dat)->alt.i = INT_MAX/2; mark.push(data.size() - re_detail::re_jump_size); continue; + case traits_type::syntax_right_word: + static_cast(dat)->index = -3; + markid.pop(); + markid.push(-3); + goto common_forward_assert; case traits_type::syntax_not: static_cast(dat)->index = -2; markid.pop(); diff --git a/include/boost/regex/v4/regex_match.hpp b/include/boost/regex/v4/regex_match.hpp index 7fc51fc3..4c4eef89 100644 --- a/include/boost/regex/v4/regex_match.hpp +++ b/include/boost/regex/v4/regex_match.hpp @@ -136,7 +136,7 @@ inline bool regex_match(const wchar_t* str, } #endif inline bool regex_match(const std::string& s, - match_results& m, + smatch& m, const regex& e, match_flag_type flags = match_default) {