Added support for forward lookahead asserts.

[SVN r12455]
This commit is contained in:
John Maddock
2002-01-23 12:42:31 +00:00
parent e993619390
commit c07c4e0a11
7 changed files with 124 additions and 23 deletions

View File

@ -216,7 +216,7 @@ struct re_syntax_base
// marks start or end of (...)
// The index tells the rest of the library what kind of group the brace
// belongs to:
//   index > 0   - a numbered marked sub-expression; only these record
//                 sub-match start/end positions while matching,
//   index == 0  - a group that records nothing (for example the (?#...)
//                 comment case sets the index to 0),
//   index == -1 - a forward lookahead assert that must match, (?=...),
//   index == -2 - a forward lookahead assert that must not match, (?!...).
struct re_brace : public re_syntax_base
{
unsigned int index;
// signed, so that the negative lookahead-assert markers (-1, -2) can be stored:
int index;
};
//

View File

@ -444,6 +444,12 @@ bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::probe_start(
switch(node->type)
{
case re_detail::syntax_element_startmark:
if(static_cast<const re_detail::re_brace*>(node)->index == -1)
{
return probe_start(node->next.p->next.p, cc, terminal)
&& probe_start(static_cast<const re_detail::re_jump*>(node->next.p)->alt.p, cc, terminal);
}
// fall through:
case re_detail::syntax_element_endmark:
case re_detail::syntax_element_start_line:
case re_detail::syntax_element_word_boundary:
@ -1207,6 +1213,7 @@ void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::fixup_apply(re_d
}
goto rebase;
case re_detail::syntax_element_endmark:
if(((re_detail::re_brace*)ptr)->index > 0)
pb[((re_detail::re_brace*)ptr)->index] = true;
goto rebase;
default:
@ -1261,7 +1268,7 @@ unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::set_expr
const charT* ptr = p;
marks = 0;
re_detail::jstack<unsigned int, Allocator> mark(64, data.allocator());
re_detail::jstack<unsigned int, Allocator> markid(64, data.allocator());
re_detail::jstack<int, Allocator> markid(64, data.allocator());
unsigned int last_mark_popped = 0;
register traits_size_type c;
register re_detail::re_syntax_base* dat;
@ -1325,6 +1332,28 @@ unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::set_expr
markid.push(0);
++ptr;
continue;
case traits_type::syntax_equal:
((re_detail::re_brace*)dat)->index = -1;
markid.pop();
markid.push(-1);
common_forward_assert:
--marks;
++ptr;
// extend:
dat = add_simple(dat, re_detail::syntax_element_jump, re_detail::re_jump_size);
data.align();
//
// we don't know what value to put here yet,
// use an arbitrarily large value for now
// and check it later:
((re_detail::re_jump*)dat)->alt.i = INT_MAX/2;
mark.push(data.size() - re_detail::re_jump_size);
continue;
case traits_type::syntax_not:
((re_detail::re_brace*)dat)->index = -2;
markid.pop();
markid.push(-2);
goto common_forward_assert;
case traits_type::syntax_hash:
// comment just skip it:
((re_detail::re_brace*)dat)->index = 0;

View File

@ -302,6 +302,7 @@ bool query_match_aux(iterator first,
const re_syntax_base* ptr = access::first(e);
bool match_found = false;
bool have_partial_match = false;
bool unwind_stack = false;
bool need_push_match = (e.mark_count() > 1);
int cur_acc = -1; // no active accumulator
pd.set_accumulator_size(access::repeat_count(e));
@ -357,13 +358,46 @@ bool query_match_aux(iterator first,
}
goto failure;
case syntax_element_startmark:
start_mark_jump:
if(((re_brace*)ptr)->index > 0)
{
temp_match.set_first(first, ((re_brace*)ptr)->index);
}
else if(
(((re_brace*)ptr)->index == -1)
|| (((re_brace*)ptr)->index == -2)
)
{
matches.push(temp_match);
for(k = 0; k <= cur_acc; ++k)
prev_pos.push(start_loop[k]);
prev_pos.push(first);
prev_record.push(ptr);
for(k = 0; k <= cur_acc; ++k)
prev_acc.push(accumulators[k]);
prev_acc.push(cur_acc);
prev_acc.push(match_found);
match_found = false;
// skip next jump and fall through:
ptr = ptr->next.p;
}
ptr = ptr->next.p;
break;
case syntax_element_endmark:
end_mark_jump:
if(((re_brace*)ptr)->index > 0)
{
temp_match.set_second(first, ((re_brace*)ptr)->index);
}
else if(
(((re_brace*)ptr)->index == -1)
|| (((re_brace*)ptr)->index == -2)
)
{
match_found = true;
unwind_stack = true;
goto failure;
}
ptr = ptr->next.p;
break;
case syntax_element_literal:
@ -773,13 +807,9 @@ bool query_match_aux(iterator first,
case syntax_element_match:
goto match_jump;
case syntax_element_startmark:
temp_match.set_first(first, ((re_brace*)ptr)->index);
ptr = ptr->next.p;
break;
goto start_mark_jump;
case syntax_element_endmark:
temp_match.set_second(first, ((re_brace*)ptr)->index);
ptr = ptr->next.p;
break;
goto end_mark_jump;
case syntax_element_start_line:
goto outer_line_check;
case syntax_element_end_line:
@ -915,6 +945,7 @@ bool query_match_aux(iterator first,
for(k = cur_acc; k >= 0; --k)
prev_pos.pop(start_loop[k]);
prev_record.pop();
if(unwind_stack) goto failure; // unwinding forward assert
goto retry;
case syntax_element_rep:
{
@ -933,6 +964,7 @@ bool query_match_aux(iterator first,
for(k = cur_acc; k >= 0; --k)
prev_acc.pop(accumulators[k]);
prev_record.pop();
if(unwind_stack) goto failure; // unwinding forward assert
if((unsigned int)++accumulators[cur_acc] > ((re_repeat*)ptr)->max)
goto failure; // repetitions exhausted.
//
@ -947,11 +979,42 @@ bool query_match_aux(iterator first,
start_loop[cur_acc] = first;
goto retry;
}
case syntax_element_startmark:
{
bool saved_matched = match_found;
matches.pop(temp_match);
match_found = prev_acc.peek();
prev_acc.pop();
prev_acc.pop(cur_acc);
for(k = cur_acc; k >= 0; --k)
prev_acc.pop(accumulators[k]);
prev_pos.pop(first);
for(k = cur_acc; k >= 0; --k)
prev_pos.pop(start_loop[k]);
prev_record.pop();
unwind_stack = false;
if(static_cast<const re_brace*>(ptr)->index == -1)
{
if (saved_matched == false)
goto failure;
ptr = static_cast<const re_jump*>(ptr->next.p)->alt.p->next.p;
goto retry;
}
if(static_cast<const re_brace*>(ptr)->index == -2)
{
if (saved_matched == true)
goto failure;
ptr = static_cast<const re_jump*>(ptr->next.p)->alt.p->next.p;
goto retry;
}
else goto failure;
}
case syntax_element_match:
if(need_push_match)
matches.pop(temp_match);
prev_pos.pop(first);
prev_record.pop();
if(unwind_stack) goto failure; // unwinding forward assert
goto retry;
default:
jm_assert(0);

View File

@ -118,7 +118,10 @@ struct BOOST_REGEX_DECL regex_traits_base
syntax_Z = 51, // for \Z
syntax_G = 52, // for \G
syntax_max = 53
// new extensions:
syntax_not = 53, // for (?!...)
syntax_max = 54
};
};

View File

@ -194,6 +194,8 @@ const mss default_messages[] = {
{ 100 + c_regex_traits<char>::syntax_Z, "Z", },
{ 100 + c_regex_traits<char>::syntax_G, "G", },
{ 100 + c_regex_traits<char>::syntax_not, "!", },
{ 0, "", },
};

View File

@ -24,14 +24,16 @@
#define BOOST_REGEX_SOURCE
#include <boost/regex/config.hpp>
#include <boost/regex/detail/regex_raw_buffer.hpp>
#include <boost/regex.hpp>
#ifdef BOOST_REGEX_DEBUG
#ifdef BOOST_MSVC
#include <crtdbg.h>
#endif
#ifdef BOOST_REGEX_DEBUG
#include <boost/regex/detail/regex_raw_buffer.hpp>
#include <boost/regex.hpp>
#ifndef BOOST_RE_OLD_IOSTREAM
#include <ostream>
#else

View File

@ -869,17 +869,19 @@ a+(?#b+)b+ xaaabbba 1 7
(xyz)(.*)abc xyz -1 -1 0 3 3 3
(xyz)(.*)abc xy -1 -1 -1 -1 -1 -1
;
; forward lookahead asserts added 21/01/02
- match_default normal REG_EXTENDED
((?:(?!a|b)\w)+)(\w+) " xxxabaxxx " 2 11 2 5 5 11
/\*(?:(?!\*/).)*\*/ " /**/ " 2 6
/\*(?:(?!\*/).)*\*/ " /***/ " 2 7
/\*(?:(?!\*/).)*\*/ " /********/ " 2 12
/\*(?:(?!\*/).)*\*/ " /* comment */ " 2 15
<\s*a[^>]*>((?:(?!<\s*/\s*a\s*>).)*)<\s*/\s*a\s*> " <a href=\"here\">here</a> " 1 24 16 20
<\s*a[^>]*>((?:(?!<\s*/\s*a\s*>).)*)<\s*/\s*a\s*> " <a href=\"here\">here< / a > " 1 28 16 20
<\s*a[^>]*>((?:(?!<\s*/\s*a\s*>).)*)(?=<\s*/\s*a\s*>) " <a href=\"here\">here</a> " 1 20 16 20
<\s*a[^>]*>((?:(?!<\s*/\s*a\s*>).)*)(?=<\s*/\s*a\s*>) " <a href=\"here\">here< / a > " 1 20 16 20