forked from boostorg/regex
Added support for forward lookahead asserts.
[SVN r12455]
This commit is contained in:
@ -216,7 +216,7 @@ struct re_syntax_base
|
||||
// marks start or end of (...)
|
||||
struct re_brace : public re_syntax_base
|
||||
{
|
||||
unsigned int index;
|
||||
int index;
|
||||
};
|
||||
|
||||
//
|
||||
|
@ -444,6 +444,12 @@ bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::probe_start(
|
||||
switch(node->type)
|
||||
{
|
||||
case re_detail::syntax_element_startmark:
|
||||
if(static_cast<const re_detail::re_brace*>(node)->index == -1)
|
||||
{
|
||||
return probe_start(node->next.p->next.p, cc, terminal)
|
||||
&& probe_start(static_cast<const re_detail::re_jump*>(node->next.p)->alt.p, cc, terminal);
|
||||
}
|
||||
// fall through:
|
||||
case re_detail::syntax_element_endmark:
|
||||
case re_detail::syntax_element_start_line:
|
||||
case re_detail::syntax_element_word_boundary:
|
||||
@ -1207,6 +1213,7 @@ void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::fixup_apply(re_d
|
||||
}
|
||||
goto rebase;
|
||||
case re_detail::syntax_element_endmark:
|
||||
if(((re_detail::re_brace*)ptr)->index > 0)
|
||||
pb[((re_detail::re_brace*)ptr)->index] = true;
|
||||
goto rebase;
|
||||
default:
|
||||
@ -1261,7 +1268,7 @@ unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::set_expr
|
||||
const charT* ptr = p;
|
||||
marks = 0;
|
||||
re_detail::jstack<unsigned int, Allocator> mark(64, data.allocator());
|
||||
re_detail::jstack<unsigned int, Allocator> markid(64, data.allocator());
|
||||
re_detail::jstack<int, Allocator> markid(64, data.allocator());
|
||||
unsigned int last_mark_popped = 0;
|
||||
register traits_size_type c;
|
||||
register re_detail::re_syntax_base* dat;
|
||||
@ -1325,6 +1332,28 @@ unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::set_expr
|
||||
markid.push(0);
|
||||
++ptr;
|
||||
continue;
|
||||
case traits_type::syntax_equal:
|
||||
((re_detail::re_brace*)dat)->index = -1;
|
||||
markid.pop();
|
||||
markid.push(-1);
|
||||
common_forward_assert:
|
||||
--marks;
|
||||
++ptr;
|
||||
// extend:
|
||||
dat = add_simple(dat, re_detail::syntax_element_jump, re_detail::re_jump_size);
|
||||
data.align();
|
||||
//
|
||||
// we don't know what value to put here yet,
|
||||
// use an arbitrarily large value for now
|
||||
// and check it later:
|
||||
((re_detail::re_jump*)dat)->alt.i = INT_MAX/2;
|
||||
mark.push(data.size() - re_detail::re_jump_size);
|
||||
continue;
|
||||
case traits_type::syntax_not:
|
||||
((re_detail::re_brace*)dat)->index = -2;
|
||||
markid.pop();
|
||||
markid.push(-2);
|
||||
goto common_forward_assert;
|
||||
case traits_type::syntax_hash:
|
||||
// comment just skip it:
|
||||
((re_detail::re_brace*)dat)->index = 0;
|
||||
|
@ -302,6 +302,7 @@ bool query_match_aux(iterator first,
|
||||
const re_syntax_base* ptr = access::first(e);
|
||||
bool match_found = false;
|
||||
bool have_partial_match = false;
|
||||
bool unwind_stack = false;
|
||||
bool need_push_match = (e.mark_count() > 1);
|
||||
int cur_acc = -1; // no active accumulator
|
||||
pd.set_accumulator_size(access::repeat_count(e));
|
||||
@ -357,13 +358,46 @@ bool query_match_aux(iterator first,
|
||||
}
|
||||
goto failure;
|
||||
case syntax_element_startmark:
|
||||
start_mark_jump:
|
||||
if(((re_brace*)ptr)->index > 0)
|
||||
{
|
||||
temp_match.set_first(first, ((re_brace*)ptr)->index);
|
||||
}
|
||||
else if(
|
||||
(((re_brace*)ptr)->index == -1)
|
||||
|| (((re_brace*)ptr)->index == -2)
|
||||
)
|
||||
{
|
||||
matches.push(temp_match);
|
||||
for(k = 0; k <= cur_acc; ++k)
|
||||
prev_pos.push(start_loop[k]);
|
||||
prev_pos.push(first);
|
||||
prev_record.push(ptr);
|
||||
for(k = 0; k <= cur_acc; ++k)
|
||||
prev_acc.push(accumulators[k]);
|
||||
prev_acc.push(cur_acc);
|
||||
prev_acc.push(match_found);
|
||||
match_found = false;
|
||||
// skip next jump and fall through:
|
||||
ptr = ptr->next.p;
|
||||
}
|
||||
ptr = ptr->next.p;
|
||||
break;
|
||||
case syntax_element_endmark:
|
||||
end_mark_jump:
|
||||
if(((re_brace*)ptr)->index > 0)
|
||||
{
|
||||
temp_match.set_second(first, ((re_brace*)ptr)->index);
|
||||
}
|
||||
else if(
|
||||
(((re_brace*)ptr)->index == -1)
|
||||
|| (((re_brace*)ptr)->index == -2)
|
||||
)
|
||||
{
|
||||
match_found = true;
|
||||
unwind_stack = true;
|
||||
goto failure;
|
||||
}
|
||||
ptr = ptr->next.p;
|
||||
break;
|
||||
case syntax_element_literal:
|
||||
@ -773,13 +807,9 @@ bool query_match_aux(iterator first,
|
||||
case syntax_element_match:
|
||||
goto match_jump;
|
||||
case syntax_element_startmark:
|
||||
temp_match.set_first(first, ((re_brace*)ptr)->index);
|
||||
ptr = ptr->next.p;
|
||||
break;
|
||||
goto start_mark_jump;
|
||||
case syntax_element_endmark:
|
||||
temp_match.set_second(first, ((re_brace*)ptr)->index);
|
||||
ptr = ptr->next.p;
|
||||
break;
|
||||
goto end_mark_jump;
|
||||
case syntax_element_start_line:
|
||||
goto outer_line_check;
|
||||
case syntax_element_end_line:
|
||||
@ -915,6 +945,7 @@ bool query_match_aux(iterator first,
|
||||
for(k = cur_acc; k >= 0; --k)
|
||||
prev_pos.pop(start_loop[k]);
|
||||
prev_record.pop();
|
||||
if(unwind_stack) goto failure; // unwinding forward assert
|
||||
goto retry;
|
||||
case syntax_element_rep:
|
||||
{
|
||||
@ -933,6 +964,7 @@ bool query_match_aux(iterator first,
|
||||
for(k = cur_acc; k >= 0; --k)
|
||||
prev_acc.pop(accumulators[k]);
|
||||
prev_record.pop();
|
||||
if(unwind_stack) goto failure; // unwinding forward assert
|
||||
if((unsigned int)++accumulators[cur_acc] > ((re_repeat*)ptr)->max)
|
||||
goto failure; // repetitions exhausted.
|
||||
//
|
||||
@ -947,11 +979,42 @@ bool query_match_aux(iterator first,
|
||||
start_loop[cur_acc] = first;
|
||||
goto retry;
|
||||
}
|
||||
case syntax_element_startmark:
|
||||
{
|
||||
bool saved_matched = match_found;
|
||||
matches.pop(temp_match);
|
||||
match_found = prev_acc.peek();
|
||||
prev_acc.pop();
|
||||
prev_acc.pop(cur_acc);
|
||||
for(k = cur_acc; k >= 0; --k)
|
||||
prev_acc.pop(accumulators[k]);
|
||||
prev_pos.pop(first);
|
||||
for(k = cur_acc; k >= 0; --k)
|
||||
prev_pos.pop(start_loop[k]);
|
||||
prev_record.pop();
|
||||
unwind_stack = false;
|
||||
if(static_cast<const re_brace*>(ptr)->index == -1)
|
||||
{
|
||||
if (saved_matched == false)
|
||||
goto failure;
|
||||
ptr = static_cast<const re_jump*>(ptr->next.p)->alt.p->next.p;
|
||||
goto retry;
|
||||
}
|
||||
if(static_cast<const re_brace*>(ptr)->index == -2)
|
||||
{
|
||||
if (saved_matched == true)
|
||||
goto failure;
|
||||
ptr = static_cast<const re_jump*>(ptr->next.p)->alt.p->next.p;
|
||||
goto retry;
|
||||
}
|
||||
else goto failure;
|
||||
}
|
||||
case syntax_element_match:
|
||||
if(need_push_match)
|
||||
matches.pop(temp_match);
|
||||
prev_pos.pop(first);
|
||||
prev_record.pop();
|
||||
if(unwind_stack) goto failure; // unwinding forward assert
|
||||
goto retry;
|
||||
default:
|
||||
jm_assert(0);
|
||||
|
@ -118,7 +118,10 @@ struct BOOST_REGEX_DECL regex_traits_base
|
||||
syntax_Z = 51, // for \Z
|
||||
syntax_G = 52, // for \G
|
||||
|
||||
syntax_max = 53
|
||||
// new extensions:
|
||||
syntax_not = 53, // for (?!...)
|
||||
|
||||
syntax_max = 54
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -194,6 +194,8 @@ const mss default_messages[] = {
|
||||
{ 100 + c_regex_traits<char>::syntax_Z, "Z", },
|
||||
{ 100 + c_regex_traits<char>::syntax_G, "G", },
|
||||
|
||||
{ 100 + c_regex_traits<char>::syntax_not, "!", },
|
||||
|
||||
{ 0, "", },
|
||||
};
|
||||
|
||||
|
@ -24,14 +24,16 @@
|
||||
#define BOOST_REGEX_SOURCE
|
||||
|
||||
#include <boost/regex/config.hpp>
|
||||
#include <boost/regex/detail/regex_raw_buffer.hpp>
|
||||
#include <boost/regex.hpp>
|
||||
|
||||
#ifdef BOOST_REGEX_DEBUG
|
||||
|
||||
#ifdef BOOST_MSVC
|
||||
#include <crtdbg.h>
|
||||
#endif
|
||||
|
||||
#ifdef BOOST_REGEX_DEBUG
|
||||
#include <boost/regex/detail/regex_raw_buffer.hpp>
|
||||
#include <boost/regex.hpp>
|
||||
|
||||
#ifndef BOOST_RE_OLD_IOSTREAM
|
||||
#include <ostream>
|
||||
#else
|
||||
|
@ -869,17 +869,19 @@ a+(?#b+)b+ xaaabbba 1 7
|
||||
(xyz)(.*)abc xyz -1 -1 0 3 3 3
|
||||
(xyz)(.*)abc xy -1 -1 -1 -1 -1 -1
|
||||
|
||||
;
|
||||
; forward lookahead asserts added 21/01/02
|
||||
- match_default normal REG_EXTENDED
|
||||
((?:(?!a|b)\w)+)(\w+) " xxxabaxxx " 2 11 2 5 5 11
|
||||
|
||||
/\*(?:(?!\*/).)*\*/ " /**/ " 2 6
|
||||
/\*(?:(?!\*/).)*\*/ " /***/ " 2 7
|
||||
/\*(?:(?!\*/).)*\*/ " /********/ " 2 12
|
||||
/\*(?:(?!\*/).)*\*/ " /* comment */ " 2 15
|
||||
|
||||
<\s*a[^>]*>((?:(?!<\s*/\s*a\s*>).)*)<\s*/\s*a\s*> " <a href=\"here\">here</a> " 1 24 16 20
|
||||
<\s*a[^>]*>((?:(?!<\s*/\s*a\s*>).)*)<\s*/\s*a\s*> " <a href=\"here\">here< / a > " 1 28 16 20
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<\s*a[^>]*>((?:(?!<\s*/\s*a\s*>).)*)(?=<\s*/\s*a\s*>) " <a href=\"here\">here</a> " 1 20 16 20
|
||||
<\s*a[^>]*>((?:(?!<\s*/\s*a\s*>).)*)(?=<\s*/\s*a\s*>) " <a href=\"here\">here< / a > " 1 20 16 20
|
||||
|
||||
|
Reference in New Issue
Block a user