forked from boostorg/regex
Added support for forward lookahead asserts.
[SVN r12455]
This commit is contained in:
@ -216,7 +216,7 @@ struct re_syntax_base
|
|||||||
// marks start or end of (...)
|
// marks start or end of (...)
|
||||||
struct re_brace : public re_syntax_base
|
struct re_brace : public re_syntax_base
|
||||||
{
|
{
|
||||||
unsigned int index;
|
int index;
|
||||||
};
|
};
|
||||||
|
|
||||||
//
|
//
|
||||||
|
@ -444,6 +444,12 @@ bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::probe_start(
|
|||||||
switch(node->type)
|
switch(node->type)
|
||||||
{
|
{
|
||||||
case re_detail::syntax_element_startmark:
|
case re_detail::syntax_element_startmark:
|
||||||
|
if(static_cast<const re_detail::re_brace*>(node)->index == -1)
|
||||||
|
{
|
||||||
|
return probe_start(node->next.p->next.p, cc, terminal)
|
||||||
|
&& probe_start(static_cast<const re_detail::re_jump*>(node->next.p)->alt.p, cc, terminal);
|
||||||
|
}
|
||||||
|
// fall through:
|
||||||
case re_detail::syntax_element_endmark:
|
case re_detail::syntax_element_endmark:
|
||||||
case re_detail::syntax_element_start_line:
|
case re_detail::syntax_element_start_line:
|
||||||
case re_detail::syntax_element_word_boundary:
|
case re_detail::syntax_element_word_boundary:
|
||||||
@ -1207,6 +1213,7 @@ void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::fixup_apply(re_d
|
|||||||
}
|
}
|
||||||
goto rebase;
|
goto rebase;
|
||||||
case re_detail::syntax_element_endmark:
|
case re_detail::syntax_element_endmark:
|
||||||
|
if(((re_detail::re_brace*)ptr)->index > 0)
|
||||||
pb[((re_detail::re_brace*)ptr)->index] = true;
|
pb[((re_detail::re_brace*)ptr)->index] = true;
|
||||||
goto rebase;
|
goto rebase;
|
||||||
default:
|
default:
|
||||||
@ -1261,7 +1268,7 @@ unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::set_expr
|
|||||||
const charT* ptr = p;
|
const charT* ptr = p;
|
||||||
marks = 0;
|
marks = 0;
|
||||||
re_detail::jstack<unsigned int, Allocator> mark(64, data.allocator());
|
re_detail::jstack<unsigned int, Allocator> mark(64, data.allocator());
|
||||||
re_detail::jstack<unsigned int, Allocator> markid(64, data.allocator());
|
re_detail::jstack<int, Allocator> markid(64, data.allocator());
|
||||||
unsigned int last_mark_popped = 0;
|
unsigned int last_mark_popped = 0;
|
||||||
register traits_size_type c;
|
register traits_size_type c;
|
||||||
register re_detail::re_syntax_base* dat;
|
register re_detail::re_syntax_base* dat;
|
||||||
@ -1325,6 +1332,28 @@ unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::set_expr
|
|||||||
markid.push(0);
|
markid.push(0);
|
||||||
++ptr;
|
++ptr;
|
||||||
continue;
|
continue;
|
||||||
|
case traits_type::syntax_equal:
|
||||||
|
((re_detail::re_brace*)dat)->index = -1;
|
||||||
|
markid.pop();
|
||||||
|
markid.push(-1);
|
||||||
|
common_forward_assert:
|
||||||
|
--marks;
|
||||||
|
++ptr;
|
||||||
|
// extend:
|
||||||
|
dat = add_simple(dat, re_detail::syntax_element_jump, re_detail::re_jump_size);
|
||||||
|
data.align();
|
||||||
|
//
|
||||||
|
// we don't know what value to put here yet,
|
||||||
|
// use an arbitrarily large value for now
|
||||||
|
// and check it later:
|
||||||
|
((re_detail::re_jump*)dat)->alt.i = INT_MAX/2;
|
||||||
|
mark.push(data.size() - re_detail::re_jump_size);
|
||||||
|
continue;
|
||||||
|
case traits_type::syntax_not:
|
||||||
|
((re_detail::re_brace*)dat)->index = -2;
|
||||||
|
markid.pop();
|
||||||
|
markid.push(-2);
|
||||||
|
goto common_forward_assert;
|
||||||
case traits_type::syntax_hash:
|
case traits_type::syntax_hash:
|
||||||
// comment just skip it:
|
// comment just skip it:
|
||||||
((re_detail::re_brace*)dat)->index = 0;
|
((re_detail::re_brace*)dat)->index = 0;
|
||||||
|
@ -302,6 +302,7 @@ bool query_match_aux(iterator first,
|
|||||||
const re_syntax_base* ptr = access::first(e);
|
const re_syntax_base* ptr = access::first(e);
|
||||||
bool match_found = false;
|
bool match_found = false;
|
||||||
bool have_partial_match = false;
|
bool have_partial_match = false;
|
||||||
|
bool unwind_stack = false;
|
||||||
bool need_push_match = (e.mark_count() > 1);
|
bool need_push_match = (e.mark_count() > 1);
|
||||||
int cur_acc = -1; // no active accumulator
|
int cur_acc = -1; // no active accumulator
|
||||||
pd.set_accumulator_size(access::repeat_count(e));
|
pd.set_accumulator_size(access::repeat_count(e));
|
||||||
@ -357,13 +358,46 @@ bool query_match_aux(iterator first,
|
|||||||
}
|
}
|
||||||
goto failure;
|
goto failure;
|
||||||
case syntax_element_startmark:
|
case syntax_element_startmark:
|
||||||
|
start_mark_jump:
|
||||||
if(((re_brace*)ptr)->index > 0)
|
if(((re_brace*)ptr)->index > 0)
|
||||||
|
{
|
||||||
temp_match.set_first(first, ((re_brace*)ptr)->index);
|
temp_match.set_first(first, ((re_brace*)ptr)->index);
|
||||||
|
}
|
||||||
|
else if(
|
||||||
|
(((re_brace*)ptr)->index == -1)
|
||||||
|
|| (((re_brace*)ptr)->index == -2)
|
||||||
|
)
|
||||||
|
{
|
||||||
|
matches.push(temp_match);
|
||||||
|
for(k = 0; k <= cur_acc; ++k)
|
||||||
|
prev_pos.push(start_loop[k]);
|
||||||
|
prev_pos.push(first);
|
||||||
|
prev_record.push(ptr);
|
||||||
|
for(k = 0; k <= cur_acc; ++k)
|
||||||
|
prev_acc.push(accumulators[k]);
|
||||||
|
prev_acc.push(cur_acc);
|
||||||
|
prev_acc.push(match_found);
|
||||||
|
match_found = false;
|
||||||
|
// skip next jump and fall through:
|
||||||
|
ptr = ptr->next.p;
|
||||||
|
}
|
||||||
ptr = ptr->next.p;
|
ptr = ptr->next.p;
|
||||||
break;
|
break;
|
||||||
case syntax_element_endmark:
|
case syntax_element_endmark:
|
||||||
|
end_mark_jump:
|
||||||
if(((re_brace*)ptr)->index > 0)
|
if(((re_brace*)ptr)->index > 0)
|
||||||
|
{
|
||||||
temp_match.set_second(first, ((re_brace*)ptr)->index);
|
temp_match.set_second(first, ((re_brace*)ptr)->index);
|
||||||
|
}
|
||||||
|
else if(
|
||||||
|
(((re_brace*)ptr)->index == -1)
|
||||||
|
|| (((re_brace*)ptr)->index == -2)
|
||||||
|
)
|
||||||
|
{
|
||||||
|
match_found = true;
|
||||||
|
unwind_stack = true;
|
||||||
|
goto failure;
|
||||||
|
}
|
||||||
ptr = ptr->next.p;
|
ptr = ptr->next.p;
|
||||||
break;
|
break;
|
||||||
case syntax_element_literal:
|
case syntax_element_literal:
|
||||||
@ -773,13 +807,9 @@ bool query_match_aux(iterator first,
|
|||||||
case syntax_element_match:
|
case syntax_element_match:
|
||||||
goto match_jump;
|
goto match_jump;
|
||||||
case syntax_element_startmark:
|
case syntax_element_startmark:
|
||||||
temp_match.set_first(first, ((re_brace*)ptr)->index);
|
goto start_mark_jump;
|
||||||
ptr = ptr->next.p;
|
|
||||||
break;
|
|
||||||
case syntax_element_endmark:
|
case syntax_element_endmark:
|
||||||
temp_match.set_second(first, ((re_brace*)ptr)->index);
|
goto end_mark_jump;
|
||||||
ptr = ptr->next.p;
|
|
||||||
break;
|
|
||||||
case syntax_element_start_line:
|
case syntax_element_start_line:
|
||||||
goto outer_line_check;
|
goto outer_line_check;
|
||||||
case syntax_element_end_line:
|
case syntax_element_end_line:
|
||||||
@ -915,6 +945,7 @@ bool query_match_aux(iterator first,
|
|||||||
for(k = cur_acc; k >= 0; --k)
|
for(k = cur_acc; k >= 0; --k)
|
||||||
prev_pos.pop(start_loop[k]);
|
prev_pos.pop(start_loop[k]);
|
||||||
prev_record.pop();
|
prev_record.pop();
|
||||||
|
if(unwind_stack) goto failure; // unwinding forward assert
|
||||||
goto retry;
|
goto retry;
|
||||||
case syntax_element_rep:
|
case syntax_element_rep:
|
||||||
{
|
{
|
||||||
@ -933,6 +964,7 @@ bool query_match_aux(iterator first,
|
|||||||
for(k = cur_acc; k >= 0; --k)
|
for(k = cur_acc; k >= 0; --k)
|
||||||
prev_acc.pop(accumulators[k]);
|
prev_acc.pop(accumulators[k]);
|
||||||
prev_record.pop();
|
prev_record.pop();
|
||||||
|
if(unwind_stack) goto failure; // unwinding forward assert
|
||||||
if((unsigned int)++accumulators[cur_acc] > ((re_repeat*)ptr)->max)
|
if((unsigned int)++accumulators[cur_acc] > ((re_repeat*)ptr)->max)
|
||||||
goto failure; // repetitions exhausted.
|
goto failure; // repetitions exhausted.
|
||||||
//
|
//
|
||||||
@ -947,11 +979,42 @@ bool query_match_aux(iterator first,
|
|||||||
start_loop[cur_acc] = first;
|
start_loop[cur_acc] = first;
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
|
case syntax_element_startmark:
|
||||||
|
{
|
||||||
|
bool saved_matched = match_found;
|
||||||
|
matches.pop(temp_match);
|
||||||
|
match_found = prev_acc.peek();
|
||||||
|
prev_acc.pop();
|
||||||
|
prev_acc.pop(cur_acc);
|
||||||
|
for(k = cur_acc; k >= 0; --k)
|
||||||
|
prev_acc.pop(accumulators[k]);
|
||||||
|
prev_pos.pop(first);
|
||||||
|
for(k = cur_acc; k >= 0; --k)
|
||||||
|
prev_pos.pop(start_loop[k]);
|
||||||
|
prev_record.pop();
|
||||||
|
unwind_stack = false;
|
||||||
|
if(static_cast<const re_brace*>(ptr)->index == -1)
|
||||||
|
{
|
||||||
|
if (saved_matched == false)
|
||||||
|
goto failure;
|
||||||
|
ptr = static_cast<const re_jump*>(ptr->next.p)->alt.p->next.p;
|
||||||
|
goto retry;
|
||||||
|
}
|
||||||
|
if(static_cast<const re_brace*>(ptr)->index == -2)
|
||||||
|
{
|
||||||
|
if (saved_matched == true)
|
||||||
|
goto failure;
|
||||||
|
ptr = static_cast<const re_jump*>(ptr->next.p)->alt.p->next.p;
|
||||||
|
goto retry;
|
||||||
|
}
|
||||||
|
else goto failure;
|
||||||
|
}
|
||||||
case syntax_element_match:
|
case syntax_element_match:
|
||||||
if(need_push_match)
|
if(need_push_match)
|
||||||
matches.pop(temp_match);
|
matches.pop(temp_match);
|
||||||
prev_pos.pop(first);
|
prev_pos.pop(first);
|
||||||
prev_record.pop();
|
prev_record.pop();
|
||||||
|
if(unwind_stack) goto failure; // unwinding forward assert
|
||||||
goto retry;
|
goto retry;
|
||||||
default:
|
default:
|
||||||
jm_assert(0);
|
jm_assert(0);
|
||||||
|
@ -118,7 +118,10 @@ struct BOOST_REGEX_DECL regex_traits_base
|
|||||||
syntax_Z = 51, // for \Z
|
syntax_Z = 51, // for \Z
|
||||||
syntax_G = 52, // for \G
|
syntax_G = 52, // for \G
|
||||||
|
|
||||||
syntax_max = 53
|
// new extensions:
|
||||||
|
syntax_not = 53, // for (?!...)
|
||||||
|
|
||||||
|
syntax_max = 54
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -194,6 +194,8 @@ const mss default_messages[] = {
|
|||||||
{ 100 + c_regex_traits<char>::syntax_Z, "Z", },
|
{ 100 + c_regex_traits<char>::syntax_Z, "Z", },
|
||||||
{ 100 + c_regex_traits<char>::syntax_G, "G", },
|
{ 100 + c_regex_traits<char>::syntax_G, "G", },
|
||||||
|
|
||||||
|
{ 100 + c_regex_traits<char>::syntax_not, "!", },
|
||||||
|
|
||||||
{ 0, "", },
|
{ 0, "", },
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -24,14 +24,16 @@
|
|||||||
#define BOOST_REGEX_SOURCE
|
#define BOOST_REGEX_SOURCE
|
||||||
|
|
||||||
#include <boost/regex/config.hpp>
|
#include <boost/regex/config.hpp>
|
||||||
#include <boost/regex/detail/regex_raw_buffer.hpp>
|
|
||||||
#include <boost/regex.hpp>
|
#ifdef BOOST_REGEX_DEBUG
|
||||||
|
|
||||||
#ifdef BOOST_MSVC
|
#ifdef BOOST_MSVC
|
||||||
#include <crtdbg.h>
|
#include <crtdbg.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef BOOST_REGEX_DEBUG
|
#include <boost/regex/detail/regex_raw_buffer.hpp>
|
||||||
|
#include <boost/regex.hpp>
|
||||||
|
|
||||||
#ifndef BOOST_RE_OLD_IOSTREAM
|
#ifndef BOOST_RE_OLD_IOSTREAM
|
||||||
#include <ostream>
|
#include <ostream>
|
||||||
#else
|
#else
|
||||||
|
@ -869,17 +869,19 @@ a+(?#b+)b+ xaaabbba 1 7
|
|||||||
(xyz)(.*)abc xyz -1 -1 0 3 3 3
|
(xyz)(.*)abc xyz -1 -1 0 3 3 3
|
||||||
(xyz)(.*)abc xy -1 -1 -1 -1 -1 -1
|
(xyz)(.*)abc xy -1 -1 -1 -1 -1 -1
|
||||||
|
|
||||||
|
;
|
||||||
|
; forward lookahead asserts added 21/01/02
|
||||||
|
- match_default normal REG_EXTENDED
|
||||||
|
((?:(?!a|b)\w)+)(\w+) " xxxabaxxx " 2 11 2 5 5 11
|
||||||
|
|
||||||
|
/\*(?:(?!\*/).)*\*/ " /**/ " 2 6
|
||||||
|
/\*(?:(?!\*/).)*\*/ " /***/ " 2 7
|
||||||
|
/\*(?:(?!\*/).)*\*/ " /********/ " 2 12
|
||||||
|
/\*(?:(?!\*/).)*\*/ " /* comment */ " 2 15
|
||||||
|
|
||||||
|
<\s*a[^>]*>((?:(?!<\s*/\s*a\s*>).)*)<\s*/\s*a\s*> " <a href=\"here\">here</a> " 1 24 16 20
|
||||||
|
<\s*a[^>]*>((?:(?!<\s*/\s*a\s*>).)*)<\s*/\s*a\s*> " <a href=\"here\">here< / a > " 1 28 16 20
|
||||||
|
|
||||||
|
<\s*a[^>]*>((?:(?!<\s*/\s*a\s*>).)*)(?=<\s*/\s*a\s*>) " <a href=\"here\">here</a> " 1 20 16 20
|
||||||
|
<\s*a[^>]*>((?:(?!<\s*/\s*a\s*>).)*)(?=<\s*/\s*a\s*>) " <a href=\"here\">here< / a > " 1 20 16 20
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user