Added initial support for recursive expressions.

Updated docs and tests accordingly.

[SVN r54994]
This commit is contained in:
John Maddock
2009-07-17 10:23:50 +00:00
parent 02a629baf7
commit 5a6bc29d7c
44 changed files with 1013 additions and 264 deletions

View File

@ -194,6 +194,7 @@ struct regex_data : public named_subexpressions<charT>
std::vector<
std::pair<
std::size_t, std::size_t> > m_subs; // Position of sub-expressions within the *string*.
bool m_has_recursions; // whether we have recursive expressions;
};
//
// class basic_regex_implementation

View File

@ -240,6 +240,7 @@ protected:
bool m_has_backrefs; // true if there are actually any backrefs
unsigned m_backrefs; // bitmask of permitted backrefs
boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for;
bool m_has_recursions; // set when we have recursive expresisons to fixup
typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character
typename traits::char_class_type m_lower_mask; // mask used to determine if a character is a lowercase character
@ -250,6 +251,7 @@ private:
basic_regex_creator(const basic_regex_creator&);
void fixup_pointers(re_syntax_base* state);
void fixup_recursions(re_syntax_base* state);
void create_startmaps(re_syntax_base* state);
int calculate_backstep(re_syntax_base* state);
void create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask);
@ -263,7 +265,7 @@ private:
template <class charT, class traits>
basic_regex_creator<charT, traits>::basic_regex_creator(regex_data<charT, traits>* data)
: m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_repeater_id(0), m_has_backrefs(false), m_backrefs(0)
: m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_repeater_id(0), m_has_backrefs(false), m_backrefs(0), m_has_recursions(false)
{
m_pdata->m_data.clear();
m_pdata->m_status = ::boost::regex_constants::error_ok;
@ -692,6 +694,13 @@ void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT*
m_pdata->m_first_state = static_cast<re_syntax_base*>(m_pdata->m_data.data());
// fixup pointers in the machine:
fixup_pointers(m_pdata->m_first_state);
if(m_has_recursions)
{
m_pdata->m_has_recursions = true;
fixup_recursions(m_pdata->m_first_state);
}
else
m_pdata->m_has_recursions = false;
// create nested startmaps:
create_startmaps(m_pdata->m_first_state);
// create main startmap:
@ -713,6 +722,13 @@ void basic_regex_creator<charT, traits>::fixup_pointers(re_syntax_base* state)
{
switch(state->type)
{
case syntax_element_recurse:
m_has_recursions = true;
if(state->next.i)
state->next.p = getaddress(state->next.i, state);
else
state->next.p = 0;
break;
case syntax_element_rep:
case syntax_element_dot_rep:
case syntax_element_char_rep:
@ -738,6 +754,65 @@ void basic_regex_creator<charT, traits>::fixup_pointers(re_syntax_base* state)
}
}
template <class charT, class traits>
void basic_regex_creator<charT, traits>::fixup_recursions(re_syntax_base* state)
{
re_syntax_base* base = state;
while(state)
{
switch(state->type)
{
case syntax_element_recurse:
{
bool ok = false;
re_syntax_base* p = base;
/*
if(static_cast<re_jump*>(state)->alt.i == 0)
{
ok = true;
static_cast<re_jump*>(state)->alt.p = p;
}
else
{*/
while(p)
{
if((p->type == syntax_element_startmark) && (static_cast<re_brace*>(p)->index == static_cast<re_jump*>(state)->alt.i))
{
static_cast<re_jump*>(state)->alt.p = p;
ok = true;
break;
}
p = p->next.p;
}
//}
if(!ok)
{
// recursion to sub-expression that doesn't exist:
if(0 == this->m_pdata->m_status) // update the error code if not already set
this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
//
// clear the expression, we should be empty:
//
this->m_pdata->m_expression = 0;
this->m_pdata->m_expression_len = 0;
//
// and throw if required:
//
if(0 == (this->flags() & regex_constants::no_except))
{
std::string message = this->m_pdata->m_ptraits->error_string(boost::regex_constants::error_bad_pattern);
boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
e.raise();
}
}
}
default:
break;
}
state = state->next.p;
}
}
template <class charT, class traits>
void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
{
@ -953,6 +1028,7 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
create_startmap(state->next.p, 0, pnull, mask);
return;
}
case syntax_element_recurse:
case syntax_element_backref:
// can be null, and any character can match:
if(pnull)

View File

@ -125,8 +125,16 @@ void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2,
switch(l_flags & regbase::main_option_type)
{
case regbase::perl_syntax_group:
m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
break;
{
m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
//
// Add a leading paren with index zero to give recursions a target:
//
re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
br->index = 0;
br->icase = this->flags() & regbase::icase;
break;
}
case regbase::basic_syntax_group:
m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
break;
@ -387,6 +395,7 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
}
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
pb->index = markid;
pb->icase = this->flags() & regbase::icase;
std::ptrdiff_t last_paren_start = this->getoffset(pb);
// back up insertion point for alternations, and set new point:
std::ptrdiff_t last_alt_point = m_alt_insert_point;
@ -399,6 +408,11 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
bool old_case_change = m_has_case_change;
m_has_case_change = false; // no changes to this scope as yet...
//
// Back up branch reset data in case we have a nested (?|...)
//
int mark_reset = m_mark_reset;
m_mark_reset = -1;
//
// now recursively add more states, this will terminate when we get to a
// matching ')' :
//
@ -423,6 +437,10 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
this->flags(opts);
m_has_case_change = old_case_change;
//
// restore branch reset:
//
m_mark_reset = mark_reset;
//
// we either have a ')' or we have run out of characters prematurely:
//
if(m_position == m_end)
@ -444,6 +462,7 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
//
pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
pb->index = markid;
pb->icase = this->flags() & regbase::icase;
this->m_paren_start = last_paren_start;
//
// restore the alternate insertion point:
@ -729,6 +748,7 @@ escape_type_class_jump:
{
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
pb->index = -5;
pb->icase = this->flags() & regbase::icase;
this->m_pdata->m_data.align();
++m_position;
return true;
@ -795,6 +815,7 @@ escape_type_class_jump:
m_position = pc;
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
pb->index = i;
pb->icase = this->flags() & regbase::icase;
}
else
{
@ -946,11 +967,13 @@ bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_
{
re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace)));
pb->index = -3;
pb->icase = this->flags() & regbase::icase;
re_jump* jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump)));
this->m_pdata->m_data.align();
jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
pb->index = -3;
pb->icase = this->flags() & regbase::icase;
}
return true;
}
@ -1706,6 +1729,7 @@ bool basic_regex_parser<charT, traits>::parse_backref()
m_position = pc;
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
pb->index = i;
pb->icase = this->flags() & regbase::icase;
}
else
{
@ -1793,6 +1817,7 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
int markid = 0;
std::ptrdiff_t jump_offset = 0;
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
pb->icase = this->flags() & regbase::icase;
std::ptrdiff_t last_paren_start = this->getoffset(pb);
// back up insertion point for alternations, and set new point:
std::ptrdiff_t last_alt_point = m_alt_insert_point;
@ -1806,6 +1831,7 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
charT name_delim;
int mark_reset = m_mark_reset;
m_mark_reset = -1;
int v;
//
// select the actual extension used:
//
@ -1821,6 +1847,57 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
pb->index = markid = 0;
++m_position;
break;
case regex_constants::syntax_digit:
{
//
// a recursive subexpression:
//
v = this->m_traits.toi(m_position, m_end, 10);
if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
{
fail(regex_constants::error_backref, m_position - m_base);
return false;
}
insert_recursion:
pb->index = markid = 0;
static_cast<re_jump*>(this->append_state(syntax_element_recurse, sizeof(re_jump)))->alt.i = v;
static_cast<re_case*>(
this->append_state(syntax_element_toggle_case, sizeof(re_case))
)->icase = this->flags() & regbase::icase;
break;
}
case regex_constants::syntax_plus:
//
// A forward-relative recursive subexpression:
//
++m_position;
v = this->m_traits.toi(m_position, m_end, 10);
if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
{
fail(regex_constants::error_backref, m_position - m_base);
return false;
}
v += m_mark_count;
goto insert_recursion;
case regex_constants::syntax_dash:
//
// Possibly a backward-relative recursive subexpression:
//
++m_position;
v = this->m_traits.toi(m_position, m_end, 10);
if(v <= 0)
{
--m_position;
// Oops not a relative recursion at all, but a (?-imsx) group:
goto option_group_jump;
}
v = m_mark_count + 1 - v;
if(v <= 0)
{
fail(regex_constants::error_backref, m_position - m_base);
return false;
}
goto insert_recursion;
case regex_constants::syntax_equal:
pb->index = markid = -1;
++m_position;
@ -1882,7 +1959,24 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
return false;
}
int v = this->m_traits.toi(m_position, m_end, 10);
if(v > 0)
if(*m_position == charT('R'))
{
++m_position;
v = -this->m_traits.toi(m_position, m_end, 10);
re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
br->index = v < 0 ? (v - 1) : 0;
if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
{
fail(regex_constants::error_badrepeat, m_position - m_base);
return false;
}
if(++m_position == m_end)
{
fail(regex_constants::error_badrepeat, m_position - m_base);
return false;
}
}
else if(v > 0)
{
re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
br->index = v;
@ -1976,9 +2070,21 @@ named_capture_jump:
break;
}
default:
if(*m_position == charT('R'))
{
++m_position;
v = 0;
if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
{
fail(regex_constants::error_backref, m_position - m_base);
return false;
}
goto insert_recursion;
}
//
// lets assume that we have a (?imsx) group and try and parse it:
//
option_group_jump:
regex_constants::syntax_option_type opts = parse_options();
if(m_position == m_end)
return false;
@ -2095,6 +2201,7 @@ named_capture_jump:
//
pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
pb->index = markid;
pb->icase = this->flags() & regbase::icase;
this->m_paren_start = last_paren_start;
//
// restore the alternate insertion point:

View File

@ -285,7 +285,8 @@ public:
}
~repeater_count()
{
*stack = next;
if(next)
*stack = next;
}
std::size_t get_count() { return count; }
int get_id() { return state_id; }
@ -325,6 +326,17 @@ enum saved_state_type
saved_state_count = 14
};
template <class Results>
struct recursion_info
{
typedef typename Results::value_type value_type;
typedef typename value_type::iterator iterator;
int id;
const re_syntax_base* preturn_address;
Results results;
repeater_count<iterator>* repeater_stack;
};
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable : 4251 4231 4660)
@ -340,6 +352,7 @@ public:
typedef std::size_t traits_size_type;
typedef typename is_byte<char_type>::width_type width_type;
typedef typename regex_iterator_traits<BidiIterator>::difference_type difference_type;
typedef match_results<BidiIterator, Allocator> results_type;
perl_matcher(BidiIterator first, BidiIterator end,
match_results<BidiIterator, Allocator>& what,
@ -348,7 +361,7 @@ public:
BidiIterator l_base)
: m_result(what), base(first), last(end),
position(first), backstop(l_base), re(e), traits_inst(e.get_traits()),
m_independent(false), next_count(&rep_obj), rep_obj(&next_count)
m_independent(false), next_count(&rep_obj), rep_obj(&next_count), recursion_stack_position(0)
{
construct_init(e, f);
}
@ -413,6 +426,7 @@ private:
#ifdef BOOST_REGEX_RECURSIVE
bool backtrack_till_match(std::size_t count);
#endif
bool match_recursion();
// find procs stored in s_find_vtable:
bool find_restart_any();
@ -468,6 +482,9 @@ private:
typename traits::char_class_type m_word_mask;
// the bitmask to use when determining whether a match_any matches a newline or not:
unsigned char match_any_mask;
// recursion information:
recursion_info<results_type> recursion_stack[50];
unsigned recursion_stack_position;
#ifdef BOOST_REGEX_NON_RECURSIVE
//
@ -491,6 +508,8 @@ private:
bool unwind_short_set_repeat(bool);
bool unwind_long_set_repeat(bool);
bool unwind_non_greedy_repeat(bool);
bool unwind_recursion(bool);
bool unwind_recursion_pop(bool);
void destroy_single_repeat();
void push_matched_paren(int index, const sub_match<BidiIterator>& sub);
void push_recursion_stopper();
@ -499,7 +518,8 @@ private:
void push_repeater_count(int i, repeater_count<BidiIterator>** s);
void push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int state_id);
void push_non_greedy_repeat(const re_syntax_base* ps);
void push_recursion(int id, const re_syntax_base* p, results_type* presults);
void push_recursion_pop();
// pointer to base of stack:
saved_state* m_stack_base;

View File

@ -345,25 +345,6 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_prefix()
return m_has_found_match;
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_endmark()
{
int index = static_cast<const re_brace*>(pstate)->index;
if(index > 0)
{
if((m_match_flags & match_nosubs) == 0)
m_presult->set_second(position, index);
}
else if((index < 0) && (index != -4))
{
// matched forward lookahead:
pstate = 0;
return true;
}
pstate = pstate->next.p;
return true;
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_literal()
{
@ -464,35 +445,6 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_wild()
return true;
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_match()
{
if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first))
return false;
if((m_match_flags & match_all) && (position != last))
return false;
if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base))
return false;
m_presult->set_second(position);
pstate = 0;
m_has_found_match = true;
if((m_match_flags & match_posix) == match_posix)
{
m_result.maybe_assign(*m_presult);
if((m_match_flags & match_any) == 0)
return false;
}
#ifdef BOOST_REGEX_MATCH_EXTRA
if(match_extra & m_match_flags)
{
for(unsigned i = 0; i < m_presult->size(); ++i)
if((*m_presult)[i].matched)
((*m_presult)[i]).get_captures().push_back((*m_presult)[i]);
}
#endif
return true;
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_word_boundary()
{
@ -760,8 +712,21 @@ template <class BidiIterator, class Allocator, class traits>
inline bool perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref()
{
// return true if marked sub-expression N has been matched:
bool result = (*m_presult)[static_cast<const re_brace*>(pstate)->index].matched;
pstate = pstate->next.p;
int index = static_cast<const re_brace*>(pstate)->index;
bool result;
if(index > 0)
{
// Have we matched subexpression "index"?
result = (*m_presult)[index].matched;
pstate = pstate->next.p;
}
else
{
// Have we recursed into subexpression "index"?
// If index == 0 then check for any recursion at all, otherwise for recursion to -index-1.
result = recursion_stack_position && ((recursion_stack[recursion_stack_position-1].id == -index-1) || (index == 0));
pstate = pstate->next.p;
}
return result;
}

View File

@ -127,10 +127,21 @@ struct saved_single_repeat : public saved_state
: saved_state(arg_id), count(c), rep(r), last_position(lp){}
};
template <class Results>
struct saved_recursion : public saved_state
{
saved_recursion(int id, const re_syntax_base* p, Results* pr)
: saved_state(14), recursion_id(id), preturn_address(p), results(*pr)
{}
int recursion_id;
const re_syntax_base* preturn_address;
Results results;
};
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
{
static matcher_proc_type const s_match_vtable[29] =
static matcher_proc_type const s_match_vtable[30] =
{
(&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
&perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
@ -165,6 +176,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
&perl_matcher<BidiIterator, Allocator, traits>::match_backstep,
&perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref,
&perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case,
&perl_matcher<BidiIterator, Allocator, traits>::match_recursion,
};
push_recursion_stopper();
@ -316,10 +328,26 @@ inline void perl_matcher<BidiIterator, Allocator, traits>::push_single_repeat(st
m_backup_state = pmp;
}
template <class BidiIterator, class Allocator, class traits>
inline void perl_matcher<BidiIterator, Allocator, traits>::push_recursion(int id, const re_syntax_base* p, results_type* presults)
{
saved_recursion<results_type>* pmp = static_cast<saved_recursion<results_type>*>(m_backup_state);
--pmp;
if(pmp < m_stack_base)
{
extend_stack();
pmp = static_cast<saved_recursion<results_type>*>(m_backup_state);
--pmp;
}
(void) new (pmp)saved_recursion<results_type>(id, p, presults);
m_backup_state = pmp;
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
{
int index = static_cast<const re_brace*>(pstate)->index;
icase = static_cast<const re_brace*>(pstate)->icase;
switch(index)
{
case 0:
@ -859,6 +887,100 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
#endif
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_recursion()
{
BOOST_ASSERT(pstate->type == syntax_element_recurse);
//
// Backup call stack:
//
push_recursion_pop();
//
// Set new call stack:
//
if(recursion_stack_position >= static_cast<int>(sizeof(recursion_stack)/sizeof(recursion_stack[0])))
{
return false;
}
recursion_stack[recursion_stack_position].preturn_address = pstate->next.p;
recursion_stack[recursion_stack_position].results = *m_presult;
pstate = static_cast<const re_jump*>(pstate)->alt.p;
recursion_stack[recursion_stack_position].id = static_cast<const re_brace*>(pstate)->index;
++recursion_stack_position;
//BOOST_ASSERT(recursion_stack[recursion_stack_position-1].id);
return true;
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_endmark()
{
int index = static_cast<const re_brace*>(pstate)->index;
icase = static_cast<const re_brace*>(pstate)->icase;
if(index > 0)
{
if((m_match_flags & match_nosubs) == 0)
{
m_presult->set_second(position, index);
}
if(recursion_stack_position)
{
if(index == recursion_stack[recursion_stack_position-1].id)
{
--recursion_stack_position;
pstate = recursion_stack[recursion_stack_position].preturn_address;
*m_presult = recursion_stack[recursion_stack_position].results;
push_recursion(recursion_stack[recursion_stack_position].id, recursion_stack[recursion_stack_position].preturn_address, &recursion_stack[recursion_stack_position].results);
}
}
}
else if((index < 0) && (index != -4))
{
// matched forward lookahead:
pstate = 0;
return true;
}
pstate = pstate->next.p;
return true;
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_match()
{
if(recursion_stack_position)
{
BOOST_ASSERT(0 == recursion_stack[recursion_stack_position-1].id);
--recursion_stack_position;
pstate = recursion_stack[recursion_stack_position].preturn_address;
*m_presult = recursion_stack[recursion_stack_position].results;
push_recursion(recursion_stack[recursion_stack_position].id, recursion_stack[recursion_stack_position].preturn_address, &recursion_stack[recursion_stack_position].results);
return true;
}
if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first))
return false;
if((m_match_flags & match_all) && (position != last))
return false;
if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base))
return false;
m_presult->set_second(position);
pstate = 0;
m_has_found_match = true;
if((m_match_flags & match_posix) == match_posix)
{
m_result.maybe_assign(*m_presult);
if((m_match_flags & match_any) == 0)
return false;
}
#ifdef BOOST_REGEX_MATCH_EXTRA
if(match_extra & m_match_flags)
{
for(unsigned i = 0; i < m_presult->size(); ++i)
if((*m_presult)[i].matched)
((*m_presult)[i]).get_captures().push_back((*m_presult)[i]);
}
#endif
return true;
}
/****************************************************************************
Unwind and associated proceedures follow, these perform what normal stack
@ -869,7 +991,7 @@ unwinding does in the recursive implementation.
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::unwind(bool have_match)
{
static unwind_proc_type const s_unwind_table[14] =
static unwind_proc_type const s_unwind_table[18] =
{
&perl_matcher<BidiIterator, Allocator, traits>::unwind_end,
&perl_matcher<BidiIterator, Allocator, traits>::unwind_paren,
@ -885,6 +1007,8 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind(bool have_match)
&perl_matcher<BidiIterator, Allocator, traits>::unwind_short_set_repeat,
&perl_matcher<BidiIterator, Allocator, traits>::unwind_long_set_repeat,
&perl_matcher<BidiIterator, Allocator, traits>::unwind_non_greedy_repeat,
&perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion,
&perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion_pop,
};
m_recursive_result = have_match;
@ -1388,6 +1512,106 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_non_greedy_repeat(boo
return r;
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion(bool r)
{
saved_recursion<results_type>* pmp = static_cast<saved_recursion<results_type>*>(m_backup_state);
if(!r)
{
recursion_stack[recursion_stack_position].id = pmp->recursion_id;
recursion_stack[recursion_stack_position].preturn_address = pmp->preturn_address;
recursion_stack[recursion_stack_position].results = pmp->results;
++recursion_stack_position;
}
boost::re_detail::inplace_destroy(pmp++);
m_backup_state = pmp;
return true;
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion_pop(bool r)
{
saved_state* pmp = static_cast<saved_state*>(m_backup_state);
if(!r)
{
--recursion_stack_position;
}
boost::re_detail::inplace_destroy(pmp++);
m_backup_state = pmp;
return true;
}
template <class BidiIterator, class Allocator, class traits>
void perl_matcher<BidiIterator, Allocator, traits>::push_recursion_pop()
{
saved_state* pmp = static_cast<saved_state*>(m_backup_state);
--pmp;
if(pmp < m_stack_base)
{
extend_stack();
pmp = static_cast<saved_state*>(m_backup_state);
--pmp;
}
(void) new (pmp)saved_state(15);
m_backup_state = pmp;
}
/*
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::unwind_parenthesis_pop(bool r)
{
saved_state* pmp = static_cast<saved_state*>(m_backup_state);
if(!r)
{
--parenthesis_stack_position;
}
boost::re_detail::inplace_destroy(pmp++);
m_backup_state = pmp;
return true;
}
template <class BidiIterator, class Allocator, class traits>
void perl_matcher<BidiIterator, Allocator, traits>::push_parenthesis_pop()
{
saved_state* pmp = static_cast<saved_state*>(m_backup_state);
--pmp;
if(pmp < m_stack_base)
{
extend_stack();
pmp = static_cast<saved_state*>(m_backup_state);
--pmp;
}
(void) new (pmp)saved_state(16);
m_backup_state = pmp;
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::unwind_parenthesis_push(bool r)
{
saved_position<BidiIterator>* pmp = static_cast<saved_position<BidiIterator>*>(m_backup_state);
if(!r)
{
parenthesis_stack[parenthesis_stack_position++] = pmp->position;
}
boost::re_detail::inplace_destroy(pmp++);
m_backup_state = pmp;
return true;
}
template <class BidiIterator, class Allocator, class traits>
inline void perl_matcher<BidiIterator, Allocator, traits>::push_parenthesis_push(BidiIterator p)
{
saved_position<BidiIterator>* pmp = static_cast<saved_position<BidiIterator>*>(m_backup_state);
--pmp;
if(pmp < m_stack_base)
{
extend_stack();
pmp = static_cast<saved_position<BidiIterator>*>(m_backup_state);
--pmp;
}
(void) new (pmp)saved_position<BidiIterator>(0, p, 17);
m_backup_state = pmp;
}
*/
} // namespace re_detail
} // namespace boost

View File

@ -60,7 +60,7 @@ public:
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
{
static matcher_proc_type const s_match_vtable[29] =
static matcher_proc_type const s_match_vtable[30] =
{
(&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
&perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
@ -95,6 +95,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
&perl_matcher<BidiIterator, Allocator, traits>::match_backstep,
&perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref,
&perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case,
&perl_matcher<BidiIterator, Allocator, traits>::match_recursion,
};
if(state_count > max_state_count)
@ -117,6 +118,7 @@ template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
{
int index = static_cast<const re_brace*>(pstate)->index;
icase = static_cast<const re_brace*>(pstate)->icase;
bool r = true;
switch(index)
{
@ -848,6 +850,127 @@ bool perl_matcher<BidiIterator, Allocator, traits>::backtrack_till_match(std::si
#endif
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_recursion()
{
BOOST_ASSERT(pstate->type == syntax_element_recurse);
//
// Set new call stack:
//
if(recursion_stack_position >= static_cast<int>(sizeof(recursion_stack)/sizeof(recursion_stack[0])))
{
return false;
}
recursion_stack[recursion_stack_position].preturn_address = pstate->next.p;
recursion_stack[recursion_stack_position].results = *m_presult;
recursion_stack[recursion_stack_position].repeater_stack = next_count;
pstate = static_cast<const re_jump*>(pstate)->alt.p;
recursion_stack[recursion_stack_position].id = static_cast<const re_brace*>(pstate)->index;
++recursion_stack_position;
repeater_count<BidiIterator>* saved = next_count;
repeater_count<BidiIterator> r(&next_count); // resets all repeat counts since we're recursing and starting fresh on those
next_count = &r;
bool result = match_all_states();
next_count = saved;
if(!result)
{
--recursion_stack_position;
next_count = recursion_stack[recursion_stack_position].repeater_stack;
*m_presult = recursion_stack[recursion_stack_position].results;
return false;
}
return true;
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_endmark()
{
int index = static_cast<const re_brace*>(pstate)->index;
icase = static_cast<const re_brace*>(pstate)->icase;
if(index > 0)
{
if((m_match_flags & match_nosubs) == 0)
{
m_presult->set_second(position, index);
}
if(recursion_stack_position)
{
if(index == recursion_stack[recursion_stack_position-1].id)
{
--recursion_stack_position;
recursion_info<results_type> saved = recursion_stack[recursion_stack_position];
const re_syntax_base* saved_state = pstate = saved.preturn_address;
repeater_count<BidiIterator>* saved_count = next_count;
next_count = saved.repeater_stack;
*m_presult = saved.results;
if(!match_all_states())
{
recursion_stack[recursion_stack_position] = saved;
++recursion_stack_position;
next_count = saved_count;
return false;
}
}
}
}
else if((index < 0) && (index != -4))
{
// matched forward lookahead:
pstate = 0;
return true;
}
pstate = pstate ? pstate->next.p : 0;
return true;
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_match()
{
if(recursion_stack_position)
{
BOOST_ASSERT(0 == recursion_stack[recursion_stack_position-1].id);
--recursion_stack_position;
const re_syntax_base* saved_state = pstate = recursion_stack[recursion_stack_position].preturn_address;
*m_presult = recursion_stack[recursion_stack_position].results;
if(!match_all_states())
{
recursion_stack[recursion_stack_position].preturn_address = saved_state;
recursion_stack[recursion_stack_position].results = *m_presult;
++recursion_stack_position;
return false;
}
return true;
}
if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first))
return false;
if((m_match_flags & match_all) && (position != last))
return false;
if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base))
return false;
m_presult->set_second(position);
pstate = 0;
m_has_found_match = true;
if((m_match_flags & match_posix) == match_posix)
{
m_result.maybe_assign(*m_presult);
if((m_match_flags & match_any) == 0)
return false;
}
#ifdef BOOST_REGEX_MATCH_EXTRA
if(match_extra & m_match_flags)
{
for(unsigned i = 0; i < m_presult->size(); ++i)
if((*m_presult)[i].matched)
((*m_presult)[i]).get_captures().push_back((*m_presult)[i]);
}
#endif
return true;
}
} // namespace re_detail
} // namespace boost
#ifdef BOOST_MSVC

View File

@ -118,7 +118,9 @@ enum syntax_element_type
syntax_element_backstep = syntax_element_long_set_rep + 1,
// an assertion that a mark was matched:
syntax_element_assert_backref = syntax_element_backstep + 1,
syntax_element_toggle_case = syntax_element_assert_backref + 1
syntax_element_toggle_case = syntax_element_assert_backref + 1,
// a recursive expression:
syntax_element_recurse = syntax_element_toggle_case + 1
};
#ifdef BOOST_REGEX_DEBUG
@ -156,6 +158,7 @@ struct re_brace : public re_syntax_base
// The index to match, can be zero (don't mark the sub-expression)
// or negative (for perl style (?...) extentions):
int index;
bool icase;
};
/*** struct re_dot **************************************************