mirror of
https://github.com/boostorg/regex.git
synced 2025-07-16 22:02:08 +02:00
Added initial support for recursive expressions.
Updated docs and tests accordingly. [SVN r54994]
This commit is contained in:
@ -194,6 +194,7 @@ struct regex_data : public named_subexpressions<charT>
|
||||
std::vector<
|
||||
std::pair<
|
||||
std::size_t, std::size_t> > m_subs; // Position of sub-expressions within the *string*.
|
||||
bool m_has_recursions; // whether we have recursive expressions;
|
||||
};
|
||||
//
|
||||
// class basic_regex_implementation
|
||||
|
@ -240,6 +240,7 @@ protected:
|
||||
bool m_has_backrefs; // true if there are actually any backrefs
|
||||
unsigned m_backrefs; // bitmask of permitted backrefs
|
||||
boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for;
|
||||
bool m_has_recursions; // set when we have recursive expresisons to fixup
|
||||
typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
|
||||
typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character
|
||||
typename traits::char_class_type m_lower_mask; // mask used to determine if a character is a lowercase character
|
||||
@ -250,6 +251,7 @@ private:
|
||||
basic_regex_creator(const basic_regex_creator&);
|
||||
|
||||
void fixup_pointers(re_syntax_base* state);
|
||||
void fixup_recursions(re_syntax_base* state);
|
||||
void create_startmaps(re_syntax_base* state);
|
||||
int calculate_backstep(re_syntax_base* state);
|
||||
void create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask);
|
||||
@ -263,7 +265,7 @@ private:
|
||||
|
||||
template <class charT, class traits>
|
||||
basic_regex_creator<charT, traits>::basic_regex_creator(regex_data<charT, traits>* data)
|
||||
: m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_repeater_id(0), m_has_backrefs(false), m_backrefs(0)
|
||||
: m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_repeater_id(0), m_has_backrefs(false), m_backrefs(0), m_has_recursions(false)
|
||||
{
|
||||
m_pdata->m_data.clear();
|
||||
m_pdata->m_status = ::boost::regex_constants::error_ok;
|
||||
@ -692,6 +694,13 @@ void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT*
|
||||
m_pdata->m_first_state = static_cast<re_syntax_base*>(m_pdata->m_data.data());
|
||||
// fixup pointers in the machine:
|
||||
fixup_pointers(m_pdata->m_first_state);
|
||||
if(m_has_recursions)
|
||||
{
|
||||
m_pdata->m_has_recursions = true;
|
||||
fixup_recursions(m_pdata->m_first_state);
|
||||
}
|
||||
else
|
||||
m_pdata->m_has_recursions = false;
|
||||
// create nested startmaps:
|
||||
create_startmaps(m_pdata->m_first_state);
|
||||
// create main startmap:
|
||||
@ -713,6 +722,13 @@ void basic_regex_creator<charT, traits>::fixup_pointers(re_syntax_base* state)
|
||||
{
|
||||
switch(state->type)
|
||||
{
|
||||
case syntax_element_recurse:
|
||||
m_has_recursions = true;
|
||||
if(state->next.i)
|
||||
state->next.p = getaddress(state->next.i, state);
|
||||
else
|
||||
state->next.p = 0;
|
||||
break;
|
||||
case syntax_element_rep:
|
||||
case syntax_element_dot_rep:
|
||||
case syntax_element_char_rep:
|
||||
@ -738,6 +754,65 @@ void basic_regex_creator<charT, traits>::fixup_pointers(re_syntax_base* state)
|
||||
}
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
void basic_regex_creator<charT, traits>::fixup_recursions(re_syntax_base* state)
|
||||
{
|
||||
re_syntax_base* base = state;
|
||||
while(state)
|
||||
{
|
||||
switch(state->type)
|
||||
{
|
||||
case syntax_element_recurse:
|
||||
{
|
||||
bool ok = false;
|
||||
re_syntax_base* p = base;
|
||||
/*
|
||||
if(static_cast<re_jump*>(state)->alt.i == 0)
|
||||
{
|
||||
ok = true;
|
||||
static_cast<re_jump*>(state)->alt.p = p;
|
||||
}
|
||||
else
|
||||
{*/
|
||||
while(p)
|
||||
{
|
||||
if((p->type == syntax_element_startmark) && (static_cast<re_brace*>(p)->index == static_cast<re_jump*>(state)->alt.i))
|
||||
{
|
||||
static_cast<re_jump*>(state)->alt.p = p;
|
||||
ok = true;
|
||||
break;
|
||||
}
|
||||
p = p->next.p;
|
||||
}
|
||||
//}
|
||||
if(!ok)
|
||||
{
|
||||
// recursion to sub-expression that doesn't exist:
|
||||
if(0 == this->m_pdata->m_status) // update the error code if not already set
|
||||
this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
|
||||
//
|
||||
// clear the expression, we should be empty:
|
||||
//
|
||||
this->m_pdata->m_expression = 0;
|
||||
this->m_pdata->m_expression_len = 0;
|
||||
//
|
||||
// and throw if required:
|
||||
//
|
||||
if(0 == (this->flags() & regex_constants::no_except))
|
||||
{
|
||||
std::string message = this->m_pdata->m_ptraits->error_string(boost::regex_constants::error_bad_pattern);
|
||||
boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
|
||||
e.raise();
|
||||
}
|
||||
}
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
state = state->next.p;
|
||||
}
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
|
||||
{
|
||||
@ -953,6 +1028,7 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
|
||||
create_startmap(state->next.p, 0, pnull, mask);
|
||||
return;
|
||||
}
|
||||
case syntax_element_recurse:
|
||||
case syntax_element_backref:
|
||||
// can be null, and any character can match:
|
||||
if(pnull)
|
||||
|
@ -125,8 +125,16 @@ void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2,
|
||||
switch(l_flags & regbase::main_option_type)
|
||||
{
|
||||
case regbase::perl_syntax_group:
|
||||
m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
|
||||
break;
|
||||
{
|
||||
m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
|
||||
//
|
||||
// Add a leading paren with index zero to give recursions a target:
|
||||
//
|
||||
re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
|
||||
br->index = 0;
|
||||
br->icase = this->flags() & regbase::icase;
|
||||
break;
|
||||
}
|
||||
case regbase::basic_syntax_group:
|
||||
m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
|
||||
break;
|
||||
@ -387,6 +395,7 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
|
||||
}
|
||||
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
|
||||
pb->index = markid;
|
||||
pb->icase = this->flags() & regbase::icase;
|
||||
std::ptrdiff_t last_paren_start = this->getoffset(pb);
|
||||
// back up insertion point for alternations, and set new point:
|
||||
std::ptrdiff_t last_alt_point = m_alt_insert_point;
|
||||
@ -399,6 +408,11 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
|
||||
bool old_case_change = m_has_case_change;
|
||||
m_has_case_change = false; // no changes to this scope as yet...
|
||||
//
|
||||
// Back up branch reset data in case we have a nested (?|...)
|
||||
//
|
||||
int mark_reset = m_mark_reset;
|
||||
m_mark_reset = -1;
|
||||
//
|
||||
// now recursively add more states, this will terminate when we get to a
|
||||
// matching ')' :
|
||||
//
|
||||
@ -423,6 +437,10 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
|
||||
this->flags(opts);
|
||||
m_has_case_change = old_case_change;
|
||||
//
|
||||
// restore branch reset:
|
||||
//
|
||||
m_mark_reset = mark_reset;
|
||||
//
|
||||
// we either have a ')' or we have run out of characters prematurely:
|
||||
//
|
||||
if(m_position == m_end)
|
||||
@ -444,6 +462,7 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
|
||||
//
|
||||
pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
|
||||
pb->index = markid;
|
||||
pb->icase = this->flags() & regbase::icase;
|
||||
this->m_paren_start = last_paren_start;
|
||||
//
|
||||
// restore the alternate insertion point:
|
||||
@ -729,6 +748,7 @@ escape_type_class_jump:
|
||||
{
|
||||
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
|
||||
pb->index = -5;
|
||||
pb->icase = this->flags() & regbase::icase;
|
||||
this->m_pdata->m_data.align();
|
||||
++m_position;
|
||||
return true;
|
||||
@ -795,6 +815,7 @@ escape_type_class_jump:
|
||||
m_position = pc;
|
||||
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
|
||||
pb->index = i;
|
||||
pb->icase = this->flags() & regbase::icase;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -946,11 +967,13 @@ bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_
|
||||
{
|
||||
re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace)));
|
||||
pb->index = -3;
|
||||
pb->icase = this->flags() & regbase::icase;
|
||||
re_jump* jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump)));
|
||||
this->m_pdata->m_data.align();
|
||||
jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
|
||||
pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
|
||||
pb->index = -3;
|
||||
pb->icase = this->flags() & regbase::icase;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -1706,6 +1729,7 @@ bool basic_regex_parser<charT, traits>::parse_backref()
|
||||
m_position = pc;
|
||||
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
|
||||
pb->index = i;
|
||||
pb->icase = this->flags() & regbase::icase;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1793,6 +1817,7 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
||||
int markid = 0;
|
||||
std::ptrdiff_t jump_offset = 0;
|
||||
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
|
||||
pb->icase = this->flags() & regbase::icase;
|
||||
std::ptrdiff_t last_paren_start = this->getoffset(pb);
|
||||
// back up insertion point for alternations, and set new point:
|
||||
std::ptrdiff_t last_alt_point = m_alt_insert_point;
|
||||
@ -1806,6 +1831,7 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
||||
charT name_delim;
|
||||
int mark_reset = m_mark_reset;
|
||||
m_mark_reset = -1;
|
||||
int v;
|
||||
//
|
||||
// select the actual extension used:
|
||||
//
|
||||
@ -1821,6 +1847,57 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
||||
pb->index = markid = 0;
|
||||
++m_position;
|
||||
break;
|
||||
case regex_constants::syntax_digit:
|
||||
{
|
||||
//
|
||||
// a recursive subexpression:
|
||||
//
|
||||
v = this->m_traits.toi(m_position, m_end, 10);
|
||||
if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
|
||||
{
|
||||
fail(regex_constants::error_backref, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
insert_recursion:
|
||||
pb->index = markid = 0;
|
||||
static_cast<re_jump*>(this->append_state(syntax_element_recurse, sizeof(re_jump)))->alt.i = v;
|
||||
static_cast<re_case*>(
|
||||
this->append_state(syntax_element_toggle_case, sizeof(re_case))
|
||||
)->icase = this->flags() & regbase::icase;
|
||||
break;
|
||||
}
|
||||
case regex_constants::syntax_plus:
|
||||
//
|
||||
// A forward-relative recursive subexpression:
|
||||
//
|
||||
++m_position;
|
||||
v = this->m_traits.toi(m_position, m_end, 10);
|
||||
if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
|
||||
{
|
||||
fail(regex_constants::error_backref, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
v += m_mark_count;
|
||||
goto insert_recursion;
|
||||
case regex_constants::syntax_dash:
|
||||
//
|
||||
// Possibly a backward-relative recursive subexpression:
|
||||
//
|
||||
++m_position;
|
||||
v = this->m_traits.toi(m_position, m_end, 10);
|
||||
if(v <= 0)
|
||||
{
|
||||
--m_position;
|
||||
// Oops not a relative recursion at all, but a (?-imsx) group:
|
||||
goto option_group_jump;
|
||||
}
|
||||
v = m_mark_count + 1 - v;
|
||||
if(v <= 0)
|
||||
{
|
||||
fail(regex_constants::error_backref, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
goto insert_recursion;
|
||||
case regex_constants::syntax_equal:
|
||||
pb->index = markid = -1;
|
||||
++m_position;
|
||||
@ -1882,7 +1959,24 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
||||
return false;
|
||||
}
|
||||
int v = this->m_traits.toi(m_position, m_end, 10);
|
||||
if(v > 0)
|
||||
if(*m_position == charT('R'))
|
||||
{
|
||||
++m_position;
|
||||
v = -this->m_traits.toi(m_position, m_end, 10);
|
||||
re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
|
||||
br->index = v < 0 ? (v - 1) : 0;
|
||||
if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
|
||||
{
|
||||
fail(regex_constants::error_badrepeat, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
if(++m_position == m_end)
|
||||
{
|
||||
fail(regex_constants::error_badrepeat, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if(v > 0)
|
||||
{
|
||||
re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
|
||||
br->index = v;
|
||||
@ -1976,9 +2070,21 @@ named_capture_jump:
|
||||
break;
|
||||
}
|
||||
default:
|
||||
if(*m_position == charT('R'))
|
||||
{
|
||||
++m_position;
|
||||
v = 0;
|
||||
if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
|
||||
{
|
||||
fail(regex_constants::error_backref, m_position - m_base);
|
||||
return false;
|
||||
}
|
||||
goto insert_recursion;
|
||||
}
|
||||
//
|
||||
// lets assume that we have a (?imsx) group and try and parse it:
|
||||
//
|
||||
option_group_jump:
|
||||
regex_constants::syntax_option_type opts = parse_options();
|
||||
if(m_position == m_end)
|
||||
return false;
|
||||
@ -2095,6 +2201,7 @@ named_capture_jump:
|
||||
//
|
||||
pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
|
||||
pb->index = markid;
|
||||
pb->icase = this->flags() & regbase::icase;
|
||||
this->m_paren_start = last_paren_start;
|
||||
//
|
||||
// restore the alternate insertion point:
|
||||
|
@ -285,7 +285,8 @@ public:
|
||||
}
|
||||
~repeater_count()
|
||||
{
|
||||
*stack = next;
|
||||
if(next)
|
||||
*stack = next;
|
||||
}
|
||||
std::size_t get_count() { return count; }
|
||||
int get_id() { return state_id; }
|
||||
@ -325,6 +326,17 @@ enum saved_state_type
|
||||
saved_state_count = 14
|
||||
};
|
||||
|
||||
template <class Results>
|
||||
struct recursion_info
|
||||
{
|
||||
typedef typename Results::value_type value_type;
|
||||
typedef typename value_type::iterator iterator;
|
||||
int id;
|
||||
const re_syntax_base* preturn_address;
|
||||
Results results;
|
||||
repeater_count<iterator>* repeater_stack;
|
||||
};
|
||||
|
||||
#ifdef BOOST_MSVC
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4251 4231 4660)
|
||||
@ -340,6 +352,7 @@ public:
|
||||
typedef std::size_t traits_size_type;
|
||||
typedef typename is_byte<char_type>::width_type width_type;
|
||||
typedef typename regex_iterator_traits<BidiIterator>::difference_type difference_type;
|
||||
typedef match_results<BidiIterator, Allocator> results_type;
|
||||
|
||||
perl_matcher(BidiIterator first, BidiIterator end,
|
||||
match_results<BidiIterator, Allocator>& what,
|
||||
@ -348,7 +361,7 @@ public:
|
||||
BidiIterator l_base)
|
||||
: m_result(what), base(first), last(end),
|
||||
position(first), backstop(l_base), re(e), traits_inst(e.get_traits()),
|
||||
m_independent(false), next_count(&rep_obj), rep_obj(&next_count)
|
||||
m_independent(false), next_count(&rep_obj), rep_obj(&next_count), recursion_stack_position(0)
|
||||
{
|
||||
construct_init(e, f);
|
||||
}
|
||||
@ -413,6 +426,7 @@ private:
|
||||
#ifdef BOOST_REGEX_RECURSIVE
|
||||
bool backtrack_till_match(std::size_t count);
|
||||
#endif
|
||||
bool match_recursion();
|
||||
|
||||
// find procs stored in s_find_vtable:
|
||||
bool find_restart_any();
|
||||
@ -468,6 +482,9 @@ private:
|
||||
typename traits::char_class_type m_word_mask;
|
||||
// the bitmask to use when determining whether a match_any matches a newline or not:
|
||||
unsigned char match_any_mask;
|
||||
// recursion information:
|
||||
recursion_info<results_type> recursion_stack[50];
|
||||
unsigned recursion_stack_position;
|
||||
|
||||
#ifdef BOOST_REGEX_NON_RECURSIVE
|
||||
//
|
||||
@ -491,6 +508,8 @@ private:
|
||||
bool unwind_short_set_repeat(bool);
|
||||
bool unwind_long_set_repeat(bool);
|
||||
bool unwind_non_greedy_repeat(bool);
|
||||
bool unwind_recursion(bool);
|
||||
bool unwind_recursion_pop(bool);
|
||||
void destroy_single_repeat();
|
||||
void push_matched_paren(int index, const sub_match<BidiIterator>& sub);
|
||||
void push_recursion_stopper();
|
||||
@ -499,7 +518,8 @@ private:
|
||||
void push_repeater_count(int i, repeater_count<BidiIterator>** s);
|
||||
void push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int state_id);
|
||||
void push_non_greedy_repeat(const re_syntax_base* ps);
|
||||
|
||||
void push_recursion(int id, const re_syntax_base* p, results_type* presults);
|
||||
void push_recursion_pop();
|
||||
|
||||
// pointer to base of stack:
|
||||
saved_state* m_stack_base;
|
||||
|
@ -345,25 +345,6 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_prefix()
|
||||
return m_has_found_match;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_endmark()
|
||||
{
|
||||
int index = static_cast<const re_brace*>(pstate)->index;
|
||||
if(index > 0)
|
||||
{
|
||||
if((m_match_flags & match_nosubs) == 0)
|
||||
m_presult->set_second(position, index);
|
||||
}
|
||||
else if((index < 0) && (index != -4))
|
||||
{
|
||||
// matched forward lookahead:
|
||||
pstate = 0;
|
||||
return true;
|
||||
}
|
||||
pstate = pstate->next.p;
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_literal()
|
||||
{
|
||||
@ -464,35 +445,6 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_wild()
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_match()
|
||||
{
|
||||
if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first))
|
||||
return false;
|
||||
if((m_match_flags & match_all) && (position != last))
|
||||
return false;
|
||||
if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base))
|
||||
return false;
|
||||
m_presult->set_second(position);
|
||||
pstate = 0;
|
||||
m_has_found_match = true;
|
||||
if((m_match_flags & match_posix) == match_posix)
|
||||
{
|
||||
m_result.maybe_assign(*m_presult);
|
||||
if((m_match_flags & match_any) == 0)
|
||||
return false;
|
||||
}
|
||||
#ifdef BOOST_REGEX_MATCH_EXTRA
|
||||
if(match_extra & m_match_flags)
|
||||
{
|
||||
for(unsigned i = 0; i < m_presult->size(); ++i)
|
||||
if((*m_presult)[i].matched)
|
||||
((*m_presult)[i]).get_captures().push_back((*m_presult)[i]);
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_word_boundary()
|
||||
{
|
||||
@ -760,8 +712,21 @@ template <class BidiIterator, class Allocator, class traits>
|
||||
inline bool perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref()
|
||||
{
|
||||
// return true if marked sub-expression N has been matched:
|
||||
bool result = (*m_presult)[static_cast<const re_brace*>(pstate)->index].matched;
|
||||
pstate = pstate->next.p;
|
||||
int index = static_cast<const re_brace*>(pstate)->index;
|
||||
bool result;
|
||||
if(index > 0)
|
||||
{
|
||||
// Have we matched subexpression "index"?
|
||||
result = (*m_presult)[index].matched;
|
||||
pstate = pstate->next.p;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Have we recursed into subexpression "index"?
|
||||
// If index == 0 then check for any recursion at all, otherwise for recursion to -index-1.
|
||||
result = recursion_stack_position && ((recursion_stack[recursion_stack_position-1].id == -index-1) || (index == 0));
|
||||
pstate = pstate->next.p;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -127,10 +127,21 @@ struct saved_single_repeat : public saved_state
|
||||
: saved_state(arg_id), count(c), rep(r), last_position(lp){}
|
||||
};
|
||||
|
||||
template <class Results>
|
||||
struct saved_recursion : public saved_state
|
||||
{
|
||||
saved_recursion(int id, const re_syntax_base* p, Results* pr)
|
||||
: saved_state(14), recursion_id(id), preturn_address(p), results(*pr)
|
||||
{}
|
||||
int recursion_id;
|
||||
const re_syntax_base* preturn_address;
|
||||
Results results;
|
||||
};
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
|
||||
{
|
||||
static matcher_proc_type const s_match_vtable[29] =
|
||||
static matcher_proc_type const s_match_vtable[30] =
|
||||
{
|
||||
(&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
|
||||
@ -165,6 +176,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_backstep,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_recursion,
|
||||
};
|
||||
|
||||
push_recursion_stopper();
|
||||
@ -316,10 +328,26 @@ inline void perl_matcher<BidiIterator, Allocator, traits>::push_single_repeat(st
|
||||
m_backup_state = pmp;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
inline void perl_matcher<BidiIterator, Allocator, traits>::push_recursion(int id, const re_syntax_base* p, results_type* presults)
|
||||
{
|
||||
saved_recursion<results_type>* pmp = static_cast<saved_recursion<results_type>*>(m_backup_state);
|
||||
--pmp;
|
||||
if(pmp < m_stack_base)
|
||||
{
|
||||
extend_stack();
|
||||
pmp = static_cast<saved_recursion<results_type>*>(m_backup_state);
|
||||
--pmp;
|
||||
}
|
||||
(void) new (pmp)saved_recursion<results_type>(id, p, presults);
|
||||
m_backup_state = pmp;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
|
||||
{
|
||||
int index = static_cast<const re_brace*>(pstate)->index;
|
||||
icase = static_cast<const re_brace*>(pstate)->icase;
|
||||
switch(index)
|
||||
{
|
||||
case 0:
|
||||
@ -859,6 +887,100 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
|
||||
#endif
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_recursion()
|
||||
{
|
||||
BOOST_ASSERT(pstate->type == syntax_element_recurse);
|
||||
//
|
||||
// Backup call stack:
|
||||
//
|
||||
push_recursion_pop();
|
||||
//
|
||||
// Set new call stack:
|
||||
//
|
||||
if(recursion_stack_position >= static_cast<int>(sizeof(recursion_stack)/sizeof(recursion_stack[0])))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
recursion_stack[recursion_stack_position].preturn_address = pstate->next.p;
|
||||
recursion_stack[recursion_stack_position].results = *m_presult;
|
||||
pstate = static_cast<const re_jump*>(pstate)->alt.p;
|
||||
recursion_stack[recursion_stack_position].id = static_cast<const re_brace*>(pstate)->index;
|
||||
++recursion_stack_position;
|
||||
//BOOST_ASSERT(recursion_stack[recursion_stack_position-1].id);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_endmark()
|
||||
{
|
||||
int index = static_cast<const re_brace*>(pstate)->index;
|
||||
icase = static_cast<const re_brace*>(pstate)->icase;
|
||||
if(index > 0)
|
||||
{
|
||||
if((m_match_flags & match_nosubs) == 0)
|
||||
{
|
||||
m_presult->set_second(position, index);
|
||||
}
|
||||
if(recursion_stack_position)
|
||||
{
|
||||
if(index == recursion_stack[recursion_stack_position-1].id)
|
||||
{
|
||||
--recursion_stack_position;
|
||||
pstate = recursion_stack[recursion_stack_position].preturn_address;
|
||||
*m_presult = recursion_stack[recursion_stack_position].results;
|
||||
push_recursion(recursion_stack[recursion_stack_position].id, recursion_stack[recursion_stack_position].preturn_address, &recursion_stack[recursion_stack_position].results);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if((index < 0) && (index != -4))
|
||||
{
|
||||
// matched forward lookahead:
|
||||
pstate = 0;
|
||||
return true;
|
||||
}
|
||||
pstate = pstate->next.p;
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_match()
|
||||
{
|
||||
if(recursion_stack_position)
|
||||
{
|
||||
BOOST_ASSERT(0 == recursion_stack[recursion_stack_position-1].id);
|
||||
--recursion_stack_position;
|
||||
pstate = recursion_stack[recursion_stack_position].preturn_address;
|
||||
*m_presult = recursion_stack[recursion_stack_position].results;
|
||||
push_recursion(recursion_stack[recursion_stack_position].id, recursion_stack[recursion_stack_position].preturn_address, &recursion_stack[recursion_stack_position].results);
|
||||
return true;
|
||||
}
|
||||
if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first))
|
||||
return false;
|
||||
if((m_match_flags & match_all) && (position != last))
|
||||
return false;
|
||||
if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base))
|
||||
return false;
|
||||
m_presult->set_second(position);
|
||||
pstate = 0;
|
||||
m_has_found_match = true;
|
||||
if((m_match_flags & match_posix) == match_posix)
|
||||
{
|
||||
m_result.maybe_assign(*m_presult);
|
||||
if((m_match_flags & match_any) == 0)
|
||||
return false;
|
||||
}
|
||||
#ifdef BOOST_REGEX_MATCH_EXTRA
|
||||
if(match_extra & m_match_flags)
|
||||
{
|
||||
for(unsigned i = 0; i < m_presult->size(); ++i)
|
||||
if((*m_presult)[i].matched)
|
||||
((*m_presult)[i]).get_captures().push_back((*m_presult)[i]);
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
|
||||
Unwind and associated proceedures follow, these perform what normal stack
|
||||
@ -869,7 +991,7 @@ unwinding does in the recursive implementation.
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::unwind(bool have_match)
|
||||
{
|
||||
static unwind_proc_type const s_unwind_table[14] =
|
||||
static unwind_proc_type const s_unwind_table[18] =
|
||||
{
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::unwind_end,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::unwind_paren,
|
||||
@ -885,6 +1007,8 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind(bool have_match)
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::unwind_short_set_repeat,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::unwind_long_set_repeat,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::unwind_non_greedy_repeat,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion_pop,
|
||||
};
|
||||
|
||||
m_recursive_result = have_match;
|
||||
@ -1388,6 +1512,106 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_non_greedy_repeat(boo
|
||||
return r;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion(bool r)
|
||||
{
|
||||
saved_recursion<results_type>* pmp = static_cast<saved_recursion<results_type>*>(m_backup_state);
|
||||
if(!r)
|
||||
{
|
||||
recursion_stack[recursion_stack_position].id = pmp->recursion_id;
|
||||
recursion_stack[recursion_stack_position].preturn_address = pmp->preturn_address;
|
||||
recursion_stack[recursion_stack_position].results = pmp->results;
|
||||
++recursion_stack_position;
|
||||
}
|
||||
boost::re_detail::inplace_destroy(pmp++);
|
||||
m_backup_state = pmp;
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion_pop(bool r)
|
||||
{
|
||||
saved_state* pmp = static_cast<saved_state*>(m_backup_state);
|
||||
if(!r)
|
||||
{
|
||||
--recursion_stack_position;
|
||||
}
|
||||
boost::re_detail::inplace_destroy(pmp++);
|
||||
m_backup_state = pmp;
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
void perl_matcher<BidiIterator, Allocator, traits>::push_recursion_pop()
|
||||
{
|
||||
saved_state* pmp = static_cast<saved_state*>(m_backup_state);
|
||||
--pmp;
|
||||
if(pmp < m_stack_base)
|
||||
{
|
||||
extend_stack();
|
||||
pmp = static_cast<saved_state*>(m_backup_state);
|
||||
--pmp;
|
||||
}
|
||||
(void) new (pmp)saved_state(15);
|
||||
m_backup_state = pmp;
|
||||
}
|
||||
/*
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::unwind_parenthesis_pop(bool r)
|
||||
{
|
||||
saved_state* pmp = static_cast<saved_state*>(m_backup_state);
|
||||
if(!r)
|
||||
{
|
||||
--parenthesis_stack_position;
|
||||
}
|
||||
boost::re_detail::inplace_destroy(pmp++);
|
||||
m_backup_state = pmp;
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
void perl_matcher<BidiIterator, Allocator, traits>::push_parenthesis_pop()
|
||||
{
|
||||
saved_state* pmp = static_cast<saved_state*>(m_backup_state);
|
||||
--pmp;
|
||||
if(pmp < m_stack_base)
|
||||
{
|
||||
extend_stack();
|
||||
pmp = static_cast<saved_state*>(m_backup_state);
|
||||
--pmp;
|
||||
}
|
||||
(void) new (pmp)saved_state(16);
|
||||
m_backup_state = pmp;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::unwind_parenthesis_push(bool r)
|
||||
{
|
||||
saved_position<BidiIterator>* pmp = static_cast<saved_position<BidiIterator>*>(m_backup_state);
|
||||
if(!r)
|
||||
{
|
||||
parenthesis_stack[parenthesis_stack_position++] = pmp->position;
|
||||
}
|
||||
boost::re_detail::inplace_destroy(pmp++);
|
||||
m_backup_state = pmp;
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
inline void perl_matcher<BidiIterator, Allocator, traits>::push_parenthesis_push(BidiIterator p)
|
||||
{
|
||||
saved_position<BidiIterator>* pmp = static_cast<saved_position<BidiIterator>*>(m_backup_state);
|
||||
--pmp;
|
||||
if(pmp < m_stack_base)
|
||||
{
|
||||
extend_stack();
|
||||
pmp = static_cast<saved_position<BidiIterator>*>(m_backup_state);
|
||||
--pmp;
|
||||
}
|
||||
(void) new (pmp)saved_position<BidiIterator>(0, p, 17);
|
||||
m_backup_state = pmp;
|
||||
}
|
||||
*/
|
||||
} // namespace re_detail
|
||||
} // namespace boost
|
||||
|
||||
|
@ -60,7 +60,7 @@ public:
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
|
||||
{
|
||||
static matcher_proc_type const s_match_vtable[29] =
|
||||
static matcher_proc_type const s_match_vtable[30] =
|
||||
{
|
||||
(&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
|
||||
@ -95,6 +95,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_backstep,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_recursion,
|
||||
};
|
||||
|
||||
if(state_count > max_state_count)
|
||||
@ -117,6 +118,7 @@ template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
|
||||
{
|
||||
int index = static_cast<const re_brace*>(pstate)->index;
|
||||
icase = static_cast<const re_brace*>(pstate)->icase;
|
||||
bool r = true;
|
||||
switch(index)
|
||||
{
|
||||
@ -848,6 +850,127 @@ bool perl_matcher<BidiIterator, Allocator, traits>::backtrack_till_match(std::si
|
||||
#endif
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_recursion()
|
||||
{
|
||||
BOOST_ASSERT(pstate->type == syntax_element_recurse);
|
||||
//
|
||||
// Set new call stack:
|
||||
//
|
||||
if(recursion_stack_position >= static_cast<int>(sizeof(recursion_stack)/sizeof(recursion_stack[0])))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
recursion_stack[recursion_stack_position].preturn_address = pstate->next.p;
|
||||
recursion_stack[recursion_stack_position].results = *m_presult;
|
||||
recursion_stack[recursion_stack_position].repeater_stack = next_count;
|
||||
pstate = static_cast<const re_jump*>(pstate)->alt.p;
|
||||
recursion_stack[recursion_stack_position].id = static_cast<const re_brace*>(pstate)->index;
|
||||
++recursion_stack_position;
|
||||
|
||||
repeater_count<BidiIterator>* saved = next_count;
|
||||
repeater_count<BidiIterator> r(&next_count); // resets all repeat counts since we're recursing and starting fresh on those
|
||||
next_count = &r;
|
||||
bool result = match_all_states();
|
||||
next_count = saved;
|
||||
|
||||
if(!result)
|
||||
{
|
||||
--recursion_stack_position;
|
||||
next_count = recursion_stack[recursion_stack_position].repeater_stack;
|
||||
*m_presult = recursion_stack[recursion_stack_position].results;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_endmark()
|
||||
{
|
||||
int index = static_cast<const re_brace*>(pstate)->index;
|
||||
icase = static_cast<const re_brace*>(pstate)->icase;
|
||||
if(index > 0)
|
||||
{
|
||||
if((m_match_flags & match_nosubs) == 0)
|
||||
{
|
||||
m_presult->set_second(position, index);
|
||||
}
|
||||
if(recursion_stack_position)
|
||||
{
|
||||
if(index == recursion_stack[recursion_stack_position-1].id)
|
||||
{
|
||||
--recursion_stack_position;
|
||||
recursion_info<results_type> saved = recursion_stack[recursion_stack_position];
|
||||
const re_syntax_base* saved_state = pstate = saved.preturn_address;
|
||||
repeater_count<BidiIterator>* saved_count = next_count;
|
||||
next_count = saved.repeater_stack;
|
||||
*m_presult = saved.results;
|
||||
if(!match_all_states())
|
||||
{
|
||||
recursion_stack[recursion_stack_position] = saved;
|
||||
++recursion_stack_position;
|
||||
next_count = saved_count;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if((index < 0) && (index != -4))
|
||||
{
|
||||
// matched forward lookahead:
|
||||
pstate = 0;
|
||||
return true;
|
||||
}
|
||||
pstate = pstate ? pstate->next.p : 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_match()
|
||||
{
|
||||
if(recursion_stack_position)
|
||||
{
|
||||
BOOST_ASSERT(0 == recursion_stack[recursion_stack_position-1].id);
|
||||
--recursion_stack_position;
|
||||
const re_syntax_base* saved_state = pstate = recursion_stack[recursion_stack_position].preturn_address;
|
||||
*m_presult = recursion_stack[recursion_stack_position].results;
|
||||
if(!match_all_states())
|
||||
{
|
||||
recursion_stack[recursion_stack_position].preturn_address = saved_state;
|
||||
recursion_stack[recursion_stack_position].results = *m_presult;
|
||||
++recursion_stack_position;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first))
|
||||
return false;
|
||||
if((m_match_flags & match_all) && (position != last))
|
||||
return false;
|
||||
if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base))
|
||||
return false;
|
||||
m_presult->set_second(position);
|
||||
pstate = 0;
|
||||
m_has_found_match = true;
|
||||
if((m_match_flags & match_posix) == match_posix)
|
||||
{
|
||||
m_result.maybe_assign(*m_presult);
|
||||
if((m_match_flags & match_any) == 0)
|
||||
return false;
|
||||
}
|
||||
#ifdef BOOST_REGEX_MATCH_EXTRA
|
||||
if(match_extra & m_match_flags)
|
||||
{
|
||||
for(unsigned i = 0; i < m_presult->size(); ++i)
|
||||
if((*m_presult)[i].matched)
|
||||
((*m_presult)[i]).get_captures().push_back((*m_presult)[i]);
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
} // namespace re_detail
|
||||
} // namespace boost
|
||||
#ifdef BOOST_MSVC
|
||||
|
@ -118,7 +118,9 @@ enum syntax_element_type
|
||||
syntax_element_backstep = syntax_element_long_set_rep + 1,
|
||||
// an assertion that a mark was matched:
|
||||
syntax_element_assert_backref = syntax_element_backstep + 1,
|
||||
syntax_element_toggle_case = syntax_element_assert_backref + 1
|
||||
syntax_element_toggle_case = syntax_element_assert_backref + 1,
|
||||
// a recursive expression:
|
||||
syntax_element_recurse = syntax_element_toggle_case + 1
|
||||
};
|
||||
|
||||
#ifdef BOOST_REGEX_DEBUG
|
||||
@ -156,6 +158,7 @@ struct re_brace : public re_syntax_base
|
||||
// The index to match, can be zero (don't mark the sub-expression)
|
||||
// or negative (for perl style (?...) extentions):
|
||||
int index;
|
||||
bool icase;
|
||||
};
|
||||
|
||||
/*** struct re_dot **************************************************
|
||||
|
Reference in New Issue
Block a user