mirror of
https://github.com/boostorg/regex.git
synced 2025-07-17 06:12:10 +02:00
Add support for PRUNE and SKIP (no MARK's though).
This commit is contained in:
@ -2760,7 +2760,57 @@ bool basic_regex_parser<charT, traits>::parse_perl_verb()
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
++m_position;
|
++m_position;
|
||||||
this->append_state(syntax_element_commit);
|
static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_commit;
|
||||||
|
this->m_pdata->m_disable_match_any = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'P':
|
||||||
|
if(++m_position == m_end)
|
||||||
|
{
|
||||||
|
// Rewind to start of (* sequence:
|
||||||
|
--m_position;
|
||||||
|
while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
||||||
|
fail(regex_constants::error_perl_extension, m_position - m_base);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if(match_verb("RUNE"))
|
||||||
|
{
|
||||||
|
if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
|
||||||
|
{
|
||||||
|
// Rewind to start of (* sequence:
|
||||||
|
--m_position;
|
||||||
|
while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
||||||
|
fail(regex_constants::error_perl_extension, m_position - m_base);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
++m_position;
|
||||||
|
static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_prune;
|
||||||
|
this->m_pdata->m_disable_match_any = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'S':
|
||||||
|
if(++m_position == m_end)
|
||||||
|
{
|
||||||
|
// Rewind to start of (* sequence:
|
||||||
|
--m_position;
|
||||||
|
while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
||||||
|
fail(regex_constants::error_perl_extension, m_position - m_base);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if(match_verb("KIP"))
|
||||||
|
{
|
||||||
|
if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
|
||||||
|
{
|
||||||
|
// Rewind to start of (* sequence:
|
||||||
|
--m_position;
|
||||||
|
while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
||||||
|
fail(regex_constants::error_perl_extension, m_position - m_base);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
++m_position;
|
||||||
|
static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_skip;
|
||||||
this->m_pdata->m_disable_match_any = true;
|
this->m_pdata->m_disable_match_any = true;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -554,6 +554,8 @@ private:
|
|||||||
bool m_recursive_result;
|
bool m_recursive_result;
|
||||||
// how many memory blocks have we used up?:
|
// how many memory blocks have we used up?:
|
||||||
unsigned used_block_count;
|
unsigned used_block_count;
|
||||||
|
// We have unwound to a lookahead/lookbehind, used by COMMIT/PRUNE/SKIP:
|
||||||
|
bool m_unwound_lookahead;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// these operations aren't allowed, so are declared private,
|
// these operations aren't allowed, so are declared private,
|
||||||
|
@ -354,6 +354,9 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_prefix()
|
|||||||
#endif
|
#endif
|
||||||
if(!m_has_found_match)
|
if(!m_has_found_match)
|
||||||
position = restart; // reset search position
|
position = restart; // reset search position
|
||||||
|
#ifdef BOOST_REGEX_RECURSIVE
|
||||||
|
m_can_backtrack = true; // reset for further searches
|
||||||
|
#endif
|
||||||
return m_has_found_match;
|
return m_has_found_match;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1014,6 +1014,27 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_commit()
|
|||||||
// however we might not unwind correctly in that case, so for now,
|
// however we might not unwind correctly in that case, so for now,
|
||||||
// just mark that we don't backtrack into whatever is left (or rather
|
// just mark that we don't backtrack into whatever is left (or rather
|
||||||
// we'll unwind it unconditionally without pausing to try other matches).
|
// we'll unwind it unconditionally without pausing to try other matches).
|
||||||
|
|
||||||
|
switch(static_cast<const re_commit*>(pstate)->action)
|
||||||
|
{
|
||||||
|
case commit_commit:
|
||||||
|
if(base != last)
|
||||||
|
{
|
||||||
|
restart = last;
|
||||||
|
--restart;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case commit_skip:
|
||||||
|
if(position != base)
|
||||||
|
{
|
||||||
|
restart = position;
|
||||||
|
--restart;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case commit_prune:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
saved_state* pmp = m_backup_state;
|
saved_state* pmp = m_backup_state;
|
||||||
--pmp;
|
--pmp;
|
||||||
if(pmp < m_stack_base)
|
if(pmp < m_stack_base)
|
||||||
@ -1025,8 +1046,6 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_commit()
|
|||||||
(void) new (pmp)saved_state(16);
|
(void) new (pmp)saved_state(16);
|
||||||
m_backup_state = pmp;
|
m_backup_state = pmp;
|
||||||
pstate = pstate->next.p;
|
pstate = pstate->next.p;
|
||||||
// If we don't find a match we don't want to search further either:
|
|
||||||
restart = last;
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1102,6 +1121,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind(bool have_match)
|
|||||||
};
|
};
|
||||||
|
|
||||||
m_recursive_result = have_match;
|
m_recursive_result = have_match;
|
||||||
|
m_unwound_lookahead = false;
|
||||||
unwind_proc_type unwinder;
|
unwind_proc_type unwinder;
|
||||||
bool cont;
|
bool cont;
|
||||||
//
|
//
|
||||||
@ -1166,6 +1186,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_assertion(bool r)
|
|||||||
m_recursive_result = pmp->positive ? r : !r;
|
m_recursive_result = pmp->positive ? r : !r;
|
||||||
boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++);
|
boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(pmp++);
|
||||||
m_backup_state = pmp;
|
m_backup_state = pmp;
|
||||||
|
m_unwound_lookahead = true;
|
||||||
return !result; // return false if the assertion was matched to stop search.
|
return !result; // return false if the assertion was matched to stop search.
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1653,7 +1674,24 @@ template <class BidiIterator, class Allocator, class traits>
|
|||||||
bool perl_matcher<BidiIterator, Allocator, traits>::unwind_commit(bool b)
|
bool perl_matcher<BidiIterator, Allocator, traits>::unwind_commit(bool b)
|
||||||
{
|
{
|
||||||
boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(m_backup_state++);
|
boost::BOOST_REGEX_DETAIL_NS::inplace_destroy(m_backup_state++);
|
||||||
while(unwind(b)) {}
|
while(unwind(b) && !m_unwound_lookahead){}
|
||||||
|
if(m_unwound_lookahead && pstate)
|
||||||
|
{
|
||||||
|
//
|
||||||
|
// If we stop because we just unwound an assertion, put the
|
||||||
|
// commit state back on the stack again:
|
||||||
|
//
|
||||||
|
saved_state* pmp = m_backup_state;
|
||||||
|
--pmp;
|
||||||
|
if(pmp < m_stack_base)
|
||||||
|
{
|
||||||
|
extend_stack();
|
||||||
|
pmp = m_backup_state;
|
||||||
|
--pmp;
|
||||||
|
}
|
||||||
|
(void) new (pmp)saved_state(16);
|
||||||
|
m_backup_state = pmp;
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1001,7 +1001,16 @@ template <class BidiIterator, class Allocator, class traits>
|
|||||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_commit()
|
bool perl_matcher<BidiIterator, Allocator, traits>::match_commit()
|
||||||
{
|
{
|
||||||
m_can_backtrack = false;
|
m_can_backtrack = false;
|
||||||
restart = last;
|
int action = static_cast<const re_commit*>(pstate)->action;
|
||||||
|
switch(action)
|
||||||
|
{
|
||||||
|
case commit_commit:
|
||||||
|
restart = last;
|
||||||
|
break;
|
||||||
|
case commit_skip:
|
||||||
|
restart = position;
|
||||||
|
break;
|
||||||
|
}
|
||||||
pstate = pstate->next.p;
|
pstate = pstate->next.p;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -260,6 +260,21 @@ struct re_recurse : public re_jump
|
|||||||
int state_id; // identifier of first nested repeat within the recursion.
|
int state_id; // identifier of first nested repeat within the recursion.
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*** struct re_commit *************************************************
|
||||||
|
Used for the PRUNE, SKIP and COMMIT verbs which basically differ only in what happens
|
||||||
|
if no match is found and we start searching forward.
|
||||||
|
**********************************************************************/
|
||||||
|
enum commit_type
|
||||||
|
{
|
||||||
|
commit_prune,
|
||||||
|
commit_skip,
|
||||||
|
commit_commit,
|
||||||
|
};
|
||||||
|
struct re_commit : public re_syntax_base
|
||||||
|
{
|
||||||
|
commit_type action;
|
||||||
|
};
|
||||||
|
|
||||||
/*** enum re_jump_size_type *******************************************
|
/*** enum re_jump_size_type *******************************************
|
||||||
Provides compiled size of re_jump structure (allowing for trailing alignment).
|
Provides compiled size of re_jump structure (allowing for trailing alignment).
|
||||||
We provide this so we know how many bytes to insert when constructing the machine
|
We provide this so we know how many bytes to insert when constructing the machine
|
||||||
|
@ -969,16 +969,19 @@ void test_verbs()
|
|||||||
TEST_REGEX_SEARCH("(\\w+)(?>b(*COMMIT))\\w{2}", perl, "abbb", match_default, make_array(0, 4, 0, 1, -2, -2));
|
TEST_REGEX_SEARCH("(\\w+)(?>b(*COMMIT))\\w{2}", perl, "abbb", match_default, make_array(0, 4, 0, 1, -2, -2));
|
||||||
TEST_REGEX_SEARCH("(\\w+)b(*COMMIT)\\w{2}", perl, "abbb", match_default, make_array(-2, -2));
|
TEST_REGEX_SEARCH("(\\w+)b(*COMMIT)\\w{2}", perl, "abbb", match_default, make_array(-2, -2));
|
||||||
|
|
||||||
|
TEST_REGEX_SEARCH("a+b?(*PRUNE)c+(*FAIL)", perl, "aaabccc", match_default, make_array(-2, -2));
|
||||||
|
TEST_REGEX_SEARCH("a+b?(*SKIP)c+(*FAIL)", perl, "aaabcccaaabccc", match_default, make_array(-2, -2));
|
||||||
//
|
//
|
||||||
|
TEST_REGEX_SEARCH("^(?=a(*SKIP)b|ac)", perl, "ac", match_default, make_array(-2, -2));
|
||||||
|
TEST_REGEX_SEARCH("^(?=a(*PRUNE)b)", perl, "ab", match_default, make_array(0, 0, -2, -2));
|
||||||
|
TEST_REGEX_SEARCH("^(?=a(*PRUNE)b)", perl, "ac", match_default, make_array(-2, -2));
|
||||||
|
TEST_REGEX_SEARCH("AA+(*PRUNE)(B|Z)|AC", perl, "AAAC", match_default, make_array(2, 4, -1, -1, -2, -2));
|
||||||
|
TEST_REGEX_SEARCH("AA+(*SKIP)(B|Z)|AC", perl, "AAAC", match_default, make_array(-2, -2));
|
||||||
|
TEST_REGEX_SEARCH("AA+(*SKIP)(B|Z)|C", perl, "AAAC", match_default, make_array(3, 4, -1, -1, -2, -2));
|
||||||
|
TEST_REGEX_SEARCH("AA+(*SKIP)(B|Z)|AC", perl, "AAAC", match_default, make_array(-2, -2));
|
||||||
|
TEST_REGEX_SEARCH("AA+(*SKIP)B|C", perl, "AAAC", match_default, make_array(3, 4, -2, -2));
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
/a+b?(*PRUNE)c+(*FAIL)/
|
|
||||||
aaabccc
|
|
||||||
|
|
||||||
/a+b?(*SKIP)c+(*FAIL)/
|
|
||||||
aaabcccaaabccc
|
|
||||||
|
|
||||||
/^(?:aaa(*THEN)\w{6}|bbb(*THEN)\w{5}|ccc(*THEN)\w{4}|\w{3})/
|
/^(?:aaa(*THEN)\w{6}|bbb(*THEN)\w{5}|ccc(*THEN)\w{4}|\w{3})/
|
||||||
aaaxxxxxx
|
aaaxxxxxx
|
||||||
aaa++++++
|
aaa++++++
|
||||||
@ -1000,14 +1003,6 @@ void test_verbs()
|
|||||||
/a+b?(*THEN)c+(*FAIL)/
|
/a+b?(*THEN)c+(*FAIL)/
|
||||||
aaabccc
|
aaabccc
|
||||||
|
|
||||||
/^(?=a(*SKIP)b|ac)/
|
|
||||||
** Failers
|
|
||||||
ac
|
|
||||||
|
|
||||||
/^(?=a(*PRUNE)b)/
|
|
||||||
ab
|
|
||||||
** Failers
|
|
||||||
ac
|
|
||||||
|
|
||||||
~~~~~
|
~~~~~
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user