mirror of
https://github.com/boostorg/regex.git
synced 2025-07-30 04:27:22 +02:00
Added support for conditional expressions.
[SVN r22744]
This commit is contained in:
@ -1127,6 +1127,7 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
||||
std::ptrdiff_t last_alt_point = m_alt_insert_point;
|
||||
this->m_pdata->m_data.align();
|
||||
m_alt_insert_point = this->m_pdata->m_data.size();
|
||||
std::ptrdiff_t expected_alt_point = m_alt_insert_point;
|
||||
//
|
||||
// select the actual extension used:
|
||||
//
|
||||
@ -1191,6 +1192,48 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
||||
this->m_pdata->m_data.align();
|
||||
m_alt_insert_point = this->m_pdata->m_data.size();
|
||||
break;
|
||||
case regex_constants::syntax_open_mark:
|
||||
{
|
||||
// a conditional expression:
|
||||
pb->index = markid = -4;
|
||||
if(++m_position == m_end)
|
||||
fail(REG_BADRPT, m_position - m_base);
|
||||
int v = this->m_traits.toi(m_position, m_end, 10);
|
||||
if(v > 0)
|
||||
{
|
||||
re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
|
||||
br->index = v;
|
||||
if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
|
||||
fail(REG_BADRPT, m_position - m_base);
|
||||
if(++m_position == m_end)
|
||||
fail(REG_BADRPT, m_position - m_base);
|
||||
}
|
||||
else
|
||||
{
|
||||
// verify that we have a lookahead or lookbehind assert:
|
||||
if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
|
||||
fail(REG_BADRPT, m_position - m_base);
|
||||
if(++m_position == m_end)
|
||||
fail(REG_BADRPT, m_position - m_base);
|
||||
if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
|
||||
{
|
||||
if(++m_position == m_end)
|
||||
fail(REG_BADRPT, m_position - m_base);
|
||||
if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
|
||||
&& (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
|
||||
fail(REG_BADRPT, m_position - m_base);
|
||||
m_position -= 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
|
||||
&& (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
|
||||
fail(REG_BADRPT, m_position - m_base);
|
||||
m_position -= 2;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
fail(REG_BADRPT, m_position - m_base);
|
||||
}
|
||||
@ -1221,6 +1264,19 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
||||
}
|
||||
}
|
||||
//
|
||||
// verify that if this is conditional expression, that we do have
|
||||
// an alternative, if not add one:
|
||||
//
|
||||
if(markid == -4)
|
||||
{
|
||||
re_syntax_base* b = this->getaddress(expected_alt_point);
|
||||
if(b->type != syntax_element_alt)
|
||||
{
|
||||
re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
|
||||
alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
|
||||
}
|
||||
}
|
||||
//
|
||||
// append closing parenthesis state:
|
||||
//
|
||||
pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
|
||||
|
@ -334,6 +334,7 @@ private:
|
||||
bool match_dot_repeat_fast();
|
||||
bool match_dot_repeat_slow();
|
||||
bool match_backstep();
|
||||
bool match_assert_backref();
|
||||
bool backtrack_till_match(unsigned count);
|
||||
|
||||
// find procs stored in s_find_vtable:
|
||||
|
@ -81,11 +81,11 @@ void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(std
|
||||
difference_type dist = boost::re_detail::distance(base, last);
|
||||
traits_size_type states = static_cast<traits_size_type>(re.size());
|
||||
states *= states;
|
||||
difference_type lim = (std::numeric_limits<difference_type>::max)() - 1000 - states;
|
||||
difference_type lim = (std::numeric_limits<difference_type>::max)() - 100000 - states;
|
||||
if(dist > (difference_type)(lim / states))
|
||||
max_state_count = lim;
|
||||
else
|
||||
max_state_count = 1000 + states * dist;
|
||||
max_state_count = 100000 + states * dist;
|
||||
}
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(void*)
|
||||
@ -294,7 +294,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_endmark()
|
||||
if((m_match_flags & match_nosubs) == 0)
|
||||
m_presult->set_second(position, index);
|
||||
}
|
||||
else if(index < 0)
|
||||
else if((index < 0) && (index != -4))
|
||||
{
|
||||
// matched forward lookahead:
|
||||
pstate = 0;
|
||||
@ -670,6 +670,14 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_backstep()
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref()
|
||||
{
|
||||
// return true if marked sub-expression N has been matched:
|
||||
pstate = pstate->next.p;
|
||||
return (*m_presult)[static_cast<const re_brace*>(pstate)->index].matched;
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_any()
|
||||
{
|
||||
|
@ -113,7 +113,7 @@ struct saved_single_repeat : public saved_state
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
|
||||
{
|
||||
static matcher_proc_type const s_match_vtable[27] =
|
||||
static matcher_proc_type const s_match_vtable[28] =
|
||||
{
|
||||
(&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
|
||||
@ -142,6 +142,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_backstep,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref,
|
||||
};
|
||||
|
||||
push_recursion_stopper();
|
||||
@ -344,6 +345,37 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
|
||||
#endif
|
||||
return r;
|
||||
}
|
||||
case -4:
|
||||
{
|
||||
// conditional expression:
|
||||
const re_alt* alt = static_cast<const re_alt*>(pstate->next.p);
|
||||
BOOST_ASSERT(alt->type == syntax_element_alt);
|
||||
pstate = alt->next.p;
|
||||
if(pstate->type == syntax_element_assert_backref)
|
||||
{
|
||||
if(!match_assert_backref())
|
||||
pstate = alt->alt.p;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
// zero width assertion, have to match this recursively:
|
||||
BOOST_ASSERT(pstate->type == syntax_element_startmark);
|
||||
bool negated = static_cast<const re_brace*>(pstate)->index == -2;
|
||||
BidiIterator saved_position = position;
|
||||
const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
|
||||
pstate = pstate->next.p->next.p;
|
||||
bool r = match_all_states();
|
||||
position = saved_position;
|
||||
if(negated)
|
||||
r = !r;
|
||||
if(r)
|
||||
pstate = next_pstate;
|
||||
else
|
||||
pstate = alt->alt.p;
|
||||
break;
|
||||
}
|
||||
}
|
||||
default:
|
||||
{
|
||||
assert(index > 0);
|
||||
@ -929,8 +961,8 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_greedy_single_repeat(
|
||||
|
||||
const re_repeat* rep = pmp->rep;
|
||||
std::size_t count = pmp->count;
|
||||
assert(rep->next.p);
|
||||
assert(rep->alt.p);
|
||||
assert(rep->next.p != 0);
|
||||
assert(rep->alt.p != 0);
|
||||
|
||||
count -= rep->min;
|
||||
|
||||
@ -979,8 +1011,8 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_slow_dot_repeat(bool
|
||||
const re_repeat* rep = pmp->rep;
|
||||
std::size_t count = pmp->count;
|
||||
assert(rep->type == syntax_element_dot_rep);
|
||||
assert(rep->next.p);
|
||||
assert(rep->alt.p);
|
||||
assert(rep->next.p != 0);
|
||||
assert(rep->alt.p != 0);
|
||||
assert(rep->next.p->type == syntax_element_wild);
|
||||
|
||||
assert(count < rep->max);
|
||||
@ -1097,8 +1129,8 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_char_repeat(bool r)
|
||||
position = pmp->last_position;
|
||||
|
||||
assert(rep->type == syntax_element_char_rep);
|
||||
assert(rep->next.p);
|
||||
assert(rep->alt.p);
|
||||
assert(rep->next.p != 0);
|
||||
assert(rep->alt.p != 0);
|
||||
assert(rep->next.p->type == syntax_element_literal);
|
||||
assert(count < rep->max);
|
||||
|
||||
@ -1161,8 +1193,8 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_short_set_repeat(bool
|
||||
position = pmp->last_position;
|
||||
|
||||
assert(rep->type == syntax_element_short_set_rep);
|
||||
assert(rep->next.p);
|
||||
assert(rep->alt.p);
|
||||
assert(rep->next.p != 0);
|
||||
assert(rep->alt.p != 0);
|
||||
assert(rep->next.p->type == syntax_element_set);
|
||||
assert(count < rep->max);
|
||||
|
||||
@ -1226,8 +1258,8 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_long_set_repeat(bool
|
||||
position = pmp->last_position;
|
||||
|
||||
assert(rep->type == syntax_element_long_set_rep);
|
||||
assert(rep->next.p);
|
||||
assert(rep->alt.p);
|
||||
assert(rep->next.p != 0);
|
||||
assert(rep->alt.p != 0);
|
||||
assert(rep->next.p->type == syntax_element_long_set);
|
||||
assert(position != last);
|
||||
assert(count < rep->max);
|
||||
|
@ -48,7 +48,7 @@ public:
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
|
||||
{
|
||||
static matcher_proc_type const s_match_vtable[27] =
|
||||
static matcher_proc_type const s_match_vtable[28] =
|
||||
{
|
||||
(&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
|
||||
@ -77,6 +77,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_backstep,
|
||||
&perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref,
|
||||
};
|
||||
|
||||
if(state_count > max_state_count)
|
||||
@ -157,6 +158,37 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case -4:
|
||||
{
|
||||
// conditional expression:
|
||||
const re_alt* alt = static_cast<const re_alt*>(pstate->next.p);
|
||||
BOOST_ASSERT(alt->type == syntax_element_alt);
|
||||
pstate = alt->next.p;
|
||||
if(pstate->type == syntax_element_assert_backref)
|
||||
{
|
||||
if(!match_assert_backref())
|
||||
pstate = alt->alt.p;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
// zero width assertion, have to match this recursively:
|
||||
BOOST_ASSERT(pstate->type == syntax_element_startmark);
|
||||
bool negated = static_cast<const re_brace*>(pstate)->index == -2;
|
||||
BidiIterator saved_position = position;
|
||||
const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
|
||||
pstate = pstate->next.p->next.p;
|
||||
bool r = match_all_states();
|
||||
position = saved_position;
|
||||
if(negated)
|
||||
r = !r;
|
||||
if(r)
|
||||
pstate = next_pstate;
|
||||
else
|
||||
pstate = alt->alt.p;
|
||||
break;
|
||||
}
|
||||
}
|
||||
default:
|
||||
{
|
||||
assert(index > 0);
|
||||
|
@ -108,7 +108,9 @@ enum syntax_element_type
|
||||
syntax_element_short_set_rep = syntax_element_char_rep + 1,
|
||||
syntax_element_long_set_rep = syntax_element_short_set_rep + 1,
|
||||
// a backstep for lookbehind repeats:
|
||||
syntax_element_backstep = syntax_element_long_set_rep + 1
|
||||
syntax_element_backstep = syntax_element_long_set_rep + 1,
|
||||
// an assertion that a mark was matched:
|
||||
syntax_element_assert_backref = syntax_element_backstep +1
|
||||
};
|
||||
|
||||
#ifdef BOOST_REGEX_DEBUG
|
||||
|
@ -784,6 +784,8 @@ void test_tricky_cases2()
|
||||
TEST_REGEX_SEARCH("([a-c]+)\\1", perl, "abcbc", match_default, make_array(1, 5, 1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH(".+abc", perl, "xxxxxxxxyyyyyyyyab", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(.+)\\1", perl, "abcdxxxyyyxxxyyy", match_default, make_array(4, 16, 4, 10, -2, -2));
|
||||
// this should not throw:
|
||||
TEST_REGEX_SEARCH("[_]+$", perl, "___________________________________________x", match_default, make_array(-2, -2));
|
||||
|
||||
//
|
||||
// the strings in the next test case are too long for most compilers to cope with,
|
||||
@ -1315,5 +1317,55 @@ void test_nosubs()
|
||||
TEST_REGEX_SEARCH("a(bbb+|bb+|b)bb", perl|nosubs, "abbb", match_default, make_array(0, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("(.*).*", perl|nosubs, "abcdef", match_default, make_array(0, 6, -2, 6, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("(a*)*", perl|nosubs, "bc", match_default, make_array(0, 0, -2, 1, 1, -2, 2, 2, -2, -2));
|
||||
|
||||
}
|
||||
|
||||
//
// Regression tests for Perl-style conditional expressions, (?(condition)yes|no).
// Each TEST_REGEX_SEARCH call supplies a pattern, the syntax flags, the text to
// search, the match flags and the expected result array.
// NOTE(review): the -2 entries in make_array look like end-of-match / end-of-list
// markers (a bare "-2, -2" appearing where no match is expected) and -1 like an
// unmatched sub-expression - confirm against the test harness.
//
void test_conditionals()
|
||||
{
|
||||
using namespace boost::regex_constants;
|
||||
// condition on a marked sub-expression, with both a "yes" and a "no" branch:
TEST_REGEX_SEARCH("(?:(a)|b)(?(1)A|B)", perl, "aA", match_default, make_array(0, 2, 0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?:(a)|b)(?(1)A|B)", perl, "bB", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?:(a)|b)(?(1)A|B)", perl, "aB", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(?:(a)|b)(?(1)A|B)", perl, "bA", match_default, make_array(-2, -2));
|
||||
|
||||
// the conditional group itself is repeated, and the tested sub-expression is optional:
TEST_REGEX_SEARCH("^(a)?(?(1)a|b)+$", perl, "aa", match_default, make_array(0, 2, 0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("^(a)?(?(1)a|b)+$", perl, "b", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("^(a)?(?(1)a|b)+$", perl, "bb", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("^(a)?(?(1)a|b)+$", perl, "ab", match_default, make_array(-2, -2));
|
||||
|
||||
// condition is a positive lookahead assertion, (?=...):
TEST_REGEX_SEARCH("^(?(?=abc)\\w{3}:|\\d\\d)$", perl, "abc:", match_default, make_array(0, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("^(?(?=abc)\\w{3}:|\\d\\d)$", perl, "12", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("^(?(?=abc)\\w{3}:|\\d\\d)$", perl, "123", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("^(?(?=abc)\\w{3}:|\\d\\d)$", perl, "xyz", match_default, make_array(-2, -2));
|
||||
|
||||
// condition is a negative lookahead assertion, (?!...), with the branches swapped:
TEST_REGEX_SEARCH("^(?(?!abc)\\d\\d|\\w{3}:)$", perl, "abc:", match_default, make_array(0, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("^(?(?!abc)\\d\\d|\\w{3}:)$", perl, "12", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("^(?(?!abc)\\d\\d|\\w{3}:)$", perl, "123", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("^(?(?!abc)\\d\\d|\\w{3}:)$", perl, "xyz", match_default, make_array(-2, -2));
|
||||
|
||||
// condition is a positive lookbehind assertion, (?<=...):
TEST_REGEX_SEARCH("(?(?<=foo)bar|cat)", perl, "foobar", match_default, make_array(3, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?(?<=foo)bar|cat)", perl, "cat", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?(?<=foo)bar|cat)", perl, "fcat", match_default, make_array(1, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?(?<=foo)bar|cat)", perl, "focat", match_default, make_array(2, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?(?<=foo)bar|cat)", perl, "foocat", match_default, make_array(-2, -2));
|
||||
|
||||
// condition is a negative lookbehind assertion, (?<!...), with the branches swapped:
TEST_REGEX_SEARCH("(?(?<!foo)cat|bar)", perl, "foobar", match_default, make_array(3, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?(?<!foo)cat|bar)", perl, "cat", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?(?<!foo)cat|bar)", perl, "fcat", match_default, make_array(1, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?(?<!foo)cat|bar)", perl, "focat", match_default, make_array(2, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("(?(?<!foo)cat|bar)", perl, "foocat", match_default, make_array(-2, -2));
|
||||
|
||||
// conditional with only a "yes" branch (no '|' alternative in the pattern):
TEST_REGEX_SEARCH("(\\()?[^()]+(?(1)\\))", perl, "abcd", match_default, make_array(0, 4, -1, -1, -2, -2));
|
||||
TEST_REGEX_SEARCH("(\\()?[^()]+(?(1)\\))", perl, "(abcd)", match_default, make_array(0, 6, 0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("(\\()?[^()]+(?(1)\\))", perl, "the quick (abcd) fox", match_default, make_array(0, 10, -1, -1, -2, 10, 16, 10, 11, -2, 16, 20, -1, -1, -2, -2));
|
||||
TEST_REGEX_SEARCH("(\\()?[^()]+(?(1)\\))", perl, "(abcd", match_default, make_array(1, 5, -1, -1, -2, -2));
|
||||
|
||||
// condition refers to a sub-expression that appears later in the pattern, inside a repeat:
TEST_REGEX_SEARCH("^(?(2)a|(1)(2))+$", perl, "12", match_default, make_array(0, 2, 0, 1, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("^(?(2)a|(1)(2))+$", perl, "12a", match_default, make_array(0, 3, 0, 1, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("^(?(2)a|(1)(2))+$", perl, "12aa", match_default, make_array(0, 4, 0, 1, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("^(?(2)a|(1)(2))+$", perl, "1234", match_default, make_array(-2, -2));
|
||||
|
||||
// template for adding further test cases (intentionally disabled):
//TEST_REGEX_SEARCH("", perl, "", match_default, make_array(-2, -2));
|
||||
|
||||
// empty disabled region, presumably a placeholder for tests that are not yet enabled:
#if 0
|
||||
#endif
|
||||
}
|
||||
|
@ -24,6 +24,7 @@ int cpp_main(int argc, char * argv[])
|
||||
test_fast_repeats();
|
||||
test_independent_subs();
|
||||
test_nosubs();
|
||||
test_conditionals();
|
||||
return error_count;
|
||||
}
|
||||
|
||||
|
@ -142,5 +142,6 @@ void test_fast_repeats();
|
||||
void test_tricky_cases2();
|
||||
void test_independent_subs();
|
||||
void test_nosubs();
|
||||
void test_conditionals();
|
||||
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user