Added support for conditional expressions.

[SVN r22744]
This commit is contained in:
John Maddock
2004-05-05 10:41:55 +00:00
parent d2c3ec6d57
commit b5cf51fbc6
9 changed files with 202 additions and 17 deletions

View File

@ -1127,6 +1127,7 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
std::ptrdiff_t last_alt_point = m_alt_insert_point;
this->m_pdata->m_data.align();
m_alt_insert_point = this->m_pdata->m_data.size();
std::ptrdiff_t expected_alt_point = m_alt_insert_point;
//
// select the actual extension used:
//
@ -1191,6 +1192,48 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
this->m_pdata->m_data.align();
m_alt_insert_point = this->m_pdata->m_data.size();
break;
case regex_constants::syntax_open_mark:
{
// a conditional expression:
pb->index = markid = -4;
if(++m_position == m_end)
fail(REG_BADRPT, m_position - m_base);
int v = this->m_traits.toi(m_position, m_end, 10);
if(v > 0)
{
re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
br->index = v;
if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
fail(REG_BADRPT, m_position - m_base);
if(++m_position == m_end)
fail(REG_BADRPT, m_position - m_base);
}
else
{
// verify that we have a lookahead or lookbehind assert:
if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
fail(REG_BADRPT, m_position - m_base);
if(++m_position == m_end)
fail(REG_BADRPT, m_position - m_base);
if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
{
if(++m_position == m_end)
fail(REG_BADRPT, m_position - m_base);
if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
&& (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
fail(REG_BADRPT, m_position - m_base);
m_position -= 3;
}
else
{
if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
&& (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
fail(REG_BADRPT, m_position - m_base);
m_position -= 2;
}
}
break;
}
default:
fail(REG_BADRPT, m_position - m_base);
}
@ -1221,6 +1264,19 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
}
}
//
// verify that if this is conditional expression, that we do have
// an alternative, if not add one:
//
if(markid == -4)
{
re_syntax_base* b = this->getaddress(expected_alt_point);
if(b->type != syntax_element_alt)
{
re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
}
}
//
// append closing parenthesis state:
//
pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));

View File

@ -334,6 +334,7 @@ private:
bool match_dot_repeat_fast();
bool match_dot_repeat_slow();
bool match_backstep();
bool match_assert_backref();
bool backtrack_till_match(unsigned count);
// find procs stored in s_find_vtable:

View File

@ -81,11 +81,11 @@ void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(std
difference_type dist = boost::re_detail::distance(base, last);
traits_size_type states = static_cast<traits_size_type>(re.size());
states *= states;
difference_type lim = (std::numeric_limits<difference_type>::max)() - 1000 - states;
difference_type lim = (std::numeric_limits<difference_type>::max)() - 100000 - states;
if(dist > (difference_type)(lim / states))
max_state_count = lim;
else
max_state_count = 1000 + states * dist;
max_state_count = 100000 + states * dist;
}
template <class BidiIterator, class Allocator, class traits>
void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(void*)
@ -294,7 +294,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_endmark()
if((m_match_flags & match_nosubs) == 0)
m_presult->set_second(position, index);
}
else if(index < 0)
else if((index < 0) && (index != -4))
{
// matched forward lookahead:
pstate = 0;
@ -670,6 +670,14 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_backstep()
return true;
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref()
{
// return true if marked sub-expression N has been matched:
pstate = pstate->next.p;
return (*m_presult)[static_cast<const re_brace*>(pstate)->index].matched;
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_any()
{

View File

@ -113,7 +113,7 @@ struct saved_single_repeat : public saved_state
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
{
static matcher_proc_type const s_match_vtable[27] =
static matcher_proc_type const s_match_vtable[28] =
{
(&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
&perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
@ -142,6 +142,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
&perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat,
&perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat,
&perl_matcher<BidiIterator, Allocator, traits>::match_backstep,
&perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref,
};
push_recursion_stopper();
@ -344,6 +345,37 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
#endif
return r;
}
case -4:
{
// conditional expression:
const re_alt* alt = static_cast<const re_alt*>(pstate->next.p);
BOOST_ASSERT(alt->type == syntax_element_alt);
pstate = alt->next.p;
if(pstate->type == syntax_element_assert_backref)
{
if(!match_assert_backref())
pstate = alt->alt.p;
break;
}
else
{
// zero width assertion, have to match this recursively:
BOOST_ASSERT(pstate->type == syntax_element_startmark);
bool negated = static_cast<const re_brace*>(pstate)->index == -2;
BidiIterator saved_position = position;
const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
pstate = pstate->next.p->next.p;
bool r = match_all_states();
position = saved_position;
if(negated)
r = !r;
if(r)
pstate = next_pstate;
else
pstate = alt->alt.p;
break;
}
}
default:
{
assert(index > 0);
@ -929,8 +961,8 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_greedy_single_repeat(
const re_repeat* rep = pmp->rep;
std::size_t count = pmp->count;
assert(rep->next.p);
assert(rep->alt.p);
assert(rep->next.p != 0);
assert(rep->alt.p != 0);
count -= rep->min;
@ -979,8 +1011,8 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_slow_dot_repeat(bool
const re_repeat* rep = pmp->rep;
std::size_t count = pmp->count;
assert(rep->type == syntax_element_dot_rep);
assert(rep->next.p);
assert(rep->alt.p);
assert(rep->next.p != 0);
assert(rep->alt.p != 0);
assert(rep->next.p->type == syntax_element_wild);
assert(count < rep->max);
@ -1097,8 +1129,8 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_char_repeat(bool r)
position = pmp->last_position;
assert(rep->type == syntax_element_char_rep);
assert(rep->next.p);
assert(rep->alt.p);
assert(rep->next.p != 0);
assert(rep->alt.p != 0);
assert(rep->next.p->type == syntax_element_literal);
assert(count < rep->max);
@ -1161,8 +1193,8 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_short_set_repeat(bool
position = pmp->last_position;
assert(rep->type == syntax_element_short_set_rep);
assert(rep->next.p);
assert(rep->alt.p);
assert(rep->next.p != 0);
assert(rep->alt.p != 0);
assert(rep->next.p->type == syntax_element_set);
assert(count < rep->max);
@ -1226,8 +1258,8 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_long_set_repeat(bool
position = pmp->last_position;
assert(rep->type == syntax_element_long_set_rep);
assert(rep->next.p);
assert(rep->alt.p);
assert(rep->next.p != 0);
assert(rep->alt.p != 0);
assert(rep->next.p->type == syntax_element_long_set);
assert(position != last);
assert(count < rep->max);

View File

@ -48,7 +48,7 @@ public:
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
{
static matcher_proc_type const s_match_vtable[27] =
static matcher_proc_type const s_match_vtable[28] =
{
(&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
&perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
@ -77,6 +77,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
&perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat,
&perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat,
&perl_matcher<BidiIterator, Allocator, traits>::match_backstep,
&perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref,
};
if(state_count > max_state_count)
@ -157,6 +158,37 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
#endif
break;
}
case -4:
{
// conditional expression:
const re_alt* alt = static_cast<const re_alt*>(pstate->next.p);
BOOST_ASSERT(alt->type == syntax_element_alt);
pstate = alt->next.p;
if(pstate->type == syntax_element_assert_backref)
{
if(!match_assert_backref())
pstate = alt->alt.p;
break;
}
else
{
// zero width assertion, have to match this recursively:
BOOST_ASSERT(pstate->type == syntax_element_startmark);
bool negated = static_cast<const re_brace*>(pstate)->index == -2;
BidiIterator saved_position = position;
const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
pstate = pstate->next.p->next.p;
bool r = match_all_states();
position = saved_position;
if(negated)
r = !r;
if(r)
pstate = next_pstate;
else
pstate = alt->alt.p;
break;
}
}
default:
{
assert(index > 0);

View File

@ -108,7 +108,9 @@ enum syntax_element_type
syntax_element_short_set_rep = syntax_element_char_rep + 1,
syntax_element_long_set_rep = syntax_element_short_set_rep + 1,
// a backstep for lookbehind repeats:
syntax_element_backstep = syntax_element_long_set_rep + 1
syntax_element_backstep = syntax_element_long_set_rep + 1,
// an assertion that a mark was matched:
syntax_element_assert_backref = syntax_element_backstep +1
};
#ifdef BOOST_REGEX_DEBUG

View File

@ -784,6 +784,8 @@ void test_tricky_cases2()
TEST_REGEX_SEARCH("([a-c]+)\\1", perl, "abcbc", match_default, make_array(1, 5, 1, 3, -2, -2));
TEST_REGEX_SEARCH(".+abc", perl, "xxxxxxxxyyyyyyyyab", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("(.+)\\1", perl, "abcdxxxyyyxxxyyy", match_default, make_array(4, 16, 4, 10, -2, -2));
// this should not throw:
TEST_REGEX_SEARCH("[_]+$", perl, "___________________________________________x", match_default, make_array(-2, -2));
//
// the strings in the next test case are too long for most compilers to cope with,
@ -1315,5 +1317,55 @@ void test_nosubs()
TEST_REGEX_SEARCH("a(bbb+|bb+|b)bb", perl|nosubs, "abbb", match_default, make_array(0, 4, -2, -2));
TEST_REGEX_SEARCH("(.*).*", perl|nosubs, "abcdef", match_default, make_array(0, 6, -2, 6, 6, -2, -2));
TEST_REGEX_SEARCH("(a*)*", perl|nosubs, "bc", match_default, make_array(0, 0, -2, 1, 1, -2, 2, 2, -2, -2));
}
void test_conditionals()
{
using namespace boost::regex_constants;
TEST_REGEX_SEARCH("(?:(a)|b)(?(1)A|B)", perl, "aA", match_default, make_array(0, 2, 0, 1, -2, -2));
TEST_REGEX_SEARCH("(?:(a)|b)(?(1)A|B)", perl, "bB", match_default, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("(?:(a)|b)(?(1)A|B)", perl, "aB", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("(?:(a)|b)(?(1)A|B)", perl, "bA", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("^(a)?(?(1)a|b)+$", perl, "aa", match_default, make_array(0, 2, 0, 1, -2, -2));
TEST_REGEX_SEARCH("^(a)?(?(1)a|b)+$", perl, "b", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("^(a)?(?(1)a|b)+$", perl, "bb", match_default, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("^(a)?(?(1)a|b)+$", perl, "ab", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("^(?(?=abc)\\w{3}:|\\d\\d)$", perl, "abc:", match_default, make_array(0, 4, -2, -2));
TEST_REGEX_SEARCH("^(?(?=abc)\\w{3}:|\\d\\d)$", perl, "12", match_default, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("^(?(?=abc)\\w{3}:|\\d\\d)$", perl, "123", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("^(?(?=abc)\\w{3}:|\\d\\d)$", perl, "xyz", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("^(?(?!abc)\\d\\d|\\w{3}:)$", perl, "abc:", match_default, make_array(0, 4, -2, -2));
TEST_REGEX_SEARCH("^(?(?!abc)\\d\\d|\\w{3}:)$", perl, "12", match_default, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("^(?(?!abc)\\d\\d|\\w{3}:)$", perl, "123", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("^(?(?!abc)\\d\\d|\\w{3}:)$", perl, "xyz", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("(?(?<=foo)bar|cat)", perl, "foobar", match_default, make_array(3, 6, -2, -2));
TEST_REGEX_SEARCH("(?(?<=foo)bar|cat)", perl, "cat", match_default, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("(?(?<=foo)bar|cat)", perl, "fcat", match_default, make_array(1, 4, -2, -2));
TEST_REGEX_SEARCH("(?(?<=foo)bar|cat)", perl, "focat", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("(?(?<=foo)bar|cat)", perl, "foocat", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("(?(?<!foo)cat|bar)", perl, "foobar", match_default, make_array(3, 6, -2, -2));
TEST_REGEX_SEARCH("(?(?<!foo)cat|bar)", perl, "cat", match_default, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("(?(?<!foo)cat|bar)", perl, "fcat", match_default, make_array(1, 4, -2, -2));
TEST_REGEX_SEARCH("(?(?<!foo)cat|bar)", perl, "focat", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("(?(?<!foo)cat|bar)", perl, "foocat", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("(\\()?[^()]+(?(1)\\))", perl, "abcd", match_default, make_array(0, 4, -1, -1, -2, -2));
TEST_REGEX_SEARCH("(\\()?[^()]+(?(1)\\))", perl, "(abcd)", match_default, make_array(0, 6, 0, 1, -2, -2));
TEST_REGEX_SEARCH("(\\()?[^()]+(?(1)\\))", perl, "the quick (abcd) fox", match_default, make_array(0, 10, -1, -1, -2, 10, 16, 10, 11, -2, 16, 20, -1, -1, -2, -2));
TEST_REGEX_SEARCH("(\\()?[^()]+(?(1)\\))", perl, "(abcd", match_default, make_array(1, 5, -1, -1, -2, -2));
TEST_REGEX_SEARCH("^(?(2)a|(1)(2))+$", perl, "12", match_default, make_array(0, 2, 0, 1, 1, 2, -2, -2));
TEST_REGEX_SEARCH("^(?(2)a|(1)(2))+$", perl, "12a", match_default, make_array(0, 3, 0, 1, 1, 2, -2, -2));
TEST_REGEX_SEARCH("^(?(2)a|(1)(2))+$", perl, "12aa", match_default, make_array(0, 4, 0, 1, 1, 2, -2, -2));
TEST_REGEX_SEARCH("^(?(2)a|(1)(2))+$", perl, "1234", match_default, make_array(-2, -2));
//TEST_REGEX_SEARCH("", perl, "", match_default, make_array(-2, -2));
#if 0
#endif
}

View File

@ -24,6 +24,7 @@ int cpp_main(int argc, char * argv[])
test_fast_repeats();
test_independent_subs();
test_nosubs();
test_conditionals();
return error_count;
}

View File

@ -142,5 +142,6 @@ void test_fast_repeats();
void test_tricky_cases2();
void test_independent_subs();
void test_nosubs();
void test_conditionals();
#endif