Added support for conditional expressions.

[SVN r22744]
This commit is contained in:
John Maddock
2004-05-05 10:41:55 +00:00
parent d2c3ec6d57
commit b5cf51fbc6
9 changed files with 202 additions and 17 deletions

View File

@ -1127,6 +1127,7 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
std::ptrdiff_t last_alt_point = m_alt_insert_point; std::ptrdiff_t last_alt_point = m_alt_insert_point;
this->m_pdata->m_data.align(); this->m_pdata->m_data.align();
m_alt_insert_point = this->m_pdata->m_data.size(); m_alt_insert_point = this->m_pdata->m_data.size();
std::ptrdiff_t expected_alt_point = m_alt_insert_point;
// //
// select the actual extension used: // select the actual extension used:
// //
@ -1191,6 +1192,48 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
this->m_pdata->m_data.align(); this->m_pdata->m_data.align();
m_alt_insert_point = this->m_pdata->m_data.size(); m_alt_insert_point = this->m_pdata->m_data.size();
break; break;
case regex_constants::syntax_open_mark:
{
// a conditional expression:
pb->index = markid = -4;
if(++m_position == m_end)
fail(REG_BADRPT, m_position - m_base);
int v = this->m_traits.toi(m_position, m_end, 10);
if(v > 0)
{
re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
br->index = v;
if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
fail(REG_BADRPT, m_position - m_base);
if(++m_position == m_end)
fail(REG_BADRPT, m_position - m_base);
}
else
{
// verify that we have a lookahead or lookbehind assert:
if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
fail(REG_BADRPT, m_position - m_base);
if(++m_position == m_end)
fail(REG_BADRPT, m_position - m_base);
if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
{
if(++m_position == m_end)
fail(REG_BADRPT, m_position - m_base);
if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
&& (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
fail(REG_BADRPT, m_position - m_base);
m_position -= 3;
}
else
{
if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
&& (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
fail(REG_BADRPT, m_position - m_base);
m_position -= 2;
}
}
break;
}
default: default:
fail(REG_BADRPT, m_position - m_base); fail(REG_BADRPT, m_position - m_base);
} }
@ -1221,6 +1264,19 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
} }
} }
// //
// verify that if this is conditional expression, that we do have
// an alternative, if not add one:
//
if(markid == -4)
{
re_syntax_base* b = this->getaddress(expected_alt_point);
if(b->type != syntax_element_alt)
{
re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
}
}
//
// append closing parenthesis state: // append closing parenthesis state:
// //
pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace))); pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));

View File

@ -334,6 +334,7 @@ private:
bool match_dot_repeat_fast(); bool match_dot_repeat_fast();
bool match_dot_repeat_slow(); bool match_dot_repeat_slow();
bool match_backstep(); bool match_backstep();
bool match_assert_backref();
bool backtrack_till_match(unsigned count); bool backtrack_till_match(unsigned count);
// find procs stored in s_find_vtable: // find procs stored in s_find_vtable:

View File

@ -81,11 +81,11 @@ void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(std
difference_type dist = boost::re_detail::distance(base, last); difference_type dist = boost::re_detail::distance(base, last);
traits_size_type states = static_cast<traits_size_type>(re.size()); traits_size_type states = static_cast<traits_size_type>(re.size());
states *= states; states *= states;
difference_type lim = (std::numeric_limits<difference_type>::max)() - 1000 - states; difference_type lim = (std::numeric_limits<difference_type>::max)() - 100000 - states;
if(dist > (difference_type)(lim / states)) if(dist > (difference_type)(lim / states))
max_state_count = lim; max_state_count = lim;
else else
max_state_count = 1000 + states * dist; max_state_count = 100000 + states * dist;
} }
template <class BidiIterator, class Allocator, class traits> template <class BidiIterator, class Allocator, class traits>
void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(void*) void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(void*)
@ -294,7 +294,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_endmark()
if((m_match_flags & match_nosubs) == 0) if((m_match_flags & match_nosubs) == 0)
m_presult->set_second(position, index); m_presult->set_second(position, index);
} }
else if(index < 0) else if((index < 0) && (index != -4))
{ {
// matched forward lookahead: // matched forward lookahead:
pstate = 0; pstate = 0;
@ -670,6 +670,14 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_backstep()
return true; return true;
} }
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref()
{
   // Return true if marked sub-expression N has been matched.
   //
   // On entry pstate points at the syntax_element_assert_backref state,
   // which is laid out as a re_brace whose index member holds N.  The
   // index therefore has to be read *before* pstate is advanced: reading
   // it afterwards would reinterpret whatever state follows (a literal,
   // a set, ...) as a re_brace and test an unrelated sub-expression.
   const bool result = (*m_presult)[static_cast<const re_brace*>(pstate)->index].matched;
   // Always step past the assertion; when the result is false the caller
   // is responsible for redirecting pstate to the alternative branch.
   pstate = pstate->next.p;
   return result;
}
template <class BidiIterator, class Allocator, class traits> template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_any() bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_any()
{ {

View File

@ -113,7 +113,7 @@ struct saved_single_repeat : public saved_state
template <class BidiIterator, class Allocator, class traits> template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states() bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
{ {
static matcher_proc_type const s_match_vtable[27] = static matcher_proc_type const s_match_vtable[28] =
{ {
(&perl_matcher<BidiIterator, Allocator, traits>::match_startmark), (&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
&perl_matcher<BidiIterator, Allocator, traits>::match_endmark, &perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
@ -142,6 +142,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
&perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat, &perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat,
&perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat, &perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat,
&perl_matcher<BidiIterator, Allocator, traits>::match_backstep, &perl_matcher<BidiIterator, Allocator, traits>::match_backstep,
&perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref,
}; };
push_recursion_stopper(); push_recursion_stopper();
@ -344,6 +345,37 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
#endif #endif
return r; return r;
} }
case -4:
{
// conditional expression:
const re_alt* alt = static_cast<const re_alt*>(pstate->next.p);
BOOST_ASSERT(alt->type == syntax_element_alt);
pstate = alt->next.p;
if(pstate->type == syntax_element_assert_backref)
{
if(!match_assert_backref())
pstate = alt->alt.p;
break;
}
else
{
// zero width assertion, have to match this recursively:
BOOST_ASSERT(pstate->type == syntax_element_startmark);
bool negated = static_cast<const re_brace*>(pstate)->index == -2;
BidiIterator saved_position = position;
const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
pstate = pstate->next.p->next.p;
bool r = match_all_states();
position = saved_position;
if(negated)
r = !r;
if(r)
pstate = next_pstate;
else
pstate = alt->alt.p;
break;
}
}
default: default:
{ {
assert(index > 0); assert(index > 0);
@ -929,8 +961,8 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_greedy_single_repeat(
const re_repeat* rep = pmp->rep; const re_repeat* rep = pmp->rep;
std::size_t count = pmp->count; std::size_t count = pmp->count;
assert(rep->next.p); assert(rep->next.p != 0);
assert(rep->alt.p); assert(rep->alt.p != 0);
count -= rep->min; count -= rep->min;
@ -979,8 +1011,8 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_slow_dot_repeat(bool
const re_repeat* rep = pmp->rep; const re_repeat* rep = pmp->rep;
std::size_t count = pmp->count; std::size_t count = pmp->count;
assert(rep->type == syntax_element_dot_rep); assert(rep->type == syntax_element_dot_rep);
assert(rep->next.p); assert(rep->next.p != 0);
assert(rep->alt.p); assert(rep->alt.p != 0);
assert(rep->next.p->type == syntax_element_wild); assert(rep->next.p->type == syntax_element_wild);
assert(count < rep->max); assert(count < rep->max);
@ -1097,8 +1129,8 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_char_repeat(bool r)
position = pmp->last_position; position = pmp->last_position;
assert(rep->type == syntax_element_char_rep); assert(rep->type == syntax_element_char_rep);
assert(rep->next.p); assert(rep->next.p != 0);
assert(rep->alt.p); assert(rep->alt.p != 0);
assert(rep->next.p->type == syntax_element_literal); assert(rep->next.p->type == syntax_element_literal);
assert(count < rep->max); assert(count < rep->max);
@ -1161,8 +1193,8 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_short_set_repeat(bool
position = pmp->last_position; position = pmp->last_position;
assert(rep->type == syntax_element_short_set_rep); assert(rep->type == syntax_element_short_set_rep);
assert(rep->next.p); assert(rep->next.p != 0);
assert(rep->alt.p); assert(rep->alt.p != 0);
assert(rep->next.p->type == syntax_element_set); assert(rep->next.p->type == syntax_element_set);
assert(count < rep->max); assert(count < rep->max);
@ -1226,8 +1258,8 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_long_set_repeat(bool
position = pmp->last_position; position = pmp->last_position;
assert(rep->type == syntax_element_long_set_rep); assert(rep->type == syntax_element_long_set_rep);
assert(rep->next.p); assert(rep->next.p != 0);
assert(rep->alt.p); assert(rep->alt.p != 0);
assert(rep->next.p->type == syntax_element_long_set); assert(rep->next.p->type == syntax_element_long_set);
assert(position != last); assert(position != last);
assert(count < rep->max); assert(count < rep->max);

View File

@ -48,7 +48,7 @@ public:
template <class BidiIterator, class Allocator, class traits> template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states() bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
{ {
static matcher_proc_type const s_match_vtable[27] = static matcher_proc_type const s_match_vtable[28] =
{ {
(&perl_matcher<BidiIterator, Allocator, traits>::match_startmark), (&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
&perl_matcher<BidiIterator, Allocator, traits>::match_endmark, &perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
@ -77,6 +77,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
&perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat, &perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat,
&perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat, &perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat,
&perl_matcher<BidiIterator, Allocator, traits>::match_backstep, &perl_matcher<BidiIterator, Allocator, traits>::match_backstep,
&perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref,
}; };
if(state_count > max_state_count) if(state_count > max_state_count)
@ -157,6 +158,37 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
#endif #endif
break; break;
} }
case -4:
{
// conditional expression:
const re_alt* alt = static_cast<const re_alt*>(pstate->next.p);
BOOST_ASSERT(alt->type == syntax_element_alt);
pstate = alt->next.p;
if(pstate->type == syntax_element_assert_backref)
{
if(!match_assert_backref())
pstate = alt->alt.p;
break;
}
else
{
// zero width assertion, have to match this recursively:
BOOST_ASSERT(pstate->type == syntax_element_startmark);
bool negated = static_cast<const re_brace*>(pstate)->index == -2;
BidiIterator saved_position = position;
const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
pstate = pstate->next.p->next.p;
bool r = match_all_states();
position = saved_position;
if(negated)
r = !r;
if(r)
pstate = next_pstate;
else
pstate = alt->alt.p;
break;
}
}
default: default:
{ {
assert(index > 0); assert(index > 0);

View File

@ -108,7 +108,9 @@ enum syntax_element_type
syntax_element_short_set_rep = syntax_element_char_rep + 1, syntax_element_short_set_rep = syntax_element_char_rep + 1,
syntax_element_long_set_rep = syntax_element_short_set_rep + 1, syntax_element_long_set_rep = syntax_element_short_set_rep + 1,
// a backstep for lookbehind repeats: // a backstep for lookbehind repeats:
syntax_element_backstep = syntax_element_long_set_rep + 1 syntax_element_backstep = syntax_element_long_set_rep + 1,
// an assertion that a mark was matched:
syntax_element_assert_backref = syntax_element_backstep +1
}; };
#ifdef BOOST_REGEX_DEBUG #ifdef BOOST_REGEX_DEBUG

View File

@ -784,6 +784,8 @@ void test_tricky_cases2()
TEST_REGEX_SEARCH("([a-c]+)\\1", perl, "abcbc", match_default, make_array(1, 5, 1, 3, -2, -2)); TEST_REGEX_SEARCH("([a-c]+)\\1", perl, "abcbc", match_default, make_array(1, 5, 1, 3, -2, -2));
TEST_REGEX_SEARCH(".+abc", perl, "xxxxxxxxyyyyyyyyab", match_default, make_array(-2, -2)); TEST_REGEX_SEARCH(".+abc", perl, "xxxxxxxxyyyyyyyyab", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("(.+)\\1", perl, "abcdxxxyyyxxxyyy", match_default, make_array(4, 16, 4, 10, -2, -2)); TEST_REGEX_SEARCH("(.+)\\1", perl, "abcdxxxyyyxxxyyy", match_default, make_array(4, 16, 4, 10, -2, -2));
// this should not throw:
TEST_REGEX_SEARCH("[_]+$", perl, "___________________________________________x", match_default, make_array(-2, -2));
// //
// the strings in the next test case are too long for most compilers to cope with, // the strings in the next test case are too long for most compilers to cope with,
@ -1315,5 +1317,55 @@ void test_nosubs()
TEST_REGEX_SEARCH("a(bbb+|bb+|b)bb", perl|nosubs, "abbb", match_default, make_array(0, 4, -2, -2)); TEST_REGEX_SEARCH("a(bbb+|bb+|b)bb", perl|nosubs, "abbb", match_default, make_array(0, 4, -2, -2));
TEST_REGEX_SEARCH("(.*).*", perl|nosubs, "abcdef", match_default, make_array(0, 6, -2, 6, 6, -2, -2)); TEST_REGEX_SEARCH("(.*).*", perl|nosubs, "abcdef", match_default, make_array(0, 6, -2, 6, 6, -2, -2));
TEST_REGEX_SEARCH("(a*)*", perl|nosubs, "bc", match_default, make_array(0, 0, -2, 1, 1, -2, 2, 2, -2, -2)); TEST_REGEX_SEARCH("(a*)*", perl|nosubs, "bc", match_default, make_array(0, 0, -2, 1, 1, -2, 2, 2, -2, -2));
}
// Regression tests for Perl-style conditional expressions of the form
// (?(condition)yes-pattern|no-pattern), driven through TEST_REGEX_SEARCH.
// NOTE(review): the make_array(...) arguments look like expected match /
// sub-expression offsets with -2 acting as a separator/terminator -- confirm
// against the test harness before editing any expected values.
void test_conditionals()
{
using namespace boost::regex_constants;
// condition is "sub-expression 1 was matched":
TEST_REGEX_SEARCH("(?:(a)|b)(?(1)A|B)", perl, "aA", match_default, make_array(0, 2, 0, 1, -2, -2));
TEST_REGEX_SEARCH("(?:(a)|b)(?(1)A|B)", perl, "bB", match_default, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("(?:(a)|b)(?(1)A|B)", perl, "aB", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("(?:(a)|b)(?(1)A|B)", perl, "bA", match_default, make_array(-2, -2));
// same kind of condition, with the conditional group itself repeated:
TEST_REGEX_SEARCH("^(a)?(?(1)a|b)+$", perl, "aa", match_default, make_array(0, 2, 0, 1, -2, -2));
TEST_REGEX_SEARCH("^(a)?(?(1)a|b)+$", perl, "b", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("^(a)?(?(1)a|b)+$", perl, "bb", match_default, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("^(a)?(?(1)a|b)+$", perl, "ab", match_default, make_array(-2, -2));
// condition is a positive lookahead assertion (?=...):
TEST_REGEX_SEARCH("^(?(?=abc)\\w{3}:|\\d\\d)$", perl, "abc:", match_default, make_array(0, 4, -2, -2));
TEST_REGEX_SEARCH("^(?(?=abc)\\w{3}:|\\d\\d)$", perl, "12", match_default, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("^(?(?=abc)\\w{3}:|\\d\\d)$", perl, "123", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("^(?(?=abc)\\w{3}:|\\d\\d)$", perl, "xyz", match_default, make_array(-2, -2));
// condition is a negative lookahead assertion (?!...), branches swapped:
TEST_REGEX_SEARCH("^(?(?!abc)\\d\\d|\\w{3}:)$", perl, "abc:", match_default, make_array(0, 4, -2, -2));
TEST_REGEX_SEARCH("^(?(?!abc)\\d\\d|\\w{3}:)$", perl, "12", match_default, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("^(?(?!abc)\\d\\d|\\w{3}:)$", perl, "123", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("^(?(?!abc)\\d\\d|\\w{3}:)$", perl, "xyz", match_default, make_array(-2, -2));
// condition is a positive lookbehind assertion (?<=...):
TEST_REGEX_SEARCH("(?(?<=foo)bar|cat)", perl, "foobar", match_default, make_array(3, 6, -2, -2));
TEST_REGEX_SEARCH("(?(?<=foo)bar|cat)", perl, "cat", match_default, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("(?(?<=foo)bar|cat)", perl, "fcat", match_default, make_array(1, 4, -2, -2));
TEST_REGEX_SEARCH("(?(?<=foo)bar|cat)", perl, "focat", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("(?(?<=foo)bar|cat)", perl, "foocat", match_default, make_array(-2, -2));
// condition is a negative lookbehind assertion (?<!...), branches swapped:
TEST_REGEX_SEARCH("(?(?<!foo)cat|bar)", perl, "foobar", match_default, make_array(3, 6, -2, -2));
TEST_REGEX_SEARCH("(?(?<!foo)cat|bar)", perl, "cat", match_default, make_array(0, 3, -2, -2));
TEST_REGEX_SEARCH("(?(?<!foo)cat|bar)", perl, "fcat", match_default, make_array(1, 4, -2, -2));
TEST_REGEX_SEARCH("(?(?<!foo)cat|bar)", perl, "focat", match_default, make_array(2, 5, -2, -2));
TEST_REGEX_SEARCH("(?(?<!foo)cat|bar)", perl, "foocat", match_default, make_array(-2, -2));
// conditional written with only a "yes" branch (no '|' alternative):
TEST_REGEX_SEARCH("(\\()?[^()]+(?(1)\\))", perl, "abcd", match_default, make_array(0, 4, -1, -1, -2, -2));
TEST_REGEX_SEARCH("(\\()?[^()]+(?(1)\\))", perl, "(abcd)", match_default, make_array(0, 6, 0, 1, -2, -2));
TEST_REGEX_SEARCH("(\\()?[^()]+(?(1)\\))", perl, "the quick (abcd) fox", match_default, make_array(0, 10, -1, -1, -2, 10, 16, 10, 11, -2, 16, 20, -1, -1, -2, -2));
TEST_REGEX_SEARCH("(\\()?[^()]+(?(1)\\))", perl, "(abcd", match_default, make_array(1, 5, -1, -1, -2, -2));
// condition refers to sub-expression 2, which is defined inside the
// conditional's own "no" branch:
TEST_REGEX_SEARCH("^(?(2)a|(1)(2))+$", perl, "12", match_default, make_array(0, 2, 0, 1, 1, 2, -2, -2));
TEST_REGEX_SEARCH("^(?(2)a|(1)(2))+$", perl, "12a", match_default, make_array(0, 3, 0, 1, 1, 2, -2, -2));
TEST_REGEX_SEARCH("^(?(2)a|(1)(2))+$", perl, "12aa", match_default, make_array(0, 4, 0, 1, 1, 2, -2, -2));
TEST_REGEX_SEARCH("^(?(2)a|(1)(2))+$", perl, "1234", match_default, make_array(-2, -2));
// template line for adding further cases (intentionally commented out):
//TEST_REGEX_SEARCH("", perl, "", match_default, make_array(-2, -2));
// empty disabled region; currently contains no tests:
#if 0
#endif
} }

View File

@ -24,6 +24,7 @@ int cpp_main(int argc, char * argv[])
test_fast_repeats(); test_fast_repeats();
test_independent_subs(); test_independent_subs();
test_nosubs(); test_nosubs();
test_conditionals();
return error_count; return error_count;
} }

View File

@ -142,5 +142,6 @@ void test_fast_repeats();
void test_tricky_cases2(); void test_tricky_cases2();
void test_independent_subs(); void test_independent_subs();
void test_nosubs(); void test_nosubs();
void test_conditionals();
#endif #endif