Alternatives now work.

[SVN r22525]
This commit is contained in:
John Maddock
2004-03-19 12:58:49 +00:00
parent 23d7352b19
commit 38b58f2007
5 changed files with 98 additions and 26 deletions

View File

@ -348,14 +348,13 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
state = static_cast<re_alt*>(state)->alt.p;
break;;
case syntax_element_alt:
assert(0);
case syntax_element_rep:
case syntax_element_dot_rep:
case syntax_element_char_rep:
case syntax_element_short_set_rep:
case syntax_element_long_set_rep:
{
re_alt* rep = static_cast<re_repeat*>(state);
re_alt* rep = static_cast<re_alt*>(state);
if(rep->_map[0] & mask_init)
{
if(map)

View File

@ -44,6 +44,7 @@ public:
bool parse_match_any();
bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
bool parse_repeat_range(bool isbasic);
bool parse_alt();
private:
typedef bool (basic_regex_parser::*parser_proc_type)();
@ -54,6 +55,7 @@ private:
unsigned m_mark_count; // how many sub-expressions we have
std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
unsigned m_repeater_id; // the id of the next repeater
std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
basic_regex_parser& operator=(const basic_regex_parser&);
basic_regex_parser(const basic_regex_parser&);
@ -61,7 +63,7 @@ private:
template <class charT, class traits>
basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
: basic_regex_creator<charT, traits>(data), m_mark_count(0), m_paren_start(0), m_repeater_id(0)
: basic_regex_creator<charT, traits>(data), m_mark_count(0), m_paren_start(0), m_repeater_id(0), m_alt_insert_point(0)
{
}
@ -201,6 +203,8 @@ bool basic_regex_parser<charT, traits>::parse_extended()
BOOST_ASSERT(0);
result = false;
break;
case regex_constants::syntax_or:
return parse_alt();
default:
result = parse_literal();
break;
@ -230,6 +234,10 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
pb->index = markid;
++m_position;
std::ptrdiff_t last_paren_start = this->getoffset(pb);
// back up insertion point for alternations, and set new point:
std::ptrdiff_t last_alt_point = m_alt_insert_point;
this->m_pdata->m_data.align();
m_alt_insert_point = this->m_pdata->m_data.size();
//
// now recursively add more states, this will terminate when we get to a
// matching ')' :
@ -248,6 +256,10 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
pb->index = markid;
this->m_paren_start = last_paren_start;
//
// restore the alternate insertion point:
//
this->m_alt_insert_point = last_alt_point;
return true;
}
@ -280,12 +292,22 @@ bool basic_regex_parser<charT, traits>::parse_basic_escape()
else
return parse_literal();
case regex_constants::syntax_open_brace:
if(this->m_pdata->m_flags & regbase::no_intervals)
return parse_literal();
++m_position;
return parse_repeat_range(true);
case regex_constants::syntax_close_brace:
if(this->m_pdata->m_flags & regbase::no_intervals)
return parse_literal();
fail(REG_EBRACE, this->m_position - this->m_base);
result = false;
break;
case regex_constants::syntax_or:
if(this->m_pdata->m_flags & regbase::bk_vbar)
return parse_alt();
else
result = parse_literal();
break;
default:
result = parse_literal();
break;
@ -482,6 +504,50 @@ bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
return parse_repeat(min, max);
}
//
// parse_alt:
// Called when the parser meets an alternation operator. Appends a jump
// state after the states parsed so far, inserts an alt state at
// m_alt_insert_point, parses what follows via parse_all(), and finally
// fixes up the jump so that it points past the states just added.
//
// Returns false (after reporting REG_EMPTY through fail()) when either
// side of the alternation is empty, otherwise the result of parse_all().
//
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_alt()
{
   //
   // error check: if there have been no previous states,
   // or if the last state was a '(' then error.  Nothing has been
   // modified yet, so report the failure and stop right here rather
   // than carrying on building states for an invalid expression:
   //
   if((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
   {
      fail(REG_EMPTY, this->m_position - this->m_base);
      return false;
   }
   ++m_position;
   //
   // we need to append a trailing jump, then insert the alternative.
   // Only the jump's offset is kept: the pointer returned by append_state
   // is not used again once insert_state has been called
   // (NOTE(review): presumably insert_state can relocate the storage - confirm).
   //
   re_syntax_base* pj = this->append_state(re_detail::syntax_element_jump, sizeof(re_jump));
   std::ptrdiff_t jump_offset = this->getoffset(pj);
   re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
   // re_alt_size bytes were inserted, so adjust the recorded jump offset to match:
   jump_offset += re_alt_size;
   this->m_pdata->m_data.align();
   palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
   //
   // update m_alt_insert_point so that the next alternate gets
   // inserted at the start of the second of the two we've just created:
   //
   this->m_alt_insert_point = this->m_pdata->m_data.size();
   //
   // recursively add states:
   //
   bool result = this->parse_all();
   //
   // if we didn't actually add any trailing states then that's an error;
   // make sure the caller sees the failure even if parse_all() succeeded:
   //
   if(this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
   {
      fail(REG_EMPTY, this->m_position - this->m_base);
      result = false;
   }
   //
   // fix up the jump we added to point to the end of the states
   // that we've just added (done even on failure, exactly as before,
   // so the jump state is never left without a target):
   //
   this->m_pdata->m_data.align();
   re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
   jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
   return result;
}
} // namespace re_detail
} // namespace boost

View File

@ -59,6 +59,7 @@ public:
no_char_classes = 1 << 8, // [[:CLASS:]] not allowed
no_intervals = 1 << 9, // {x,y} not allowed
bk_plus_qm = 1 << 10, // uses \+ and \?
bk_vbar = 1 << 11, // use \| for alternatives
//
// options common to all groups:
@ -120,6 +121,8 @@ namespace regex_constants{
nosubs = ::boost::regbase::nosubs,
optimize = ::boost::regbase::optimize,
bk_plus_qm = ::boost::regbase::bk_plus_qm,
bk_vbar = ::boost::regbase::bk_vbar,
no_intervals = ::boost::regbase::no_intervals,
basic = ::boost::regbase::basic,
extended = ::boost::regbase::extended,

View File

@ -215,7 +215,8 @@ We provide this so we know how manybytes to insert when constructing the machine
enum re_jump_size_type
{
re_jump_size = (sizeof(re_jump) + padding_mask) & ~(padding_mask),
re_repeater_size = (sizeof(re_repeat) + padding_mask) & ~(padding_mask)
re_repeater_size = (sizeof(re_repeat) + padding_mask) & ~(padding_mask),
re_alt_size = (sizeof(re_alt) + padding_mask) & ~(padding_mask)
};
/*** proc re_is_set_member *********************************************

View File

@ -180,29 +180,32 @@ void basic_tests()
TEST_INVALID_REGEX("a\\{1b\\}", basic);
TEST_INVALID_REGEX("a\\{1,b\\}", basic);
TEST_INVALID_REGEX("a\\{1,2v\\}", basic);
#if 0
; now test the alternation operator |
- match_default normal REG_EXTENDED
a|b a 0 1
a|b b 0 1
a(b|c) ab 0 2 1 2
a(b|c) ac 0 2 1 2
a(b|c) ad -1 -1 -1 -1
|c !
c| !
(|) !
(a|) !
(|a) !
a\| a| 0 2
- match_default normal limited_ops
a| a| 0 2
a\| a| 0 2
| | 0 1
- match_default normal bk_vbar REG_NO_POSIX_TEST
a| a| 0 2
a\|b a 0 1
a\|b b 0 1
// now test the alternation operator |
TEST_REGEX_SEARCH("a|b", perl, "a", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("a|b", perl, "b", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("a|b|c", perl, "c", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("a|(b)|.", perl, "b", match_default, make_array(0, 1, 0, 1, -2, -2));
TEST_REGEX_SEARCH("(a)|b|.", perl, "a", match_default, make_array(0, 1, 0, 1, -2, -2));
TEST_REGEX_SEARCH("a(b|c)", perl, "ab", match_default, make_array(0, 2, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b|c)", perl, "ac", match_default, make_array(0, 2, 1, 2, -2, -2));
TEST_REGEX_SEARCH("a(b|c)", perl, "ad", match_default, make_array(-2, -2));
TEST_REGEX_SEARCH("(a|b|c)", perl, "c", match_default, make_array(0, 1, 0, 1, -2, -2));
TEST_REGEX_SEARCH("(a|(b)|.)", perl, "b", match_default, make_array(0, 1, 0, 1, 0, 1, -2, -2));
TEST_INVALID_REGEX("|c", perl);
TEST_INVALID_REGEX("c|", perl);
TEST_INVALID_REGEX("(|)", perl);
TEST_INVALID_REGEX("(a|)", perl);
TEST_INVALID_REGEX("(|a)", perl);
TEST_REGEX_SEARCH("a\\|", perl, "a|", match_default, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("a|", basic, "a|", match_default, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("a\\|", basic, "a|", match_default, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("|", basic, "|", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("a|", basic|bk_vbar, "a|", match_default, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("a\\|b", basic|bk_vbar, "a", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("a\\|b", basic|bk_vbar, "b", match_default, make_array(0, 1, -2, -2));
#if 0
; now test the set operator []
- match_default normal REG_EXTENDED