Alternatives now work.

[SVN r22525]
This commit is contained in:
John Maddock
2004-03-19 12:58:49 +00:00
parent 23d7352b19
commit 38b58f2007
5 changed files with 98 additions and 26 deletions

View File

@@ -348,14 +348,13 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
state = static_cast<re_alt*>(state)->alt.p; state = static_cast<re_alt*>(state)->alt.p;
break;; break;;
case syntax_element_alt: case syntax_element_alt:
assert(0);
case syntax_element_rep: case syntax_element_rep:
case syntax_element_dot_rep: case syntax_element_dot_rep:
case syntax_element_char_rep: case syntax_element_char_rep:
case syntax_element_short_set_rep: case syntax_element_short_set_rep:
case syntax_element_long_set_rep: case syntax_element_long_set_rep:
{ {
re_alt* rep = static_cast<re_repeat*>(state); re_alt* rep = static_cast<re_alt*>(state);
if(rep->_map[0] & mask_init) if(rep->_map[0] & mask_init)
{ {
if(map) if(map)

View File

@@ -44,6 +44,7 @@ public:
bool parse_match_any(); bool parse_match_any();
bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)()); bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
bool parse_repeat_range(bool isbasic); bool parse_repeat_range(bool isbasic);
bool parse_alt();
private: private:
typedef bool (basic_regex_parser::*parser_proc_type)(); typedef bool (basic_regex_parser::*parser_proc_type)();
@@ -54,6 +55,7 @@ private:
unsigned m_mark_count; // how many sub-expressions we have unsigned m_mark_count; // how many sub-expressions we have
std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted). std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
unsigned m_repeater_id; // the id of the next repeater unsigned m_repeater_id; // the id of the next repeater
std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
basic_regex_parser& operator=(const basic_regex_parser&); basic_regex_parser& operator=(const basic_regex_parser&);
basic_regex_parser(const basic_regex_parser&); basic_regex_parser(const basic_regex_parser&);
@@ -61,7 +63,7 @@ private:
template <class charT, class traits> template <class charT, class traits>
basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data) basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
: basic_regex_creator<charT, traits>(data), m_mark_count(0), m_paren_start(0), m_repeater_id(0) : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_paren_start(0), m_repeater_id(0), m_alt_insert_point(0)
{ {
} }
@@ -201,6 +203,8 @@ bool basic_regex_parser<charT, traits>::parse_extended()
BOOST_ASSERT(0); BOOST_ASSERT(0);
result = false; result = false;
break; break;
case regex_constants::syntax_or:
return parse_alt();
default: default:
result = parse_literal(); result = parse_literal();
break; break;
@@ -230,6 +234,10 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
pb->index = markid; pb->index = markid;
++m_position; ++m_position;
std::ptrdiff_t last_paren_start = this->getoffset(pb); std::ptrdiff_t last_paren_start = this->getoffset(pb);
// back up insertion point for alternations, and set new point:
std::ptrdiff_t last_alt_point = m_alt_insert_point;
this->m_pdata->m_data.align();
m_alt_insert_point = this->m_pdata->m_data.size();
// //
// now recursively add more states, this will terminate when we get to a // now recursively add more states, this will terminate when we get to a
// matching ')' : // matching ')' :
@@ -248,6 +256,10 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace))); pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
pb->index = markid; pb->index = markid;
this->m_paren_start = last_paren_start; this->m_paren_start = last_paren_start;
//
// restore the alternate insertion point:
//
this->m_alt_insert_point = last_alt_point;
return true; return true;
} }
@@ -280,12 +292,22 @@ bool basic_regex_parser<charT, traits>::parse_basic_escape()
else else
return parse_literal(); return parse_literal();
case regex_constants::syntax_open_brace: case regex_constants::syntax_open_brace:
if(this->m_pdata->m_flags & regbase::no_intervals)
return parse_literal();
++m_position; ++m_position;
return parse_repeat_range(true); return parse_repeat_range(true);
case regex_constants::syntax_close_brace: case regex_constants::syntax_close_brace:
if(this->m_pdata->m_flags & regbase::no_intervals)
return parse_literal();
fail(REG_EBRACE, this->m_position - this->m_base); fail(REG_EBRACE, this->m_position - this->m_base);
result = false; result = false;
break; break;
case regex_constants::syntax_or:
if(this->m_pdata->m_flags & regbase::bk_vbar)
return parse_alt();
else
result = parse_literal();
break;
default: default:
result = parse_literal(); result = parse_literal();
break; break;
@@ -482,6 +504,50 @@ bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
return parse_repeat(min, max); return parse_repeat(min, max);
} }
//
// parse_alt:
// Called when the parser meets an alternation operator.  The states
// parsed so far (since m_alt_insert_point) form the first alternative:
// a jump state is appended after them, an alt state is inserted in
// front of them, and the remainder of the expression is then parsed
// recursively as the second alternative.  Finally the jump is patched
// to point just past the second alternative.
//
// Returns false if either side of the operator is empty (after
// reporting REG_EMPTY through fail()), otherwise the result of the
// recursive parse_all() call.
//
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_alt()
{
   //
   // error check: if there have been no previous states,
   // or if the last state was a '(' then error:
   //
   if((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
   {
      fail(REG_EMPTY, this->m_position - this->m_base);
      //
      // NOTE(review): whether fail() throws is not visible from here;
      // other callers follow it with "result = false", so make sure we
      // never go on to build states for an invalid expression:
      //
      return false;
   }
   ++m_position;
   //
   // we need to append a trailing jump, then insert the alternative:
   //
   re_syntax_base* pj = this->append_state(re_detail::syntax_element_jump, sizeof(re_jump));
   std::ptrdiff_t jump_offset = this->getoffset(pj);
   re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
   //
   // the alt state was inserted ahead of the jump, so account for the
   // space it occupies when remembering where the jump lives:
   //
   jump_offset += re_alt_size;
   this->m_pdata->m_data.align();
   //
   // the alt state's second branch starts at the (aligned) end of the
   // data, stored as an offset relative to the alt state itself:
   //
   palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
   //
   // update m_alt_insert_point so that the next alternate gets
   // inserted at the start of the second of the two we've just created:
   //
   this->m_alt_insert_point = this->m_pdata->m_data.size();
   //
   // recursively add states:
   //
   bool result = this->parse_all();
   //
   // if we didn't actually add any trailing states then that's an error:
   //
   if(this->m_alt_insert_point == this->m_pdata->m_data.size())
   {
      fail(REG_EMPTY, this->m_position - this->m_base);
      //
      // an empty trailing alternative must never be reported as a
      // success, even if fail() returns and parse_all() said true:
      //
      result = false;
   }
   //
   // fix up the jump we added to point to the end of the states
   // that we've just added (done even on failure so that the jump is
   // never left unpatched):
   //
   this->m_pdata->m_data.align();
   re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
   jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
   return result;
}
} // namespace re_detail } // namespace re_detail
} // namespace boost } // namespace boost

View File

@@ -59,6 +59,7 @@ public:
no_char_classes = 1 << 8, // [[:CLASS:]] not allowed no_char_classes = 1 << 8, // [[:CLASS:]] not allowed
no_intervals = 1 << 9, // {x,y} not allowed no_intervals = 1 << 9, // {x,y} not allowed
bk_plus_qm = 1 << 10, // uses \+ and \? bk_plus_qm = 1 << 10, // uses \+ and \?
bk_vbar = 1 << 11, // use \| for alternatives
// //
// options common to all groups: // options common to all groups:
@@ -120,6 +121,8 @@ namespace regex_constants{
nosubs = ::boost::regbase::nosubs, nosubs = ::boost::regbase::nosubs,
optimize = ::boost::regbase::optimize, optimize = ::boost::regbase::optimize,
bk_plus_qm = ::boost::regbase::bk_plus_qm, bk_plus_qm = ::boost::regbase::bk_plus_qm,
bk_vbar = ::boost::regbase::bk_vbar,
no_intervals = ::boost::regbase::no_intervals,
basic = ::boost::regbase::basic, basic = ::boost::regbase::basic,
extended = ::boost::regbase::extended, extended = ::boost::regbase::extended,

View File

@@ -215,7 +215,8 @@ We provide this so we know how manybytes to insert when constructing the machine
enum re_jump_size_type enum re_jump_size_type
{ {
re_jump_size = (sizeof(re_jump) + padding_mask) & ~(padding_mask), re_jump_size = (sizeof(re_jump) + padding_mask) & ~(padding_mask),
re_repeater_size = (sizeof(re_repeat) + padding_mask) & ~(padding_mask) re_repeater_size = (sizeof(re_repeat) + padding_mask) & ~(padding_mask),
re_alt_size = (sizeof(re_alt) + padding_mask) & ~(padding_mask)
}; };
/*** proc re_is_set_member ********************************************* /*** proc re_is_set_member *********************************************

View File

@@ -180,29 +180,32 @@ void basic_tests()
TEST_INVALID_REGEX("a\\{1b\\}", basic); TEST_INVALID_REGEX("a\\{1b\\}", basic);
TEST_INVALID_REGEX("a\\{1,b\\}", basic); TEST_INVALID_REGEX("a\\{1,b\\}", basic);
TEST_INVALID_REGEX("a\\{1,2v\\}", basic); TEST_INVALID_REGEX("a\\{1,2v\\}", basic);
#if 0
; now test the alternation operator | // now test the alternation operator |
- match_default normal REG_EXTENDED TEST_REGEX_SEARCH("a|b", perl, "a", match_default, make_array(0, 1, -2, -2));
a|b a 0 1 TEST_REGEX_SEARCH("a|b", perl, "b", match_default, make_array(0, 1, -2, -2));
a|b b 0 1 TEST_REGEX_SEARCH("a|b|c", perl, "c", match_default, make_array(0, 1, -2, -2));
a(b|c) ab 0 2 1 2 TEST_REGEX_SEARCH("a|(b)|.", perl, "b", match_default, make_array(0, 1, 0, 1, -2, -2));
a(b|c) ac 0 2 1 2 TEST_REGEX_SEARCH("(a)|b|.", perl, "a", match_default, make_array(0, 1, 0, 1, -2, -2));
a(b|c) ad -1 -1 -1 -1 TEST_REGEX_SEARCH("a(b|c)", perl, "ab", match_default, make_array(0, 2, 1, 2, -2, -2));
|c ! TEST_REGEX_SEARCH("a(b|c)", perl, "ac", match_default, make_array(0, 2, 1, 2, -2, -2));
c| ! TEST_REGEX_SEARCH("a(b|c)", perl, "ad", match_default, make_array(-2, -2));
(|) ! TEST_REGEX_SEARCH("(a|b|c)", perl, "c", match_default, make_array(0, 1, 0, 1, -2, -2));
(a|) ! TEST_REGEX_SEARCH("(a|(b)|.)", perl, "b", match_default, make_array(0, 1, 0, 1, 0, 1, -2, -2));
(|a) ! TEST_INVALID_REGEX("|c", perl);
a\| a| 0 2 TEST_INVALID_REGEX("c|", perl);
- match_default normal limited_ops TEST_INVALID_REGEX("(|)", perl);
a| a| 0 2 TEST_INVALID_REGEX("(a|)", perl);
a\| a| 0 2 TEST_INVALID_REGEX("(|a)", perl);
| | 0 1 TEST_REGEX_SEARCH("a\\|", perl, "a|", match_default, make_array(0, 2, -2, -2));
- match_default normal bk_vbar REG_NO_POSIX_TEST
a| a| 0 2 TEST_REGEX_SEARCH("a|", basic, "a|", match_default, make_array(0, 2, -2, -2));
a\|b a 0 1 TEST_REGEX_SEARCH("a\\|", basic, "a|", match_default, make_array(0, 2, -2, -2));
a\|b b 0 1 TEST_REGEX_SEARCH("|", basic, "|", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("a|", basic|bk_vbar, "a|", match_default, make_array(0, 2, -2, -2));
TEST_REGEX_SEARCH("a\\|b", basic|bk_vbar, "a", match_default, make_array(0, 1, -2, -2));
TEST_REGEX_SEARCH("a\\|b", basic|bk_vbar, "b", match_default, make_array(0, 1, -2, -2));
#if 0
; now test the set operator [] ; now test the set operator []
- match_default normal REG_EXTENDED - match_default normal REG_EXTENDED