mirror of
https://github.com/boostorg/regex.git
synced 2025-07-30 04:27:22 +02:00
Alternatives now work.
[SVN r22525]
This commit is contained in:
@ -348,14 +348,13 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
|
||||
state = static_cast<re_alt*>(state)->alt.p;
|
||||
break;;
|
||||
case syntax_element_alt:
|
||||
assert(0);
|
||||
case syntax_element_rep:
|
||||
case syntax_element_dot_rep:
|
||||
case syntax_element_char_rep:
|
||||
case syntax_element_short_set_rep:
|
||||
case syntax_element_long_set_rep:
|
||||
{
|
||||
re_alt* rep = static_cast<re_repeat*>(state);
|
||||
re_alt* rep = static_cast<re_alt*>(state);
|
||||
if(rep->_map[0] & mask_init)
|
||||
{
|
||||
if(map)
|
||||
|
@ -44,6 +44,7 @@ public:
|
||||
bool parse_match_any();
|
||||
bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
|
||||
bool parse_repeat_range(bool isbasic);
|
||||
bool parse_alt();
|
||||
|
||||
private:
|
||||
typedef bool (basic_regex_parser::*parser_proc_type)();
|
||||
@ -54,6 +55,7 @@ private:
|
||||
unsigned m_mark_count; // how many sub-expressions we have
|
||||
std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
|
||||
unsigned m_repeater_id; // the id of the next repeater
|
||||
std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
|
||||
|
||||
basic_regex_parser& operator=(const basic_regex_parser&);
|
||||
basic_regex_parser(const basic_regex_parser&);
|
||||
@ -61,7 +63,7 @@ private:
|
||||
|
||||
template <class charT, class traits>
|
||||
basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
|
||||
: basic_regex_creator<charT, traits>(data), m_mark_count(0), m_paren_start(0), m_repeater_id(0)
|
||||
: basic_regex_creator<charT, traits>(data), m_mark_count(0), m_paren_start(0), m_repeater_id(0), m_alt_insert_point(0)
|
||||
{
|
||||
}
|
||||
|
||||
@ -201,6 +203,8 @@ bool basic_regex_parser<charT, traits>::parse_extended()
|
||||
BOOST_ASSERT(0);
|
||||
result = false;
|
||||
break;
|
||||
case regex_constants::syntax_or:
|
||||
return parse_alt();
|
||||
default:
|
||||
result = parse_literal();
|
||||
break;
|
||||
@ -230,6 +234,10 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
|
||||
pb->index = markid;
|
||||
++m_position;
|
||||
std::ptrdiff_t last_paren_start = this->getoffset(pb);
|
||||
// back up insertion point for alternations, and set new point:
|
||||
std::ptrdiff_t last_alt_point = m_alt_insert_point;
|
||||
this->m_pdata->m_data.align();
|
||||
m_alt_insert_point = this->m_pdata->m_data.size();
|
||||
//
|
||||
// now recursively add more states, this will terminate when we get to a
|
||||
// matching ')' :
|
||||
@ -248,6 +256,10 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
|
||||
pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
|
||||
pb->index = markid;
|
||||
this->m_paren_start = last_paren_start;
|
||||
//
|
||||
// restore the alternate insertion point:
|
||||
//
|
||||
this->m_alt_insert_point = last_alt_point;
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -280,12 +292,22 @@ bool basic_regex_parser<charT, traits>::parse_basic_escape()
|
||||
else
|
||||
return parse_literal();
|
||||
case regex_constants::syntax_open_brace:
|
||||
if(this->m_pdata->m_flags & regbase::no_intervals)
|
||||
return parse_literal();
|
||||
++m_position;
|
||||
return parse_repeat_range(true);
|
||||
case regex_constants::syntax_close_brace:
|
||||
if(this->m_pdata->m_flags & regbase::no_intervals)
|
||||
return parse_literal();
|
||||
fail(REG_EBRACE, this->m_position - this->m_base);
|
||||
result = false;
|
||||
break;
|
||||
case regex_constants::syntax_or:
|
||||
if(this->m_pdata->m_flags & regbase::bk_vbar)
|
||||
return parse_alt();
|
||||
else
|
||||
result = parse_literal();
|
||||
break;
|
||||
default:
|
||||
result = parse_literal();
|
||||
break;
|
||||
@ -482,6 +504,50 @@ bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
|
||||
return parse_repeat(min, max);
|
||||
}
|
||||
|
||||
//
// parse_alt: handles an alternation operator at the current position.
//
// Appends a jump state after the states emitted so far, inserts a
// syntax_element_alt state at m_alt_insert_point, parses what follows
// the operator via parse_all(), and finally patches the jump's offset
// once the size of the trailing states is known.
//
// Returns the value returned by parse_all().
// Calls fail(REG_EMPTY, ...) when there is no previous state, when the
// previous state is a syntax_element_startmark, or when no data was
// added after the alternation operator.
//
template <class charT, class traits>
|
||||
bool basic_regex_parser<charT, traits>::parse_alt()
|
||||
{
|
||||
//
|
||||
// error check: if there have been no previous states,
|
||||
// or if the last state was a '(' then error:
|
||||
//
|
||||
// NOTE(review): there is no return after fail(), so execution continues
// below unless fail() throws -- confirm against fail()'s definition.
if((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
|
||||
fail(REG_EMPTY, this->m_position - this->m_base);
|
||||
++m_position;
|
||||
//
|
||||
// we need to append a trailing jump, then insert the alternative:
|
||||
//
|
||||
re_syntax_base* pj = this->append_state(re_detail::syntax_element_jump, sizeof(re_jump));
|
||||
std::ptrdiff_t jump_offset = this->getoffset(pj);
|
||||
re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
|
||||
// NOTE(review): presumably the alt state was inserted ahead of the jump,
// moving the jump by re_alt_size bytes -- confirm against insert_state().
jump_offset += re_alt_size;
|
||||
this->m_pdata->m_data.align();
|
||||
// the alt's offset is stored relative to the alt state itself:
palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
|
||||
//
|
||||
// update m_alt_insert_point so that the next alternate gets
|
||||
// inserted at the start of the second of the two we've just created:
|
||||
//
|
||||
this->m_alt_insert_point = this->m_pdata->m_data.size();
|
||||
//
|
||||
// recursively add states:
|
||||
//
|
||||
bool result = this->parse_all();
|
||||
//
|
||||
// if we didn't actually add any trailing states then that's an error:
|
||||
//
|
||||
// (detected by the data size still being equal to the insert point
// recorded before parse_all() was called)
if(this->m_alt_insert_point == this->m_pdata->m_data.size())
|
||||
fail(REG_EMPTY, this->m_position - this->m_base);
|
||||
//
|
||||
// fix up the jump we added to point to the end of the states
|
||||
// that we've just added:
|
||||
//
|
||||
this->m_pdata->m_data.align();
|
||||
// re-fetch the jump through its offset rather than reusing pj:
re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
|
||||
// the jump's offset is stored relative to the jump state itself:
jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace re_detail
|
||||
} // namespace boost
|
||||
|
||||
|
@ -59,6 +59,7 @@ public:
|
||||
no_char_classes = 1 << 8, // [[:CLASS:]] not allowed
|
||||
no_intervals = 1 << 9, // {x,y} not allowed
|
||||
bk_plus_qm = 1 << 10, // uses \+ and \?
|
||||
bk_vbar = 1 << 11, // use \| for alternatives
|
||||
|
||||
//
|
||||
// options common to all groups:
|
||||
@ -120,6 +121,8 @@ namespace regex_constants{
|
||||
nosubs = ::boost::regbase::nosubs,
|
||||
optimize = ::boost::regbase::optimize,
|
||||
bk_plus_qm = ::boost::regbase::bk_plus_qm,
|
||||
bk_vbar = ::boost::regbase::bk_vbar,
|
||||
no_intervals = ::boost::regbase::no_intervals,
|
||||
|
||||
basic = ::boost::regbase::basic,
|
||||
extended = ::boost::regbase::extended,
|
||||
|
@ -215,7 +215,8 @@ We provide this so we know how many bytes to insert when constructing the machine
|
||||
enum re_jump_size_type
|
||||
{
|
||||
re_jump_size = (sizeof(re_jump) + padding_mask) & ~(padding_mask),
|
||||
re_repeater_size = (sizeof(re_repeat) + padding_mask) & ~(padding_mask)
|
||||
re_repeater_size = (sizeof(re_repeat) + padding_mask) & ~(padding_mask),
|
||||
re_alt_size = (sizeof(re_alt) + padding_mask) & ~(padding_mask)
|
||||
};
|
||||
|
||||
/*** proc re_is_set_member *********************************************
|
||||
|
@ -180,29 +180,32 @@ void basic_tests()
|
||||
TEST_INVALID_REGEX("a\\{1b\\}", basic);
|
||||
TEST_INVALID_REGEX("a\\{1,b\\}", basic);
|
||||
TEST_INVALID_REGEX("a\\{1,2v\\}", basic);
|
||||
#if 0
|
||||
|
||||
; now test the alternation operator |
|
||||
- match_default normal REG_EXTENDED
|
||||
a|b a 0 1
|
||||
a|b b 0 1
|
||||
a(b|c) ab 0 2 1 2
|
||||
a(b|c) ac 0 2 1 2
|
||||
a(b|c) ad -1 -1 -1 -1
|
||||
|c !
|
||||
c| !
|
||||
(|) !
|
||||
(a|) !
|
||||
(|a) !
|
||||
a\| a| 0 2
|
||||
- match_default normal limited_ops
|
||||
a| a| 0 2
|
||||
a\| a| 0 2
|
||||
| | 0 1
|
||||
- match_default normal bk_vbar REG_NO_POSIX_TEST
|
||||
a| a| 0 2
|
||||
a\|b a 0 1
|
||||
a\|b b 0 1
|
||||
// now test the alternation operator |
|
||||
TEST_REGEX_SEARCH("a|b", perl, "a", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a|b", perl, "b", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a|b|c", perl, "c", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a|(b)|.", perl, "b", match_default, make_array(0, 1, 0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("(a)|b|.", perl, "a", match_default, make_array(0, 1, 0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c)", perl, "ab", match_default, make_array(0, 2, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c)", perl, "ac", match_default, make_array(0, 2, 1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a(b|c)", perl, "ad", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("(a|b|c)", perl, "c", match_default, make_array(0, 1, 0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("(a|(b)|.)", perl, "b", match_default, make_array(0, 1, 0, 1, 0, 1, -2, -2));
|
||||
TEST_INVALID_REGEX("|c", perl);
|
||||
TEST_INVALID_REGEX("c|", perl);
|
||||
TEST_INVALID_REGEX("(|)", perl);
|
||||
TEST_INVALID_REGEX("(a|)", perl);
|
||||
TEST_INVALID_REGEX("(|a)", perl);
|
||||
TEST_REGEX_SEARCH("a\\|", perl, "a|", match_default, make_array(0, 2, -2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("a|", basic, "a|", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a\\|", basic, "a|", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("|", basic, "|", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a|", basic|bk_vbar, "a|", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("a\\|b", basic|bk_vbar, "a", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("a\\|b", basic|bk_vbar, "b", match_default, make_array(0, 1, -2, -2));
|
||||
#if 0
|
||||
|
||||
; now test the set operator []
|
||||
- match_default normal REG_EXTENDED
|
||||
|
Reference in New Issue
Block a user