forked from boostorg/regex
Alternatives now work.
[SVN r22525]
This commit is contained in:
@@ -348,14 +348,13 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
|
|||||||
state = static_cast<re_alt*>(state)->alt.p;
|
state = static_cast<re_alt*>(state)->alt.p;
|
||||||
break;;
|
break;;
|
||||||
case syntax_element_alt:
|
case syntax_element_alt:
|
||||||
assert(0);
|
|
||||||
case syntax_element_rep:
|
case syntax_element_rep:
|
||||||
case syntax_element_dot_rep:
|
case syntax_element_dot_rep:
|
||||||
case syntax_element_char_rep:
|
case syntax_element_char_rep:
|
||||||
case syntax_element_short_set_rep:
|
case syntax_element_short_set_rep:
|
||||||
case syntax_element_long_set_rep:
|
case syntax_element_long_set_rep:
|
||||||
{
|
{
|
||||||
re_alt* rep = static_cast<re_repeat*>(state);
|
re_alt* rep = static_cast<re_alt*>(state);
|
||||||
if(rep->_map[0] & mask_init)
|
if(rep->_map[0] & mask_init)
|
||||||
{
|
{
|
||||||
if(map)
|
if(map)
|
||||||
|
@@ -44,6 +44,7 @@ public:
|
|||||||
bool parse_match_any();
|
bool parse_match_any();
|
||||||
bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
|
bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
|
||||||
bool parse_repeat_range(bool isbasic);
|
bool parse_repeat_range(bool isbasic);
|
||||||
|
bool parse_alt();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
typedef bool (basic_regex_parser::*parser_proc_type)();
|
typedef bool (basic_regex_parser::*parser_proc_type)();
|
||||||
@@ -54,6 +55,7 @@ private:
|
|||||||
unsigned m_mark_count; // how many sub-expressions we have
|
unsigned m_mark_count; // how many sub-expressions we have
|
||||||
std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
|
std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
|
||||||
unsigned m_repeater_id; // the id of the next repeater
|
unsigned m_repeater_id; // the id of the next repeater
|
||||||
|
std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
|
||||||
|
|
||||||
basic_regex_parser& operator=(const basic_regex_parser&);
|
basic_regex_parser& operator=(const basic_regex_parser&);
|
||||||
basic_regex_parser(const basic_regex_parser&);
|
basic_regex_parser(const basic_regex_parser&);
|
||||||
@@ -61,7 +63,7 @@ private:
|
|||||||
|
|
||||||
template <class charT, class traits>
|
template <class charT, class traits>
|
||||||
basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
|
basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
|
||||||
: basic_regex_creator<charT, traits>(data), m_mark_count(0), m_paren_start(0), m_repeater_id(0)
|
: basic_regex_creator<charT, traits>(data), m_mark_count(0), m_paren_start(0), m_repeater_id(0), m_alt_insert_point(0)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -201,6 +203,8 @@ bool basic_regex_parser<charT, traits>::parse_extended()
|
|||||||
BOOST_ASSERT(0);
|
BOOST_ASSERT(0);
|
||||||
result = false;
|
result = false;
|
||||||
break;
|
break;
|
||||||
|
case regex_constants::syntax_or:
|
||||||
|
return parse_alt();
|
||||||
default:
|
default:
|
||||||
result = parse_literal();
|
result = parse_literal();
|
||||||
break;
|
break;
|
||||||
@@ -230,6 +234,10 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
|
|||||||
pb->index = markid;
|
pb->index = markid;
|
||||||
++m_position;
|
++m_position;
|
||||||
std::ptrdiff_t last_paren_start = this->getoffset(pb);
|
std::ptrdiff_t last_paren_start = this->getoffset(pb);
|
||||||
|
// back up insertion point for alternations, and set new point:
|
||||||
|
std::ptrdiff_t last_alt_point = m_alt_insert_point;
|
||||||
|
this->m_pdata->m_data.align();
|
||||||
|
m_alt_insert_point = this->m_pdata->m_data.size();
|
||||||
//
|
//
|
||||||
// now recursively add more states, this will terminate when we get to a
|
// now recursively add more states, this will terminate when we get to a
|
||||||
// matching ')' :
|
// matching ')' :
|
||||||
@@ -248,6 +256,10 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
|
|||||||
pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
|
pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
|
||||||
pb->index = markid;
|
pb->index = markid;
|
||||||
this->m_paren_start = last_paren_start;
|
this->m_paren_start = last_paren_start;
|
||||||
|
//
|
||||||
|
// restore the alternate insertion point:
|
||||||
|
//
|
||||||
|
this->m_alt_insert_point = last_alt_point;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -280,12 +292,22 @@ bool basic_regex_parser<charT, traits>::parse_basic_escape()
|
|||||||
else
|
else
|
||||||
return parse_literal();
|
return parse_literal();
|
||||||
case regex_constants::syntax_open_brace:
|
case regex_constants::syntax_open_brace:
|
||||||
|
if(this->m_pdata->m_flags & regbase::no_intervals)
|
||||||
|
return parse_literal();
|
||||||
++m_position;
|
++m_position;
|
||||||
return parse_repeat_range(true);
|
return parse_repeat_range(true);
|
||||||
case regex_constants::syntax_close_brace:
|
case regex_constants::syntax_close_brace:
|
||||||
|
if(this->m_pdata->m_flags & regbase::no_intervals)
|
||||||
|
return parse_literal();
|
||||||
fail(REG_EBRACE, this->m_position - this->m_base);
|
fail(REG_EBRACE, this->m_position - this->m_base);
|
||||||
result = false;
|
result = false;
|
||||||
break;
|
break;
|
||||||
|
case regex_constants::syntax_or:
|
||||||
|
if(this->m_pdata->m_flags & regbase::bk_vbar)
|
||||||
|
return parse_alt();
|
||||||
|
else
|
||||||
|
result = parse_literal();
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
result = parse_literal();
|
result = parse_literal();
|
||||||
break;
|
break;
|
||||||
@@ -482,6 +504,50 @@ bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
|
|||||||
return parse_repeat(min, max);
|
return parse_repeat(min, max);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class charT, class traits>
|
||||||
|
bool basic_regex_parser<charT, traits>::parse_alt()
|
||||||
|
{
|
||||||
|
//
|
||||||
|
// error check: if there have been no previous states,
|
||||||
|
// or if the last state was a '(' then error:
|
||||||
|
//
|
||||||
|
if((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
|
||||||
|
fail(REG_EMPTY, this->m_position - this->m_base);
|
||||||
|
++m_position;
|
||||||
|
//
|
||||||
|
// we need to append a trailing jump, then insert the alternative:
|
||||||
|
//
|
||||||
|
re_syntax_base* pj = this->append_state(re_detail::syntax_element_jump, sizeof(re_jump));
|
||||||
|
std::ptrdiff_t jump_offset = this->getoffset(pj);
|
||||||
|
re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
|
||||||
|
jump_offset += re_alt_size;
|
||||||
|
this->m_pdata->m_data.align();
|
||||||
|
palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
|
||||||
|
//
|
||||||
|
// update m_alt_insert_point so that the next alternate gets
|
||||||
|
// inserted at the start of the second of the two we've just created:
|
||||||
|
//
|
||||||
|
this->m_alt_insert_point = this->m_pdata->m_data.size();
|
||||||
|
//
|
||||||
|
// recursively add states:
|
||||||
|
//
|
||||||
|
bool result = this->parse_all();
|
||||||
|
//
|
||||||
|
// if we didn't actually add any trailing states then that's an error:
|
||||||
|
//
|
||||||
|
if(this->m_alt_insert_point == this->m_pdata->m_data.size())
|
||||||
|
fail(REG_EMPTY, this->m_position - this->m_base);
|
||||||
|
//
|
||||||
|
// fix up the jump we added to point to the end of the states
|
||||||
|
// that we've just added:
|
||||||
|
//
|
||||||
|
this->m_pdata->m_data.align();
|
||||||
|
re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
|
||||||
|
jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace re_detail
|
} // namespace re_detail
|
||||||
} // namespace boost
|
} // namespace boost
|
||||||
|
|
||||||
|
@@ -59,6 +59,7 @@ public:
|
|||||||
no_char_classes = 1 << 8, // [[:CLASS:]] not allowed
|
no_char_classes = 1 << 8, // [[:CLASS:]] not allowed
|
||||||
no_intervals = 1 << 9, // {x,y} not allowed
|
no_intervals = 1 << 9, // {x,y} not allowed
|
||||||
bk_plus_qm = 1 << 10, // uses \+ and \?
|
bk_plus_qm = 1 << 10, // uses \+ and \?
|
||||||
|
bk_vbar = 1 << 11, // use \| for alternatives
|
||||||
|
|
||||||
//
|
//
|
||||||
// options common to all groups:
|
// options common to all groups:
|
||||||
@@ -120,6 +121,8 @@ namespace regex_constants{
|
|||||||
nosubs = ::boost::regbase::nosubs,
|
nosubs = ::boost::regbase::nosubs,
|
||||||
optimize = ::boost::regbase::optimize,
|
optimize = ::boost::regbase::optimize,
|
||||||
bk_plus_qm = ::boost::regbase::bk_plus_qm,
|
bk_plus_qm = ::boost::regbase::bk_plus_qm,
|
||||||
|
bk_vbar = ::boost::regbase::bk_vbar,
|
||||||
|
no_intervals = ::boost::regbase::no_intervals,
|
||||||
|
|
||||||
basic = ::boost::regbase::basic,
|
basic = ::boost::regbase::basic,
|
||||||
extended = ::boost::regbase::extended,
|
extended = ::boost::regbase::extended,
|
||||||
|
@@ -215,7 +215,8 @@ We provide this so we know how many bytes to insert when constructing the machine
|
|||||||
enum re_jump_size_type
|
enum re_jump_size_type
|
||||||
{
|
{
|
||||||
re_jump_size = (sizeof(re_jump) + padding_mask) & ~(padding_mask),
|
re_jump_size = (sizeof(re_jump) + padding_mask) & ~(padding_mask),
|
||||||
re_repeater_size = (sizeof(re_repeat) + padding_mask) & ~(padding_mask)
|
re_repeater_size = (sizeof(re_repeat) + padding_mask) & ~(padding_mask),
|
||||||
|
re_alt_size = (sizeof(re_alt) + padding_mask) & ~(padding_mask)
|
||||||
};
|
};
|
||||||
|
|
||||||
/*** proc re_is_set_member *********************************************
|
/*** proc re_is_set_member *********************************************
|
||||||
|
@@ -180,29 +180,32 @@ void basic_tests()
|
|||||||
TEST_INVALID_REGEX("a\\{1b\\}", basic);
|
TEST_INVALID_REGEX("a\\{1b\\}", basic);
|
||||||
TEST_INVALID_REGEX("a\\{1,b\\}", basic);
|
TEST_INVALID_REGEX("a\\{1,b\\}", basic);
|
||||||
TEST_INVALID_REGEX("a\\{1,2v\\}", basic);
|
TEST_INVALID_REGEX("a\\{1,2v\\}", basic);
|
||||||
#if 0
|
|
||||||
|
|
||||||
; now test the alternation operator |
|
// now test the alternation operator |
|
||||||
- match_default normal REG_EXTENDED
|
TEST_REGEX_SEARCH("a|b", perl, "a", match_default, make_array(0, 1, -2, -2));
|
||||||
a|b a 0 1
|
TEST_REGEX_SEARCH("a|b", perl, "b", match_default, make_array(0, 1, -2, -2));
|
||||||
a|b b 0 1
|
TEST_REGEX_SEARCH("a|b|c", perl, "c", match_default, make_array(0, 1, -2, -2));
|
||||||
a(b|c) ab 0 2 1 2
|
TEST_REGEX_SEARCH("a|(b)|.", perl, "b", match_default, make_array(0, 1, 0, 1, -2, -2));
|
||||||
a(b|c) ac 0 2 1 2
|
TEST_REGEX_SEARCH("(a)|b|.", perl, "a", match_default, make_array(0, 1, 0, 1, -2, -2));
|
||||||
a(b|c) ad -1 -1 -1 -1
|
TEST_REGEX_SEARCH("a(b|c)", perl, "ab", match_default, make_array(0, 2, 1, 2, -2, -2));
|
||||||
|c !
|
TEST_REGEX_SEARCH("a(b|c)", perl, "ac", match_default, make_array(0, 2, 1, 2, -2, -2));
|
||||||
c| !
|
TEST_REGEX_SEARCH("a(b|c)", perl, "ad", match_default, make_array(-2, -2));
|
||||||
(|) !
|
TEST_REGEX_SEARCH("(a|b|c)", perl, "c", match_default, make_array(0, 1, 0, 1, -2, -2));
|
||||||
(a|) !
|
TEST_REGEX_SEARCH("(a|(b)|.)", perl, "b", match_default, make_array(0, 1, 0, 1, 0, 1, -2, -2));
|
||||||
(|a) !
|
TEST_INVALID_REGEX("|c", perl);
|
||||||
a\| a| 0 2
|
TEST_INVALID_REGEX("c|", perl);
|
||||||
- match_default normal limited_ops
|
TEST_INVALID_REGEX("(|)", perl);
|
||||||
a| a| 0 2
|
TEST_INVALID_REGEX("(a|)", perl);
|
||||||
a\| a| 0 2
|
TEST_INVALID_REGEX("(|a)", perl);
|
||||||
| | 0 1
|
TEST_REGEX_SEARCH("a\\|", perl, "a|", match_default, make_array(0, 2, -2, -2));
|
||||||
- match_default normal bk_vbar REG_NO_POSIX_TEST
|
|
||||||
a| a| 0 2
|
TEST_REGEX_SEARCH("a|", basic, "a|", match_default, make_array(0, 2, -2, -2));
|
||||||
a\|b a 0 1
|
TEST_REGEX_SEARCH("a\\|", basic, "a|", match_default, make_array(0, 2, -2, -2));
|
||||||
a\|b b 0 1
|
TEST_REGEX_SEARCH("|", basic, "|", match_default, make_array(0, 1, -2, -2));
|
||||||
|
TEST_REGEX_SEARCH("a|", basic|bk_vbar, "a|", match_default, make_array(0, 2, -2, -2));
|
||||||
|
TEST_REGEX_SEARCH("a\\|b", basic|bk_vbar, "a", match_default, make_array(0, 1, -2, -2));
|
||||||
|
TEST_REGEX_SEARCH("a\\|b", basic|bk_vbar, "b", match_default, make_array(0, 1, -2, -2));
|
||||||
|
#if 0
|
||||||
|
|
||||||
; now test the set operator []
|
; now test the set operator []
|
||||||
- match_default normal REG_EXTENDED
|
- match_default normal REG_EXTENDED
|
||||||
|
Reference in New Issue
Block a user