Start adding Perl verb support with FAIL as the first supported verb.

This commit is contained in:
jzmaddock
2015-09-25 19:30:20 +01:00
parent d8c95a9950
commit b557febb0e
9 changed files with 93 additions and 3 deletions

View File

@ -68,6 +68,8 @@ public:
bool parse_inner_set(basic_char_set<charT, traits>& char_set); bool parse_inner_set(basic_char_set<charT, traits>& char_set);
bool parse_QE(); bool parse_QE();
bool parse_perl_extension(); bool parse_perl_extension();
// Parses a Perl-style verb group of the form "(*VERB)". Called from
// parse_open_paren() when the character following '(' is a '*'.
// Returns false if the verb is malformed or not supported.
bool parse_perl_verb();
// Compares the pattern text at the current parse position against the
// narrow string passed in, advancing past each matching character.
// Reports error_perl_extension and returns false on mismatch or if the
// pattern ends as soon as a character has been consumed.
bool match_verb(const char*);
bool add_emacs_code(bool negate); bool add_emacs_code(bool negate);
bool unwind_alts(std::ptrdiff_t last_paren_start); bool unwind_alts(std::ptrdiff_t last_paren_start);
digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set); digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
@ -421,6 +423,8 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
{ {
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question) if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
return parse_perl_extension(); return parse_perl_extension();
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_star)
return parse_perl_verb();
} }
// //
// update our mark count, and append the required state: // update our mark count, and append the required state:
@ -2652,6 +2656,70 @@ option_group_jump:
return true; return true;
} }
//
// Matches the NUL-terminated narrow string `verb` against the pattern text
// starting at m_position, consuming one pattern character per verb character.
//
// Returns true when every character of `verb` was matched and at least one
// pattern character remains after it.  Otherwise m_position is moved back to
// the nearest preceding opening parenthesis, error_perl_extension is reported
// through fail(), and false is returned.
//
// The caller must ensure m_position != m_end on entry.
//
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::match_verb(const char* verb)
{
   for(; *verb != 0; ++verb)
   {
      // Does the current pattern character agree with the expected one?
      bool verb_ok = (static_cast<charT>(*verb) == *m_position);
      if(verb_ok && (++m_position == m_end))
      {
         // Pattern ended right after the matched character: step back
         // onto a valid character before rewinding.
         --m_position;
         verb_ok = false;
      }
      if(!verb_ok)
      {
         // Rewind to the opening parenthesis so the error points at the group:
         while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark)
            --m_position;
         fail(regex_constants::error_perl_extension, m_position - m_base);
         return false;
      }
   }
   return true;
}
//
// Parses a Perl-style verb group "(*VERB)".
//
// On entry m_position refers to the '*' that follows the opening parenthesis
// (see parse_open_paren).  Currently the only supported verb is FAIL, which
// may also be written in its short form F; for it a syntax_element_fail
// state is appended and true is returned with m_position just past the ')'.
//
// Every failure path reports error_perl_extension via fail(), with the
// position rewound to the opening parenthesis of the group, and returns false.
//
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_perl_verb()
{
   if(++m_position == m_end)
   {
      // Pattern ends right after "(*".
      // Rewind to start of (* sequence:
      --m_position;
      while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
      fail(regex_constants::error_perl_extension, m_position - m_base);
      return false;
   }
   switch(*m_position)
   {
   case 'F':
      if(++m_position == m_end)
      {
         // Pattern ends right after "(*F".
         // Rewind to start of (* sequence:
         --m_position;
         while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
         fail(regex_constants::error_perl_extension, m_position - m_base);
         return false;
      }
      // Accept either the short form "(*F)" or the long form "(*FAIL)":
      if((this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) || match_verb("AIL"))
      {
         if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
         {
            // Verb name is not followed by ')'.
            // Rewind to start of (* sequence:
            --m_position;
            while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
            fail(regex_constants::error_perl_extension, m_position - m_base);
            return false;
         }
         ++m_position;
         this->append_state(syntax_element_fail);
         return true;
      }
      // match_verb() has already reported the error, nothing more to do here:
      return false;
   default:
      break;
   }
   //
   // Unrecognised verb: report it in the same way as every other malformed
   // verb rather than returning false silently.  m_position still refers to
   // the character after '*', so step back onto the '*' first; this keeps the
   // rewind correct even if the offending character is itself a '('.
   //
   --m_position;
   while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
   fail(regex_constants::error_perl_extension, m_position - m_base);
   return false;
}
template <class charT, class traits> template <class charT, class traits>
bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate) bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
{ {

View File

@ -443,6 +443,7 @@ private:
bool backtrack_till_match(std::size_t count); bool backtrack_till_match(std::size_t count);
#endif #endif
bool match_recursion(); bool match_recursion();
// Matcher for the (*FAIL) verb state: always returns false to force a backtrack.
bool match_fail();
// find procs stored in s_find_vtable: // find procs stored in s_find_vtable:
bool find_restart_any(); bool find_restart_any();

View File

@ -793,6 +793,13 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case()
return true; return true;
} }
//
// State handler for the (*FAIL) verb.  It never matches: it consumes no
// input and modifies no matcher state.
//
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_fail()
{
   // Reporting "no match" here is what makes the matcher backtrack:
   return false;
}
template <class BidiIterator, class Allocator, class traits> template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_any() bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_any()

View File

@ -141,7 +141,7 @@ struct saved_recursion : public saved_state
template <class BidiIterator, class Allocator, class traits> template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states() bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
{ {
static matcher_proc_type const s_match_vtable[30] = static matcher_proc_type const s_match_vtable[31] =
{ {
(&perl_matcher<BidiIterator, Allocator, traits>::match_startmark), (&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
&perl_matcher<BidiIterator, Allocator, traits>::match_endmark, &perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
@ -177,6 +177,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
&perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref, &perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref,
&perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case, &perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case,
&perl_matcher<BidiIterator, Allocator, traits>::match_recursion, &perl_matcher<BidiIterator, Allocator, traits>::match_recursion,
&perl_matcher<BidiIterator, Allocator, traits>::match_fail,
}; };
push_recursion_stopper(); push_recursion_stopper();

View File

@ -60,7 +60,7 @@ public:
template <class BidiIterator, class Allocator, class traits> template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states() bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
{ {
static matcher_proc_type const s_match_vtable[30] = static matcher_proc_type const s_match_vtable[31] =
{ {
(&perl_matcher<BidiIterator, Allocator, traits>::match_startmark), (&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
&perl_matcher<BidiIterator, Allocator, traits>::match_endmark, &perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
@ -96,6 +96,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
&perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref, &perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref,
&perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case, &perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case,
&perl_matcher<BidiIterator, Allocator, traits>::match_recursion, &perl_matcher<BidiIterator, Allocator, traits>::match_recursion,
&perl_matcher<BidiIterator, Allocator, traits>::match_fail,
}; };
if(state_count > max_state_count) if(state_count > max_state_count)

View File

@ -120,7 +120,9 @@ enum syntax_element_type
syntax_element_assert_backref = syntax_element_backstep + 1, syntax_element_assert_backref = syntax_element_backstep + 1,
syntax_element_toggle_case = syntax_element_assert_backref + 1, syntax_element_toggle_case = syntax_element_assert_backref + 1,
// a recursive expression: // a recursive expression:
syntax_element_recurse = syntax_element_toggle_case + 1 syntax_element_recurse = syntax_element_toggle_case + 1,
// Verbs:
syntax_element_fail = syntax_element_recurse + 1,
}; };
#ifdef BOOST_REGEX_DEBUG #ifdef BOOST_REGEX_DEBUG

View File

@ -82,6 +82,7 @@ void run_tests()
RUN_TESTS(test_pocessive_repeats); RUN_TESTS(test_pocessive_repeats);
RUN_TESTS(test_mark_resets); RUN_TESTS(test_mark_resets);
RUN_TESTS(test_recursion); RUN_TESTS(test_recursion);
RUN_TESTS(test_verbs);
} }
int cpp_main(int /*argc*/, char * /*argv*/[]) int cpp_main(int /*argc*/, char * /*argv*/[])

View File

@ -288,5 +288,6 @@ void test_unicode();
void test_pocessive_repeats(); void test_pocessive_repeats();
void test_mark_resets(); void test_mark_resets();
void test_recursion(); void test_recursion();
void test_verbs();
#endif #endif

View File

@ -932,3 +932,11 @@ void test_recursion()
TEST_REGEX_SEARCH("(?:(?<A>a+)|(?<A>b+)|c+)\\.\\k<A>", perl, "cccc.cccc", match_default, make_array(-2, -2)); TEST_REGEX_SEARCH("(?:(?<A>a+)|(?<A>b+)|c+)\\.\\k<A>", perl, "cccc.cccc", match_default, make_array(-2, -2));
} }
//
// Tests for Perl-style (*VERB) support.  Only FAIL (and its short form F)
// is exercised here.
//
void test_verbs()
{
   using namespace boost::regex_constants;
   // Truncated verbs: the pattern ends before the closing parenthesis.
   TEST_INVALID_REGEX("a+(*", perl);
   TEST_INVALID_REGEX("a+(*F", perl);
   TEST_INVALID_REGEX("a+(*FAIL", perl);
   // Malformed verb names:
   TEST_INVALID_REGEX("a+(*FX)", perl);
   TEST_INVALID_REGEX("a+(*FAILX)", perl);
   // (*FAIL) and its short form (*F) never match, so the whole search fails:
   TEST_REGEX_SEARCH("a+(*FAIL)b", perl, "aaaab", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a+(*F)b", perl, "aaaab", match_default, make_array(-2, -2));
}