mirror of
https://github.com/boostorg/regex.git
synced 2025-07-16 22:02:08 +02:00
Consolidated patches and performance tweaks.
[SVN r31492]
This commit is contained in:
@ -66,6 +66,7 @@ struct regex_data
|
||||
unsigned char m_startmap[1 << CHAR_BIT]; // which characters can start a match
|
||||
unsigned int m_can_be_null; // whether we can match a null string
|
||||
re_detail::raw_storage m_data; // the buffer in which our states are constructed
|
||||
typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
|
||||
};
|
||||
//
|
||||
// class basic_regex_implementation
|
||||
|
@ -265,6 +265,7 @@ basic_regex_creator<charT, traits>::basic_regex_creator(regex_data<charT, traits
|
||||
m_lower_mask = m_traits.lookup_classname(l, l + 5);
|
||||
m_upper_mask = m_traits.lookup_classname(u, u + 5);
|
||||
m_alpha_mask = m_traits.lookup_classname(a, a + 5);
|
||||
m_pdata->m_word_mask = m_word_mask;
|
||||
BOOST_ASSERT(m_word_mask != 0);
|
||||
BOOST_ASSERT(m_mask_space != 0);
|
||||
BOOST_ASSERT(m_lower_mask != 0);
|
||||
@ -765,7 +766,7 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
|
||||
{
|
||||
// Oops error:
|
||||
if(0 == this->m_pdata->m_status) // update the error code if not already set
|
||||
this->m_pdata->m_status = boost::regex_constants::error_brack;
|
||||
this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
|
||||
//
|
||||
// clear the expression, we should be empty:
|
||||
//
|
||||
@ -776,8 +777,8 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
|
||||
//
|
||||
if(0 == (this->flags() & regex_constants::no_except))
|
||||
{
|
||||
std::string message = this->m_pdata->m_ptraits->error_string(boost::regex_constants::error_brack);
|
||||
boost::regex_error e(message, boost::regex_constants::error_brack, 0);
|
||||
std::string message = this->m_pdata->m_ptraits->error_string(boost::regex_constants::error_bad_pattern);
|
||||
boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
|
||||
e.raise();
|
||||
}
|
||||
}
|
||||
|
@ -766,6 +766,7 @@ bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_
|
||||
case syntax_element_soft_buffer_end:
|
||||
case syntax_element_restart_continue:
|
||||
case syntax_element_jump:
|
||||
case syntax_element_startmark:
|
||||
// can't legally repeat any of the above:
|
||||
fail(regex_constants::error_badrepeat, m_position - m_base);
|
||||
return false;
|
||||
@ -1342,6 +1343,10 @@ bool valid_value(charT c, int v)
|
||||
template <class charT, class traits>
|
||||
charT basic_regex_parser<charT, traits>::unescape_character()
|
||||
{
|
||||
#ifdef BOOST_MSVC
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable:4127)
|
||||
#endif
|
||||
charT result(0);
|
||||
if(m_position == m_end)
|
||||
{
|
||||
@ -1494,6 +1499,9 @@ charT basic_regex_parser<charT, traits>::unescape_character()
|
||||
}
|
||||
++m_position;
|
||||
return result;
|
||||
#ifdef BOOST_MSVC
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
}
|
||||
|
||||
template <class charT, class traits>
|
||||
@ -1737,6 +1745,9 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
||||
}
|
||||
break;
|
||||
}
|
||||
case regex_constants::syntax_close_mark:
|
||||
fail(regex_constants::error_badrepeat, m_position - m_base);
|
||||
return false;
|
||||
default:
|
||||
//
|
||||
// lets assume that we have a (?imsx) group and try and parse it:
|
||||
|
@ -167,10 +167,7 @@ template BOOST_REGEX_DECL void BOOST_REGEX_CALL
|
||||
|
||||
namespace re_detail{
|
||||
template BOOST_REGEX_DECL void perl_matcher<BOOST_REGEX_CHAR_T const *, match_results< const BOOST_REGEX_CHAR_T* >::allocator_type BOOST_REGEX_TRAITS_T >::construct_init(
|
||||
BOOST_REGEX_CHAR_T const * first, BOOST_REGEX_CHAR_T const * end,
|
||||
match_results<BOOST_REGEX_CHAR_T const *>& what,
|
||||
const basic_regex<BOOST_REGEX_CHAR_T BOOST_REGEX_TRAITS_T >& e,
|
||||
match_flag_type f);
|
||||
const basic_regex<BOOST_REGEX_CHAR_T BOOST_REGEX_TRAITS_T >& e, match_flag_type f);
|
||||
template BOOST_REGEX_DECL bool perl_matcher<BOOST_REGEX_CHAR_T const *, match_results< const BOOST_REGEX_CHAR_T* >::allocator_type BOOST_REGEX_TRAITS_T >::match();
|
||||
template BOOST_REGEX_DECL bool perl_matcher<BOOST_REGEX_CHAR_T const *, match_results< const BOOST_REGEX_CHAR_T* >::allocator_type BOOST_REGEX_TRAITS_T >::find();
|
||||
} // namespace
|
||||
@ -186,10 +183,7 @@ template BOOST_REGEX_DECL void BOOST_REGEX_CALL
|
||||
|
||||
namespace re_detail{
|
||||
template BOOST_REGEX_DECL void perl_matcher<std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator, match_results< std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator >::allocator_type, boost::regex_traits<BOOST_REGEX_CHAR_T > >::construct_init(
|
||||
std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator first, std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator end,
|
||||
match_results<std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator>& what,
|
||||
const basic_regex<BOOST_REGEX_CHAR_T>& e,
|
||||
match_flag_type f);
|
||||
const basic_regex<BOOST_REGEX_CHAR_T>& e, match_flag_type f);
|
||||
template BOOST_REGEX_DECL bool perl_matcher<std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator, match_results< std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator >::allocator_type, boost::regex_traits<BOOST_REGEX_CHAR_T > >::match();
|
||||
template BOOST_REGEX_DECL bool perl_matcher<std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator, match_results< std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator >::allocator_type, boost::regex_traits<BOOST_REGEX_CHAR_T > >::find();
|
||||
} // namespace
|
||||
|
@ -29,7 +29,7 @@ BOOST_REGEX_DECL void BOOST_REGEX_CALL verify_options(boost::regex_constants::sy
|
||||
// function can_start:
|
||||
//
|
||||
template <class charT>
|
||||
bool can_start(charT c, const unsigned char* map, unsigned char mask)
|
||||
inline bool can_start(charT c, const unsigned char* map, unsigned char mask)
|
||||
{
|
||||
return ((c < static_cast<charT>(0)) ? true : ((c >= static_cast<charT>(1 << CHAR_BIT)) ? true : map[c] & mask));
|
||||
}
|
||||
@ -327,7 +327,13 @@ public:
|
||||
match_results<BidiIterator, Allocator>& what,
|
||||
const basic_regex<char_type, traits>& e,
|
||||
match_flag_type f,
|
||||
BidiIterator base);
|
||||
BidiIterator base)
|
||||
: m_result(what), base(first), last(end),
|
||||
position(first), backstop(base), re(e), traits_inst(e.get_traits()),
|
||||
m_independent(false), next_count(&rep_obj), rep_obj(&next_count)
|
||||
{
|
||||
construct_init(e, f);
|
||||
}
|
||||
|
||||
bool match();
|
||||
bool find();
|
||||
@ -338,10 +344,8 @@ public:
|
||||
{ m_match_flags &= ~f; }
|
||||
|
||||
private:
|
||||
void construct_init(BidiIterator first, BidiIterator end,
|
||||
match_results<BidiIterator, Allocator>& what,
|
||||
const basic_regex<char_type, traits>& e,
|
||||
match_flag_type f);
|
||||
void construct_init(const basic_regex<char_type, traits>& e, match_flag_type f);
|
||||
|
||||
bool find_imp();
|
||||
bool match_imp();
|
||||
#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
|
||||
|
@ -32,25 +32,10 @@ namespace boost{
|
||||
namespace re_detail{
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
perl_matcher<BidiIterator, Allocator, traits>::perl_matcher(BidiIterator first, BidiIterator end,
|
||||
match_results<BidiIterator, Allocator>& what,
|
||||
const basic_regex<char_type, traits>& e,
|
||||
match_flag_type f,
|
||||
BidiIterator b)
|
||||
: m_result(what), base(first), last(end),
|
||||
position(first), backstop(b), re(e), traits_inst(e.get_traits()),
|
||||
m_independent(false), next_count(&rep_obj), rep_obj(&next_count)
|
||||
{
|
||||
construct_init(first, last, what, e, f);
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
void perl_matcher<BidiIterator, Allocator, traits>::construct_init(BidiIterator first, BidiIterator end,
|
||||
match_results<BidiIterator, Allocator>& what,
|
||||
const basic_regex<char_type, traits>& e,
|
||||
match_flag_type f)
|
||||
void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_regex<char_type, traits>& e, match_flag_type f)
|
||||
{
|
||||
typedef typename regex_iterator_traits<BidiIterator>::iterator_category category;
|
||||
typedef typename basic_regex<char_type, traits>::flag_type expression_flag_type;
|
||||
|
||||
if(e.empty())
|
||||
{
|
||||
@ -60,13 +45,14 @@ void perl_matcher<BidiIterator, Allocator, traits>::construct_init(BidiIterator
|
||||
}
|
||||
pstate = 0;
|
||||
m_match_flags = f;
|
||||
icase = re.flags() & regex_constants::icase;
|
||||
estimate_max_state_count(static_cast<category*>(0));
|
||||
expression_flag_type re_f = re.flags();
|
||||
icase = re_f & regex_constants::icase;
|
||||
if(!(m_match_flags & (match_perl|match_posix)))
|
||||
{
|
||||
if((re.flags() & (regbase::main_option_type|regbase::no_perl_ex)) == 0)
|
||||
if((re_f & (regbase::main_option_type|regbase::no_perl_ex)) == 0)
|
||||
m_match_flags |= match_perl;
|
||||
else if((re.flags() & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
|
||||
else if((re_f & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
|
||||
m_match_flags |= match_perl;
|
||||
else
|
||||
m_match_flags |= match_posix;
|
||||
@ -83,8 +69,7 @@ void perl_matcher<BidiIterator, Allocator, traits>::construct_init(BidiIterator
|
||||
m_backup_state = 0;
|
||||
#endif
|
||||
// find the value to use for matching word boundaries:
|
||||
const char_type w = static_cast<char_type>('w');
|
||||
m_word_mask = traits_inst.lookup_classname(&w, &w+1);
|
||||
m_word_mask = re.get_data().m_word_mask;
|
||||
// find bitmask to use for matching '.':
|
||||
match_any_mask = static_cast<unsigned char>((f & match_not_dot_newline) ? re_detail::test_not_newline : re_detail::test_newline);
|
||||
}
|
||||
@ -104,7 +89,7 @@ void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(std
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(void*)
|
||||
inline void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(void*)
|
||||
{
|
||||
// we don't know how long the sequence is:
|
||||
max_state_count = BOOST_REGEX_MAX_STATE_COUNT;
|
||||
@ -112,21 +97,9 @@ void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(voi
|
||||
|
||||
#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::protected_call(
|
||||
inline bool perl_matcher<BidiIterator, Allocator, traits>::protected_call(
|
||||
protected_proc_type proc)
|
||||
{
|
||||
/*
|
||||
__try{
|
||||
return (this->*proc)();
|
||||
}__except(EXCEPTION_STACK_OVERFLOW == GetExceptionCode())
|
||||
{
|
||||
reset_stack_guard_page();
|
||||
}
|
||||
// we only get here after a stack overflow:
|
||||
raise_error<traits>(traits_inst, regex_constants::error_size);
|
||||
// and we never really get here at all:
|
||||
return false;
|
||||
*/
|
||||
::boost::re_detail::concrete_protected_call
|
||||
<perl_matcher<BidiIterator, Allocator, traits> >
|
||||
obj(this, proc);
|
||||
@ -136,7 +109,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::protected_call(
|
||||
#endif
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::match()
|
||||
inline bool perl_matcher<BidiIterator, Allocator, traits>::match()
|
||||
{
|
||||
#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
|
||||
return protected_call(&perl_matcher<BidiIterator, Allocator, traits>::match_imp);
|
||||
@ -185,7 +158,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_imp()
|
||||
}
|
||||
|
||||
template <class BidiIterator, class Allocator, class traits>
|
||||
bool perl_matcher<BidiIterator, Allocator, traits>::find()
|
||||
inline bool perl_matcher<BidiIterator, Allocator, traits>::find()
|
||||
{
|
||||
#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
|
||||
return protected_call(&perl_matcher<BidiIterator, Allocator, traits>::find_imp);
|
||||
|
@ -164,7 +164,10 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
|
||||
raise_error(traits_inst, regex_constants::error_space);
|
||||
if((m_match_flags & match_partial) && (position == last) && (position != search_base))
|
||||
m_has_partial_match = true;
|
||||
if(false == unwind(false))
|
||||
bool successful_unwind = unwind(false);
|
||||
if((m_match_flags & match_partial) && (position == last) && (position != search_base))
|
||||
m_has_partial_match = true;
|
||||
if(false == successful_unwind)
|
||||
return m_recursive_result;
|
||||
}
|
||||
}
|
||||
|
@ -501,25 +501,39 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat()
|
||||
const re_repeat* rep = static_cast<const re_repeat*>(pstate);
|
||||
BOOST_ASSERT(1 == static_cast<const re_literal*>(rep->next.p)->length);
|
||||
const char_type what = *reinterpret_cast<const char_type*>(static_cast<const re_literal*>(rep->next.p) + 1);
|
||||
unsigned count = 0;
|
||||
//
|
||||
// start by working out how much we can skip:
|
||||
//
|
||||
bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
|
||||
std::size_t desired = greedy ? rep->max : rep->min;
|
||||
std::size_t count, desired;
|
||||
if(::boost::is_random_access_iterator<BidiIterator>::value)
|
||||
{
|
||||
BidiIterator end = position;
|
||||
std::advance(end, (std::min)((unsigned)::boost::re_detail::distance(position, last), desired));
|
||||
BidiIterator origin(position);
|
||||
while((position != end) && (traits_inst.translate(*position, icase) == what))
|
||||
desired =
|
||||
(std::min)(
|
||||
(std::size_t)(greedy ? rep->max : rep->min),
|
||||
(std::size_t)::boost::re_detail::distance(position, last));
|
||||
count = desired;
|
||||
++desired;
|
||||
if(icase)
|
||||
{
|
||||
++position;
|
||||
while(--desired && (traits_inst.translate_nocase(*position) == what))
|
||||
{
|
||||
++position;
|
||||
}
|
||||
}
|
||||
count = (unsigned)::boost::re_detail::distance(origin, position);
|
||||
else
|
||||
{
|
||||
while(--desired && (traits_inst.translate(*position) == what))
|
||||
{
|
||||
++position;
|
||||
}
|
||||
}
|
||||
count = count - desired;
|
||||
}
|
||||
else
|
||||
{
|
||||
count = 0;
|
||||
desired = greedy ? rep->max : rep->min;
|
||||
while((count < desired) && (position != last) && (traits_inst.translate(*position, icase) == what))
|
||||
{
|
||||
++position;
|
||||
|
Reference in New Issue
Block a user