Consolidated patches and performance tweaks.

[SVN r31492]
This commit is contained in:
John Maddock
2005-10-27 10:24:50 +00:00
parent 5fb9181efa
commit ffa362ce17
8 changed files with 65 additions and 64 deletions

View File

@ -66,6 +66,7 @@ struct regex_data
unsigned char m_startmap[1 << CHAR_BIT]; // which characters can start a match
unsigned int m_can_be_null; // whether we can match a null string
re_detail::raw_storage m_data; // the buffer in which our states are constructed
typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
};
//
// class basic_regex_implementation

View File

@ -265,6 +265,7 @@ basic_regex_creator<charT, traits>::basic_regex_creator(regex_data<charT, traits
m_lower_mask = m_traits.lookup_classname(l, l + 5);
m_upper_mask = m_traits.lookup_classname(u, u + 5);
m_alpha_mask = m_traits.lookup_classname(a, a + 5);
m_pdata->m_word_mask = m_word_mask;
BOOST_ASSERT(m_word_mask != 0);
BOOST_ASSERT(m_mask_space != 0);
BOOST_ASSERT(m_lower_mask != 0);
@ -765,7 +766,7 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
{
// Oops error:
if(0 == this->m_pdata->m_status) // update the error code if not already set
this->m_pdata->m_status = boost::regex_constants::error_brack;
this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
//
// clear the expression, we should be empty:
//
@ -776,8 +777,8 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
//
if(0 == (this->flags() & regex_constants::no_except))
{
std::string message = this->m_pdata->m_ptraits->error_string(boost::regex_constants::error_brack);
boost::regex_error e(message, boost::regex_constants::error_brack, 0);
std::string message = this->m_pdata->m_ptraits->error_string(boost::regex_constants::error_bad_pattern);
boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
e.raise();
}
}

View File

@ -766,6 +766,7 @@ bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_
case syntax_element_soft_buffer_end:
case syntax_element_restart_continue:
case syntax_element_jump:
case syntax_element_startmark:
// can't legally repeat any of the above:
fail(regex_constants::error_badrepeat, m_position - m_base);
return false;
@ -1342,6 +1343,10 @@ bool valid_value(charT c, int v)
template <class charT, class traits>
charT basic_regex_parser<charT, traits>::unescape_character()
{
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable:4127)
#endif
charT result(0);
if(m_position == m_end)
{
@ -1494,6 +1499,9 @@ charT basic_regex_parser<charT, traits>::unescape_character()
}
++m_position;
return result;
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif
}
template <class charT, class traits>
@ -1737,6 +1745,9 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
}
break;
}
case regex_constants::syntax_close_mark:
fail(regex_constants::error_badrepeat, m_position - m_base);
return false;
default:
//
// lets assume that we have a (?imsx) group and try and parse it:

View File

@ -167,10 +167,7 @@ template BOOST_REGEX_DECL void BOOST_REGEX_CALL
namespace re_detail{
template BOOST_REGEX_DECL void perl_matcher<BOOST_REGEX_CHAR_T const *, match_results< const BOOST_REGEX_CHAR_T* >::allocator_type BOOST_REGEX_TRAITS_T >::construct_init(
BOOST_REGEX_CHAR_T const * first, BOOST_REGEX_CHAR_T const * end,
match_results<BOOST_REGEX_CHAR_T const *>& what,
const basic_regex<BOOST_REGEX_CHAR_T BOOST_REGEX_TRAITS_T >& e,
match_flag_type f);
const basic_regex<BOOST_REGEX_CHAR_T BOOST_REGEX_TRAITS_T >& e, match_flag_type f);
template BOOST_REGEX_DECL bool perl_matcher<BOOST_REGEX_CHAR_T const *, match_results< const BOOST_REGEX_CHAR_T* >::allocator_type BOOST_REGEX_TRAITS_T >::match();
template BOOST_REGEX_DECL bool perl_matcher<BOOST_REGEX_CHAR_T const *, match_results< const BOOST_REGEX_CHAR_T* >::allocator_type BOOST_REGEX_TRAITS_T >::find();
} // namespace
@ -186,10 +183,7 @@ template BOOST_REGEX_DECL void BOOST_REGEX_CALL
namespace re_detail{
template BOOST_REGEX_DECL void perl_matcher<std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator, match_results< std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator >::allocator_type, boost::regex_traits<BOOST_REGEX_CHAR_T > >::construct_init(
std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator first, std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator end,
match_results<std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator>& what,
const basic_regex<BOOST_REGEX_CHAR_T>& e,
match_flag_type f);
const basic_regex<BOOST_REGEX_CHAR_T>& e, match_flag_type f);
template BOOST_REGEX_DECL bool perl_matcher<std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator, match_results< std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator >::allocator_type, boost::regex_traits<BOOST_REGEX_CHAR_T > >::match();
template BOOST_REGEX_DECL bool perl_matcher<std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator, match_results< std::basic_string<BOOST_REGEX_CHAR_T>::const_iterator >::allocator_type, boost::regex_traits<BOOST_REGEX_CHAR_T > >::find();
} // namespace

View File

@ -29,7 +29,7 @@ BOOST_REGEX_DECL void BOOST_REGEX_CALL verify_options(boost::regex_constants::sy
// function can_start:
//
template <class charT>
bool can_start(charT c, const unsigned char* map, unsigned char mask)
inline bool can_start(charT c, const unsigned char* map, unsigned char mask)
{
return ((c < static_cast<charT>(0)) ? true : ((c >= static_cast<charT>(1 << CHAR_BIT)) ? true : map[c] & mask));
}
@ -327,7 +327,13 @@ public:
match_results<BidiIterator, Allocator>& what,
const basic_regex<char_type, traits>& e,
match_flag_type f,
BidiIterator base);
BidiIterator base)
: m_result(what), base(first), last(end),
position(first), backstop(base), re(e), traits_inst(e.get_traits()),
m_independent(false), next_count(&rep_obj), rep_obj(&next_count)
{
construct_init(e, f);
}
bool match();
bool find();
@ -338,10 +344,8 @@ public:
{ m_match_flags &= ~f; }
private:
void construct_init(BidiIterator first, BidiIterator end,
match_results<BidiIterator, Allocator>& what,
const basic_regex<char_type, traits>& e,
match_flag_type f);
void construct_init(const basic_regex<char_type, traits>& e, match_flag_type f);
bool find_imp();
bool match_imp();
#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD

View File

@ -32,25 +32,10 @@ namespace boost{
namespace re_detail{
template <class BidiIterator, class Allocator, class traits>
perl_matcher<BidiIterator, Allocator, traits>::perl_matcher(BidiIterator first, BidiIterator end,
match_results<BidiIterator, Allocator>& what,
const basic_regex<char_type, traits>& e,
match_flag_type f,
BidiIterator b)
: m_result(what), base(first), last(end),
position(first), backstop(b), re(e), traits_inst(e.get_traits()),
m_independent(false), next_count(&rep_obj), rep_obj(&next_count)
{
construct_init(first, last, what, e, f);
}
template <class BidiIterator, class Allocator, class traits>
void perl_matcher<BidiIterator, Allocator, traits>::construct_init(BidiIterator first, BidiIterator end,
match_results<BidiIterator, Allocator>& what,
const basic_regex<char_type, traits>& e,
match_flag_type f)
void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_regex<char_type, traits>& e, match_flag_type f)
{
typedef typename regex_iterator_traits<BidiIterator>::iterator_category category;
typedef typename basic_regex<char_type, traits>::flag_type expression_flag_type;
if(e.empty())
{
@ -60,13 +45,14 @@ void perl_matcher<BidiIterator, Allocator, traits>::construct_init(BidiIterator
}
pstate = 0;
m_match_flags = f;
icase = re.flags() & regex_constants::icase;
estimate_max_state_count(static_cast<category*>(0));
expression_flag_type re_f = re.flags();
icase = re_f & regex_constants::icase;
if(!(m_match_flags & (match_perl|match_posix)))
{
if((re.flags() & (regbase::main_option_type|regbase::no_perl_ex)) == 0)
if((re_f & (regbase::main_option_type|regbase::no_perl_ex)) == 0)
m_match_flags |= match_perl;
else if((re.flags() & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
else if((re_f & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
m_match_flags |= match_perl;
else
m_match_flags |= match_posix;
@ -83,8 +69,7 @@ void perl_matcher<BidiIterator, Allocator, traits>::construct_init(BidiIterator
m_backup_state = 0;
#endif
// find the value to use for matching word boundaries:
const char_type w = static_cast<char_type>('w');
m_word_mask = traits_inst.lookup_classname(&w, &w+1);
m_word_mask = re.get_data().m_word_mask;
// find bitmask to use for matching '.':
match_any_mask = static_cast<unsigned char>((f & match_not_dot_newline) ? re_detail::test_not_newline : re_detail::test_newline);
}
@ -104,7 +89,7 @@ void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(std
}
template <class BidiIterator, class Allocator, class traits>
void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(void*)
inline void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(void*)
{
// we don't know how long the sequence is:
max_state_count = BOOST_REGEX_MAX_STATE_COUNT;
@ -112,21 +97,9 @@ void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(voi
#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::protected_call(
inline bool perl_matcher<BidiIterator, Allocator, traits>::protected_call(
protected_proc_type proc)
{
/*
__try{
return (this->*proc)();
}__except(EXCEPTION_STACK_OVERFLOW == GetExceptionCode())
{
reset_stack_guard_page();
}
// we only get here after a stack overflow:
raise_error<traits>(traits_inst, regex_constants::error_size);
// and we never really get here at all:
return false;
*/
::boost::re_detail::concrete_protected_call
<perl_matcher<BidiIterator, Allocator, traits> >
obj(this, proc);
@ -136,7 +109,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::protected_call(
#endif
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::match()
inline bool perl_matcher<BidiIterator, Allocator, traits>::match()
{
#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
return protected_call(&perl_matcher<BidiIterator, Allocator, traits>::match_imp);
@ -185,7 +158,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_imp()
}
template <class BidiIterator, class Allocator, class traits>
bool perl_matcher<BidiIterator, Allocator, traits>::find()
inline bool perl_matcher<BidiIterator, Allocator, traits>::find()
{
#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
return protected_call(&perl_matcher<BidiIterator, Allocator, traits>::find_imp);

View File

@ -164,7 +164,10 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
raise_error(traits_inst, regex_constants::error_space);
if((m_match_flags & match_partial) && (position == last) && (position != search_base))
m_has_partial_match = true;
if(false == unwind(false))
bool successful_unwind = unwind(false);
if((m_match_flags & match_partial) && (position == last) && (position != search_base))
m_has_partial_match = true;
if(false == successful_unwind)
return m_recursive_result;
}
}

View File

@ -501,25 +501,39 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat()
const re_repeat* rep = static_cast<const re_repeat*>(pstate);
BOOST_ASSERT(1 == static_cast<const re_literal*>(rep->next.p)->length);
const char_type what = *reinterpret_cast<const char_type*>(static_cast<const re_literal*>(rep->next.p) + 1);
unsigned count = 0;
//
// start by working out how much we can skip:
//
bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
std::size_t desired = greedy ? rep->max : rep->min;
std::size_t count, desired;
if(::boost::is_random_access_iterator<BidiIterator>::value)
{
BidiIterator end = position;
std::advance(end, (std::min)((unsigned)::boost::re_detail::distance(position, last), desired));
BidiIterator origin(position);
while((position != end) && (traits_inst.translate(*position, icase) == what))
desired =
(std::min)(
(std::size_t)(greedy ? rep->max : rep->min),
(std::size_t)::boost::re_detail::distance(position, last));
count = desired;
++desired;
if(icase)
{
++position;
while(--desired && (traits_inst.translate_nocase(*position) == what))
{
++position;
}
}
count = (unsigned)::boost::re_detail::distance(origin, position);
else
{
while(--desired && (traits_inst.translate(*position) == what))
{
++position;
}
}
count = count - desired;
}
else
{
count = 0;
desired = greedy ? rep->max : rep->min;
while((count < desired) && (position != last) && (traits_inst.translate(*position, icase) == what))
{
++position;