diff --git a/include/boost/regex/v4/basic_regex.hpp b/include/boost/regex/v4/basic_regex.hpp index 4d1aef27..851f9e84 100644 --- a/include/boost/regex/v4/basic_regex.hpp +++ b/include/boost/regex/v4/basic_regex.hpp @@ -66,6 +66,7 @@ struct regex_data unsigned char m_startmap[1 << CHAR_BIT]; // which characters can start a match unsigned int m_can_be_null; // whether we can match a null string re_detail::raw_storage m_data; // the buffer in which our states are constructed + typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character }; // // class basic_regex_implementation diff --git a/include/boost/regex/v4/basic_regex_creator.hpp b/include/boost/regex/v4/basic_regex_creator.hpp index 373c79f1..623c83f0 100644 --- a/include/boost/regex/v4/basic_regex_creator.hpp +++ b/include/boost/regex/v4/basic_regex_creator.hpp @@ -265,6 +265,7 @@ basic_regex_creator::basic_regex_creator(regex_datam_word_mask = m_word_mask; BOOST_ASSERT(m_word_mask != 0); BOOST_ASSERT(m_mask_space != 0); BOOST_ASSERT(m_lower_mask != 0); @@ -765,7 +766,7 @@ void basic_regex_creator::create_startmaps(re_syntax_base* state) { // Oops error: if(0 == this->m_pdata->m_status) // update the error code if not already set - this->m_pdata->m_status = boost::regex_constants::error_brack; + this->m_pdata->m_status = boost::regex_constants::error_bad_pattern; // // clear the expression, we should be empty: // @@ -776,8 +777,8 @@ void basic_regex_creator::create_startmaps(re_syntax_base* state) // if(0 == (this->flags() & regex_constants::no_except)) { - std::string message = this->m_pdata->m_ptraits->error_string(boost::regex_constants::error_brack); - boost::regex_error e(message, boost::regex_constants::error_brack, 0); + std::string message = this->m_pdata->m_ptraits->error_string(boost::regex_constants::error_bad_pattern); + boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0); e.raise(); } } diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp index 61be3ccf..6fc25dc2 100644 --- a/include/boost/regex/v4/basic_regex_parser.hpp +++ b/include/boost/regex/v4/basic_regex_parser.hpp @@ -766,6 +766,7 @@ bool basic_regex_parser::parse_repeat(std::size_t low, std::size_ case syntax_element_soft_buffer_end: case syntax_element_restart_continue: case syntax_element_jump: + case syntax_element_startmark: // can't legally repeat any of the above: fail(regex_constants::error_badrepeat, m_position - m_base); return false; @@ -1342,6 +1343,10 @@ bool valid_value(charT c, int v) template charT basic_regex_parser::unescape_character() { +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif charT result(0); if(m_position == m_end) { @@ -1494,6 +1499,9 @@ charT basic_regex_parser::unescape_character() } ++m_position; return result; +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif } template @@ -1737,6 +1745,9 @@ bool basic_regex_parser::parse_perl_extension() } break; } + case regex_constants::syntax_close_mark: + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; default: // // lets assume that we have a (?imsx) group and try and parse it: diff --git a/include/boost/regex/v4/instances.hpp b/include/boost/regex/v4/instances.hpp index 4a8d1e64..b9898cb0 100644 --- a/include/boost/regex/v4/instances.hpp +++ b/include/boost/regex/v4/instances.hpp @@ -167,10 +167,7 @@ template BOOST_REGEX_DECL void BOOST_REGEX_CALL namespace re_detail{ template BOOST_REGEX_DECL void perl_matcher::allocator_type BOOST_REGEX_TRAITS_T >::construct_init( - BOOST_REGEX_CHAR_T const * first, BOOST_REGEX_CHAR_T const * end, - match_results& what, - const basic_regex& e, - match_flag_type f); + const basic_regex& e, match_flag_type f); template BOOST_REGEX_DECL bool perl_matcher::allocator_type BOOST_REGEX_TRAITS_T >::match(); template BOOST_REGEX_DECL bool perl_matcher::allocator_type BOOST_REGEX_TRAITS_T >::find(); } // namespace @@ -186,10 +183,7 @@ template BOOST_REGEX_DECL void BOOST_REGEX_CALL namespace re_detail{ template BOOST_REGEX_DECL void perl_matcher::const_iterator, match_results< std::basic_string::const_iterator >::allocator_type, boost::regex_traits >::construct_init( - std::basic_string::const_iterator first, std::basic_string::const_iterator end, - match_results::const_iterator>& what, - const basic_regex& e, - match_flag_type f); + const basic_regex& e, match_flag_type f); template BOOST_REGEX_DECL bool perl_matcher::const_iterator, match_results< std::basic_string::const_iterator >::allocator_type, boost::regex_traits >::match(); template BOOST_REGEX_DECL bool perl_matcher::const_iterator, match_results< std::basic_string::const_iterator >::allocator_type, boost::regex_traits >::find(); } // namespace diff --git a/include/boost/regex/v4/perl_matcher.hpp b/include/boost/regex/v4/perl_matcher.hpp index 39d7f37c..22d887cd 100644 --- a/include/boost/regex/v4/perl_matcher.hpp +++ b/include/boost/regex/v4/perl_matcher.hpp @@ -29,7 +29,7 @@ BOOST_REGEX_DECL void BOOST_REGEX_CALL verify_options(boost::regex_constants::sy // function can_start: // template -bool can_start(charT c, const unsigned char* map, unsigned char mask) +inline bool can_start(charT c, const unsigned char* map, unsigned char mask) { return ((c < static_cast(0)) ? true : ((c >= static_cast(1 << CHAR_BIT)) ? true : map[c] & mask)); } @@ -327,7 +327,13 @@ public: match_results& what, const basic_regex& e, match_flag_type f, - BidiIterator base); + BidiIterator base) + : m_result(what), base(first), last(end), + position(first), backstop(base), re(e), traits_inst(e.get_traits()), + m_independent(false), next_count(&rep_obj), rep_obj(&next_count) + { + construct_init(e, f); + } bool match(); bool find(); @@ -338,10 +344,8 @@ public: { m_match_flags &= ~f; } private: - void construct_init(BidiIterator first, BidiIterator end, - match_results& what, - const basic_regex& e, - match_flag_type f); + void construct_init(const basic_regex& e, match_flag_type f); + bool find_imp(); bool match_imp(); #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD diff --git a/include/boost/regex/v4/perl_matcher_common.hpp b/include/boost/regex/v4/perl_matcher_common.hpp index dd63e98e..f19b6c23 100644 --- a/include/boost/regex/v4/perl_matcher_common.hpp +++ b/include/boost/regex/v4/perl_matcher_common.hpp @@ -32,25 +32,10 @@ namespace boost{ namespace re_detail{ template -perl_matcher::perl_matcher(BidiIterator first, BidiIterator end, - match_results& what, - const basic_regex& e, - match_flag_type f, - BidiIterator b) - : m_result(what), base(first), last(end), - position(first), backstop(b), re(e), traits_inst(e.get_traits()), - m_independent(false), next_count(&rep_obj), rep_obj(&next_count) -{ - construct_init(first, last, what, e, f); -} - -template -void perl_matcher::construct_init(BidiIterator first, BidiIterator end, - match_results& what, - const basic_regex& e, - match_flag_type f) +void perl_matcher::construct_init(const basic_regex& e, match_flag_type f) { typedef typename regex_iterator_traits::iterator_category category; + typedef typename basic_regex::flag_type expression_flag_type; if(e.empty()) { @@ -60,13 +45,14 @@ void perl_matcher::construct_init(BidiIterator } pstate = 0; m_match_flags = f; - icase = re.flags() & regex_constants::icase; estimate_max_state_count(static_cast(0)); + expression_flag_type re_f = re.flags(); + icase = re_f & regex_constants::icase; if(!(m_match_flags & (match_perl|match_posix))) { - if((re.flags() & (regbase::main_option_type|regbase::no_perl_ex)) == 0) + if((re_f & (regbase::main_option_type|regbase::no_perl_ex)) == 0) m_match_flags |= match_perl; - else if((re.flags() & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex)) + else if((re_f & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex)) m_match_flags |= match_perl; else m_match_flags |= match_posix; @@ -83,8 +69,7 @@ void perl_matcher::construct_init(BidiIterator m_backup_state = 0; #endif // find the value to use for matching word boundaries: - const char_type w = static_cast('w'); - m_word_mask = traits_inst.lookup_classname(&w, &w+1); + m_word_mask = re.get_data().m_word_mask; // find bitmask to use for matching '.': match_any_mask = static_cast((f & match_not_dot_newline) ? re_detail::test_not_newline : re_detail::test_newline); } @@ -104,7 +89,7 @@ void perl_matcher::estimate_max_state_count(std } template -void perl_matcher::estimate_max_state_count(void*) +inline void perl_matcher::estimate_max_state_count(void*) { // we don't know how long the sequence is: max_state_count = BOOST_REGEX_MAX_STATE_COUNT; @@ -112,21 +97,9 @@ void perl_matcher::estimate_max_state_count(voi #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD template -bool perl_matcher::protected_call( +inline bool perl_matcher::protected_call( protected_proc_type proc) { - /* - __try{ - return (this->*proc)(); - }__except(EXCEPTION_STACK_OVERFLOW == GetExceptionCode()) - { - reset_stack_guard_page(); - } - // we only get here after a stack overflow: - raise_error(traits_inst, regex_constants::error_size); - // and we never really get here at all: - return false; - */ ::boost::re_detail::concrete_protected_call > obj(this, proc); @@ -136,7 +109,7 @@ bool perl_matcher::protected_call( #endif template -bool perl_matcher::match() +inline bool perl_matcher::match() { #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD return protected_call(&perl_matcher::match_imp); @@ -185,7 +158,7 @@ bool perl_matcher::match_imp() } template -bool perl_matcher::find() +inline bool perl_matcher::find() { #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD return protected_call(&perl_matcher::find_imp); diff --git a/include/boost/regex/v4/perl_matcher_non_recursive.hpp b/include/boost/regex/v4/perl_matcher_non_recursive.hpp index dc6f1d09..791c5d93 100644 --- a/include/boost/regex/v4/perl_matcher_non_recursive.hpp +++ b/include/boost/regex/v4/perl_matcher_non_recursive.hpp @@ -164,7 +164,10 @@ bool perl_matcher::match_all_states() raise_error(traits_inst, regex_constants::error_space); if((m_match_flags & match_partial) && (position == last) && (position != search_base)) m_has_partial_match = true; - if(false == unwind(false)) + bool successful_unwind = unwind(false); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(false == successful_unwind) return m_recursive_result; } } diff --git a/include/boost/regex/v4/perl_matcher_recursive.hpp b/include/boost/regex/v4/perl_matcher_recursive.hpp index c0626ebd..74f63bed 100644 --- a/include/boost/regex/v4/perl_matcher_recursive.hpp +++ b/include/boost/regex/v4/perl_matcher_recursive.hpp @@ -501,25 +501,39 @@ bool perl_matcher::match_char_repeat() const re_repeat* rep = static_cast(pstate); BOOST_ASSERT(1 == static_cast(rep->next.p)->length); const char_type what = *reinterpret_cast(static_cast(rep->next.p) + 1); - unsigned count = 0; // // start by working out how much we can skip: // bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); - std::size_t desired = greedy ? rep->max : rep->min; + std::size_t count, desired; if(::boost::is_random_access_iterator::value) { - BidiIterator end = position; - std::advance(end, (std::min)((unsigned)::boost::re_detail::distance(position, last), desired)); - BidiIterator origin(position); - while((position != end) && (traits_inst.translate(*position, icase) == what)) + desired = + (std::min)( + (std::size_t)(greedy ? rep->max : rep->min), + (std::size_t)::boost::re_detail::distance(position, last)); + count = desired; + ++desired; + if(icase) { - ++position; + while(--desired && (traits_inst.translate_nocase(*position) == what)) + { + ++position; + } } - count = (unsigned)::boost::re_detail::distance(origin, position); + else + { + while(--desired && (traits_inst.translate(*position) == what)) + { + ++position; + } + } + count = count - desired; } else { + count = 0; + desired = greedy ? rep->max : rep->min; while((count < desired) && (position != last) && (traits_inst.translate(*position, icase) == what)) { ++position;