diff --git a/include/boost/regex/config.hpp b/include/boost/regex/config.hpp index a88a7852..3b92e6ae 100644 --- a/include/boost/regex/config.hpp +++ b/include/boost/regex/config.hpp @@ -163,8 +163,10 @@ * with MSVC and the /Zc:wchar_t option we place some extra unsigned short versions * of the non-inline functions in the library, so that users can still link to the lib, * irrespective of whether their own code is built with /Zc:wchar_t. + * Note that this does NOT WORK with VC10 when the C++ locale is in effect as + * the locale's facets simply do not compile in that case. */ -#if defined(__cplusplus) && (defined(BOOST_MSVC) || defined(__ICL)) && !defined(BOOST_NO_INTRINSIC_WCHAR_T) && defined(BOOST_WINDOWS) && !defined(__SGI_STL_PORT) && !defined(_STLPORT_VERSION) && !defined(BOOST_RWSTD_VER) +#if defined(__cplusplus) && (defined(BOOST_MSVC) || defined(__ICL)) && !defined(BOOST_NO_INTRINSIC_WCHAR_T) && defined(BOOST_WINDOWS) && !defined(__SGI_STL_PORT) && !defined(_STLPORT_VERSION) && !defined(BOOST_RWSTD_VER) && ((_MSC_VER < 1600) || !defined(BOOST_REGEX_USE_CPP_LOCALE)) # define BOOST_REGEX_HAS_OTHER_WCHAR_T # ifdef BOOST_MSVC # pragma warning(push) diff --git a/include/boost/regex/icu.hpp b/include/boost/regex/icu.hpp index e9a55c0c..772806e9 100644 --- a/include/boost/regex/icu.hpp +++ b/include/boost/regex/icu.hpp @@ -251,7 +251,7 @@ inline u32regex do_make_u32regex(InputIterator i, const boost::mpl::int_<1>*) { typedef boost::u8_to_u32_iterator conv_type; - return u32regex(conv_type(i), conv_type(j), opt); + return u32regex(conv_type(i, i, j), conv_type(j, i, j), opt); } template @@ -261,7 +261,7 @@ inline u32regex do_make_u32regex(InputIterator i, const boost::mpl::int_<2>*) { typedef boost::u16_to_u32_iterator conv_type; - return u32regex(conv_type(i), conv_type(j), opt); + return u32regex(conv_type(i, i, j), conv_type(j, i, j), opt); } template @@ -282,7 +282,7 @@ inline u32regex do_make_u32regex(InputIterator i, typedef boost::u8_to_u32_iterator conv_type; typedef std::vector vector_type; vector_type v; - conv_type a(i), b(j); + conv_type a(i, i, j), b(j, i, j); while(a != b) { v.push_back(*a); @@ -302,7 +302,7 @@ inline u32regex do_make_u32regex(InputIterator i, typedef boost::u16_to_u32_iterator conv_type; typedef std::vector vector_type; vector_type v; - conv_type a(i), b(j); + conv_type a(i, i, j), b(j, i, j); while(a != b) { v.push_back(*a); @@ -425,7 +425,7 @@ bool do_regex_match(BidiIterator first, BidiIterator last, typedef match_results match_type; typedef typename match_type::allocator_type alloc_type; match_type what; - bool result = ::boost::regex_match(conv_type(first), conv_type(last), what, e, flags); + bool result = ::boost::regex_match(conv_type(first, first, last), conv_type(last, first, last), what, e, flags); // copy results across to m: if(result) copy_results(m, what); return result; @@ -441,7 +441,7 @@ bool do_regex_match(BidiIterator first, BidiIterator last, typedef match_results match_type; typedef typename match_type::allocator_type alloc_type; match_type what; - bool result = ::boost::regex_match(conv_type(first), conv_type(last), what, e, flags); + bool result = ::boost::regex_match(conv_type(first, first, last), conv_type(last, first, last), what, e, flags); // copy results across to m: if(result) copy_results(m, what); return result; @@ -600,7 +600,7 @@ bool do_regex_search(BidiIterator first, BidiIterator last, typedef match_results match_type; typedef typename match_type::allocator_type alloc_type; match_type what; - bool result = ::boost::regex_search(conv_type(first), conv_type(last), what, e, flags, conv_type(base)); + bool result = ::boost::regex_search(conv_type(first, first, last), conv_type(last, first, last), what, e, flags, conv_type(base)); // copy results across to m: if(result) copy_results(m, what); return result; @@ -617,7 +617,7 @@ bool do_regex_search(BidiIterator first, BidiIterator last, typedef match_results match_type; typedef typename match_type::allocator_type alloc_type; match_type what; - bool result = ::boost::regex_search(conv_type(first), conv_type(last), what, e, flags, conv_type(base)); + bool result = ::boost::regex_search(conv_type(first, first, last), conv_type(last, first, last), what, e, flags, conv_type(base)); // copy results across to m: if(result) copy_results(m, what); return result; @@ -764,13 +764,13 @@ template inline std::pair< boost::u8_to_u32_iterator, boost::u8_to_u32_iterator > make_utf32_seq(I i, I j, mpl::int_<1> const*) { - return std::pair< boost::u8_to_u32_iterator, boost::u8_to_u32_iterator >(boost::u8_to_u32_iterator(i), boost::u8_to_u32_iterator(j)); + return std::pair< boost::u8_to_u32_iterator, boost::u8_to_u32_iterator >(boost::u8_to_u32_iterator(i, i, j), boost::u8_to_u32_iterator(j, i, j)); } template inline std::pair< boost::u16_to_u32_iterator, boost::u16_to_u32_iterator > make_utf32_seq(I i, I j, mpl::int_<2> const*) { - return std::pair< boost::u16_to_u32_iterator, boost::u16_to_u32_iterator >(boost::u16_to_u32_iterator(i), boost::u16_to_u32_iterator(j)); + return std::pair< boost::u16_to_u32_iterator, boost::u16_to_u32_iterator >(boost::u16_to_u32_iterator(i, i, j), boost::u16_to_u32_iterator(j, i, j)); } template inline std::pair< I, I > @@ -782,13 +782,15 @@ template inline std::pair< boost::u8_to_u32_iterator, boost::u8_to_u32_iterator > make_utf32_seq(const charT* p, mpl::int_<1> const*) { - return std::pair< boost::u8_to_u32_iterator, boost::u8_to_u32_iterator >(boost::u8_to_u32_iterator(p), boost::u8_to_u32_iterator(p+std::strlen((const char*)p))); + std::size_t len = std::strlen((const char*)p); + return std::pair< boost::u8_to_u32_iterator, boost::u8_to_u32_iterator >(boost::u8_to_u32_iterator(p, p, p+len), boost::u8_to_u32_iterator(p+len, p, p+len)); } template inline std::pair< boost::u16_to_u32_iterator, boost::u16_to_u32_iterator > make_utf32_seq(const charT* p, mpl::int_<2> const*) { - return std::pair< boost::u16_to_u32_iterator, boost::u16_to_u32_iterator >(boost::u16_to_u32_iterator(p), boost::u16_to_u32_iterator(p+u_strlen((const UChar*)p))); + std::size_t len = u_strlen((const UChar*)p); + return std::pair< boost::u16_to_u32_iterator, boost::u16_to_u32_iterator >(boost::u16_to_u32_iterator(p, p, p + len), boost::u16_to_u32_iterator(p+len, p, p + len)); } template inline std::pair< const charT*, const charT* > diff --git a/include/boost/regex/pending/object_cache.hpp b/include/boost/regex/pending/object_cache.hpp index db60e28a..05b6bfa2 100644 --- a/include/boost/regex/pending/object_cache.hpp +++ b/include/boost/regex/pending/object_cache.hpp @@ -73,7 +73,7 @@ boost::shared_ptr object_cache::get(const Key& k, siz // for now just throw, but we should never really get here... // ::boost::throw_exception(std::runtime_error("Error in thread safety code: could not acquire a lock")); -#ifdef BOOST_NO_UNREACHABLE_RETURN_DETECTION +#if defined(BOOST_NO_UNREACHABLE_RETURN_DETECTION) || defined(BOOST_NO_EXCEPTIONS) return boost::shared_ptr(); #endif #else diff --git a/include/boost/regex/pending/unicode_iterator.hpp b/include/boost/regex/pending/unicode_iterator.hpp index 657ca0a4..e6399b50 100644 --- a/include/boost/regex/pending/unicode_iterator.hpp +++ b/include/boost/regex/pending/unicode_iterator.hpp @@ -82,16 +82,16 @@ static const ::boost::uint32_t ten_bit_mask = 0x3FFu; inline bool is_high_surrogate(::boost::uint16_t v) { - return (v & 0xFC00u) == 0xd800u; + return (v & 0xFFFFFC00u) == 0xd800u; } inline bool is_low_surrogate(::boost::uint16_t v) { - return (v & 0xFC00u) == 0xdc00u; + return (v & 0xFFFFFC00u) == 0xdc00u; } template inline bool is_surrogate(T v) { - return (v & 0xF800u) == 0xd800; + return (v & 0xFFFFF800u) == 0xd800; } inline unsigned utf8_byte_count(boost::uint8_t c) @@ -113,6 +113,10 @@ inline unsigned utf8_trailing_byte_count(boost::uint8_t c) return utf8_byte_count(c) - 1; } +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4100) +#endif inline void invalid_utf32_code_point(::boost::uint32_t val) { #ifndef BOOST_NO_STD_LOCALE @@ -124,6 +128,9 @@ inline void invalid_utf32_code_point(::boost::uint32_t val) #endif boost::throw_exception(e); } +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif } // namespace detail @@ -296,6 +303,34 @@ public: { m_value = pending_read; } + // + // Range checked version: + // + u16_to_u32_iterator(BaseIterator b, BaseIterator start, BaseIterator end) : m_position(b) + { + m_value = pending_read; + // + // The range must not start with a low surrogate, or end in a high surrogate, + // otherwise we run the risk of running outside the underlying input range. + // Likewise b must not be located at a low surrogate. + // + boost::uint16_t val; + if(start != end) + { + if((b != start) && (b != end)) + { + val = *b; + if(detail::is_surrogate(val) && ((val & 0xFC00u) == 0xDC00u)) + invalid_code_point(val); + } + val = *start; + if(detail::is_surrogate(val) && ((val & 0xFC00u) == 0xDC00u)) + invalid_code_point(val); + val = *--end; + if(detail::is_high_surrogate(val)) + invalid_code_point(val); + } + } private: static void invalid_code_point(::boost::uint16_t val) { @@ -497,7 +532,7 @@ public: while((*--m_position & 0xC0u) == 0x80u) ++count; // now check that the sequence was valid: if(count != detail::utf8_trailing_byte_count(*m_position)) - invalid_sequnce(); + invalid_sequence(); m_value = pending_read; } BaseIterator base()const @@ -513,8 +548,37 @@ public: { m_value = pending_read; } + // + // Checked constructor: + // + u8_to_u32_iterator(BaseIterator b, BaseIterator start, BaseIterator end) : m_position(b) + { + m_value = pending_read; + // + // We must not start with a continuation character, or end with a + // truncated UTF-8 sequence otherwise we run the risk of going past + // the start/end of the underlying sequence: + // + if(start != end) + { + unsigned char v = *start; + if((v & 0xC0u) == 0x80u) + invalid_sequence(); + if((b != start) && (b != end) && ((*b & 0xC0u) == 0x80u)) + invalid_sequence(); + BaseIterator pos = end; + do + { + v = *--pos; + } + while((start != pos) && ((v & 0xC0u) == 0x80u)); + std::ptrdiff_t extra = detail::utf8_byte_count(v); + if(std::distance(pos, end) < extra) + invalid_sequence(); + } + } private: - static void invalid_sequnce() + static void invalid_sequence() { std::out_of_range e("Invalid UTF-8 sequence encountered while trying to encode UTF-32 character"); boost::throw_exception(e); @@ -524,7 +588,7 @@ private: m_value = static_cast(static_cast< ::boost::uint8_t>(*m_position)); // we must not have a continuation character: if((m_value & 0xC0u) == 0x80u) - invalid_sequnce(); + invalid_sequence(); // see how many extra byts we have: unsigned extra = detail::utf8_trailing_byte_count(*m_position); // extract the extra bits, 6 from each extra byte: @@ -547,7 +611,7 @@ private: m_value &= masks[extra]; // check the result: if(m_value > static_cast(0x10FFFFu)) - invalid_sequnce(); + invalid_sequence(); } BaseIterator m_position; mutable U32Type m_value; diff --git a/include/boost/regex/v4/basic_regex.hpp b/include/boost/regex/v4/basic_regex.hpp index 04c7bb37..1e71877e 100644 --- a/include/boost/regex/v4/basic_regex.hpp +++ b/include/boost/regex/v4/basic_regex.hpp @@ -1,7 +1,7 @@ /* * - * Copyright (c) 1998-2004 - * John Maddock + * Copyright (c) 1998-2004 John Maddock + * Copyright 2011 Garmin Ltd. or its subsidiaries * * Distributed under the Boost Software License, Version 1.0. * (See accompanying file LICENSE_1_0.txt or copy at @@ -234,7 +234,7 @@ public: std::pair BOOST_REGEX_CALL subexpression(std::size_t n)const { if(n == 0) - throw std::out_of_range("0 is not a valid subexpression index."); + boost::throw_exception(std::out_of_range("0 is not a valid subexpression index.")); const std::pair& pi = this->m_subs.at(n - 1); std::pair p(expression() + pi.first, expression() + pi.second); return p; @@ -398,7 +398,7 @@ public: typedef typename traits::string_type seq_type; seq_type a(arg_first, arg_last); if(a.size()) - assign(&*a.begin(), &*a.begin() + a.size(), f); + assign(static_cast(&*a.begin()), static_cast(&*a.begin() + a.size()), f); else assign(static_cast(0), static_cast(0), f); } @@ -487,7 +487,7 @@ public: std::pair BOOST_REGEX_CALL subexpression(std::size_t n)const { if(!m_pimpl.get()) - throw std::logic_error("Can't access subexpressions in an invalid regex."); + boost::throw_exception(std::logic_error("Can't access subexpressions in an invalid regex.")); return m_pimpl->subexpression(n); } const_iterator BOOST_REGEX_CALL begin()const diff --git a/include/boost/regex/v4/basic_regex_creator.hpp b/include/boost/regex/v4/basic_regex_creator.hpp index efa9f7dd..c4b1c048 100644 --- a/include/boost/regex/v4/basic_regex_creator.hpp +++ b/include/boost/regex/v4/basic_regex_creator.hpp @@ -241,6 +241,7 @@ protected: unsigned m_backrefs; // bitmask of permitted backrefs boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for; bool m_has_recursions; // set when we have recursive expresisons to fixup + std::vector m_recursion_checks; // notes which recursions we've followed while analysing this expression typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character typename traits::char_class_type m_lower_mask; // mask used to determine if a character is a lowercase character @@ -712,6 +713,8 @@ void basic_regex_creator::finalize(const charT* p1, const charT* m_pdata->m_can_be_null = 0; m_bad_repeats = 0; + if(m_has_recursions) + m_recursion_checks.assign(1 + m_pdata->m_mark_count, false); create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all); // get the restart type: m_pdata->m_restart_type = get_restart_type(m_pdata->m_first_state); @@ -948,9 +951,14 @@ void basic_regex_creator::create_startmaps(re_syntax_base* state) state = state->next.p; } } + // now work through our list, building all the maps as we go: while(v.size()) { + // Initialize m_recursion_checks if we need it: + if(m_has_recursions) + m_recursion_checks.assign(1 + m_pdata->m_mark_count, false); + const std::pair& p = v.back(); m_icase = p.first; state = p.second; @@ -960,6 +968,9 @@ void basic_regex_creator::create_startmaps(re_syntax_base* state) m_bad_repeats = 0; create_startmap(state->next.p, static_cast(state)->_map, &static_cast(state)->can_be_null, mask_take); m_bad_repeats = 0; + + if(m_has_recursions) + m_recursion_checks.assign(1 + m_pdata->m_mark_count, false); create_startmap(static_cast(state)->alt.p, static_cast(state)->_map, &static_cast(state)->can_be_null, mask_skip); // adjust the type of the state to allow for faster matching: state->type = this->get_repeat_type(state); @@ -1114,7 +1125,11 @@ void basic_regex_creator::create_startmap(re_syntax_base* state, } case syntax_element_recurse: { - if(recursion_start == state) + if(state->type == syntax_element_startmark) + recursion_sub = static_cast(state)->index; + else + recursion_sub = 0; + if(m_recursion_checks[recursion_sub]) { // Infinite recursion!! if(0 == this->m_pdata->m_status) // update the error code if not already set @@ -1139,12 +1154,10 @@ void basic_regex_creator::create_startmap(re_syntax_base* state, recursion_start = state; recursion_restart = state->next.p; state = static_cast(state)->alt.p; - if(state->type == syntax_element_startmark) - recursion_sub = static_cast(state)->index; - else - recursion_sub = 0; + m_recursion_checks[recursion_sub] = true; break; } + m_recursion_checks[recursion_sub] = true; // fall through, can't handle nested recursion here... } case syntax_element_backref: @@ -1213,7 +1226,7 @@ void basic_regex_creator::create_startmap(re_syntax_base* state, for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) { charT c = static_cast(i); - if(&c != re_is_set_member(&c, &c + 1, static_cast*>(state), *m_pdata, m_icase)) + if(&c != re_is_set_member(&c, &c + 1, static_cast*>(state), *m_pdata, l_icase)) l_map[i] |= mask; } } diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp index 0192a9f1..87ff6e20 100644 --- a/include/boost/regex/v4/basic_regex_parser.hpp +++ b/include/boost/regex/v4/basic_regex_parser.hpp @@ -941,7 +941,8 @@ bool basic_regex_parser::parse_repeat(std::size_t low, std::size_ ++m_position; } // for perl regexes only check for pocessive ++ repeats. - if((0 == (this->flags() & regbase::main_option_type)) + if((m_position != m_end) + && (0 == (this->flags() & regbase::main_option_type)) && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus)) { pocessive = true; @@ -1025,13 +1026,14 @@ bool basic_regex_parser::parse_repeat(std::size_t low, std::size_ { // // Check for illegal following quantifier, we have to do this here, because - // the extra states we insert below circumvents are usual error checking :-( + // the extra states we insert below circumvents our usual error checking :-( // switch(this->m_traits.syntax_type(*m_position)) { case regex_constants::syntax_star: case regex_constants::syntax_plus: case regex_constants::syntax_question: + case regex_constants::syntax_open_brace: fail(regex_constants::error_badrepeat, m_position - m_base); return false; } diff --git a/include/boost/regex/v4/cpp_regex_traits.hpp b/include/boost/regex/v4/cpp_regex_traits.hpp index cd22bd8e..bcae455a 100644 --- a/include/boost/regex/v4/cpp_regex_traits.hpp +++ b/include/boost/regex/v4/cpp_regex_traits.hpp @@ -1,7 +1,7 @@ /* * - * Copyright (c) 2004 - * John Maddock + * Copyright (c) 2004 John Maddock + * Copyright 2011 Garmin Ltd. or its subsidiaries * * Use, modification and distribution are subject to the * Boost Software License, Version 1.0. (See accompanying file @@ -511,7 +511,9 @@ typename cpp_regex_traits_implementation::string_type // however at least one std lib will always throw // std::bad_alloc for certain arguments... // +#ifndef BOOST_NO_EXCEPTIONS try{ +#endif // // What we do here depends upon the format of the sort key returned by // sort key returned by this->transform: @@ -546,7 +548,9 @@ typename cpp_regex_traits_implementation::string_type result.erase(i); break; } +#ifndef BOOST_NO_EXCEPTIONS }catch(...){} +#endif while(result.size() && (charT(0) == *result.rbegin())) result.erase(result.size() - 1); if(result.empty()) @@ -576,7 +580,9 @@ typename cpp_regex_traits_implementation::string_type // std::bad_alloc for certain arguments... // string_type result; +#ifndef BOOST_NO_EXCEPTIONS try{ +#endif result = this->m_pcollate->transform(p1, p2); // // Borland's STLPort version returns a NULL-terminated @@ -593,10 +599,12 @@ typename cpp_regex_traits_implementation::string_type result.erase(result.size() - 1); #endif BOOST_ASSERT(std::find(result.begin(), result.end(), charT(0)) == result.end()); +#ifndef BOOST_NO_EXCEPTIONS } catch(...) { } +#endif return result; } diff --git a/include/boost/regex/v4/regex_format.hpp b/include/boost/regex/v4/regex_format.hpp index 4406839f..e05862fa 100644 --- a/include/boost/regex/v4/regex_format.hpp +++ b/include/boost/regex/v4/regex_format.hpp @@ -842,7 +842,15 @@ OutputIterator regex_format_imp(OutputIterator out, BOOST_MPL_HAS_XXX_TRAIT_DEF(const_iterator) -struct any_type { any_type(...); }; +struct any_type +{ + template + any_type(const T&); + template + any_type(const T&, const U&); + template + any_type(const T&, const U&, const V&); +}; typedef char no_type; typedef char (&unary_type)[2]; typedef char (&binary_type)[3]; diff --git a/include/boost/regex/v4/u32regex_token_iterator.hpp b/include/boost/regex/v4/u32regex_token_iterator.hpp index 4b0ac927..de167716 100644 --- a/include/boost/regex/v4/u32regex_token_iterator.hpp +++ b/include/boost/regex/v4/u32regex_token_iterator.hpp @@ -317,14 +317,14 @@ inline u32regex_token_iterator make_u32regex_token_iterator(cons template inline u32regex_token_iterator make_u32regex_token_iterator(const UChar* p, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) { - return u32regex_token_iterator(p, p+u_strlen(p), e, m); + return u32regex_token_iterator(p, p+u_strlen(p), e, submatch, m); } #endif template inline u32regex_token_iterator::const_iterator> make_u32regex_token_iterator(const std::basic_string& p, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) { typedef typename std::basic_string::const_iterator iter_type; - return u32regex_token_iterator(p.begin(), p.end(), e, m); + return u32regex_token_iterator(p.begin(), p.end(), e, submatch, m); } template inline u32regex_token_iterator make_u32regex_token_iterator(const U_NAMESPACE_QUALIFIER UnicodeString& s, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) @@ -354,7 +354,7 @@ template inline u32regex_token_iterator::const_iterator> make_u32regex_token_iterator(const std::basic_string& p, const u32regex& e, const std::vector& submatch, regex_constants::match_flag_type m = regex_constants::match_default) { typedef typename std::basic_string::const_iterator iter_type; - return u32regex_token_iterator(p.begin(), p.end(), e, m); + return u32regex_token_iterator(p.begin(), p.end(), e, submatch, m); } inline u32regex_token_iterator make_u32regex_token_iterator(const U_NAMESPACE_QUALIFIER UnicodeString& s, const u32regex& e, const std::vector& submatch, regex_constants::match_flag_type m = regex_constants::match_default) {