From 955d077d2b69d7c1014e36d86c737f7eb4b057b7 Mon Sep 17 00:00:00 2001 From: jzmaddock Date: Wed, 17 Feb 2016 18:58:05 +0000 Subject: [PATCH] Allow types wider than int in \x{} expressions (for char32_t etc). Fixes: https://svn.boost.org/trac/boost/ticket/11988. --- include/boost/regex/icu.hpp | 2 +- include/boost/regex/v4/basic_regex_parser.hpp | 47 ++++++++++++------- include/boost/regex/v4/cpp_regex_traits.hpp | 8 ++-- include/boost/regex/v4/instances.hpp | 2 +- .../boost/regex/v4/perl_matcher_common.hpp | 8 ++-- include/boost/regex/v4/regex_format.hpp | 4 +- include/boost/regex/v4/regex_traits.hpp | 2 +- .../boost/regex/v4/regex_traits_defaults.hpp | 6 +-- include/boost/regex/v4/w32_regex_traits.hpp | 4 +- test/Jamfile.v2 | 1 + 10 files changed, 50 insertions(+), 34 deletions(-) diff --git a/include/boost/regex/icu.hpp b/include/boost/regex/icu.hpp index a70aa0da..719ee220 100644 --- a/include/boost/regex/icu.hpp +++ b/include/boost/regex/icu.hpp @@ -152,7 +152,7 @@ public: char_class_type lookup_classname(const char_type* p1, const char_type* p2) const; string_type lookup_collatename(const char_type* p1, const char_type* p2) const; bool isctype(char_type c, char_class_type f) const; - int toi(const char_type*& p1, const char_type* p2, int radix)const + boost::intmax_t toi(const char_type*& p1, const char_type* p2, int radix)const { return BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this); } diff --git a/include/boost/regex/v4/basic_regex_parser.hpp b/include/boost/regex/v4/basic_regex_parser.hpp index d097eed9..aefabacb 100644 --- a/include/boost/regex/v4/basic_regex_parser.hpp +++ b/include/boost/regex/v4/basic_regex_parser.hpp @@ -38,6 +38,21 @@ namespace BOOST_REGEX_DETAIL_NS{ #pragma warning(disable:4244 4800) #endif +inline boost::intmax_t umax(mpl::false_ const&) +{ + // Get out clause here, just in case numeric_limits is unspecialized: + return std::numeric_limits::is_specialized ? (std::numeric_limits::max)() : INT_MAX; +} +inline boost::intmax_t umax(mpl::true_ const&) +{ + return (std::numeric_limits::max)(); +} + +inline boost::intmax_t umax() +{ + return umax(mpl::bool_::digits >= std::numeric_limits::digits>()); +} + template class basic_regex_parser : public basic_regex_creator { @@ -868,7 +883,7 @@ escape_type_class_jump: return false; } const charT* pc = m_position; - int i = this->m_traits.toi(pc, m_end, 10); + boost::intmax_t i = this->m_traits.toi(pc, m_end, 10); if((i < 0) && syn_end) { // Check for a named capture, get the leftmost one if there is more than one: @@ -1075,7 +1090,7 @@ bool basic_regex_parser::parse_repeat_range(bool isbasic) // parse a repeat-range: // std::size_t min, max; - int v; + boost::intmax_t v; // skip whitespace: while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) ++m_position; @@ -1094,7 +1109,7 @@ bool basic_regex_parser::parse_repeat_range(bool isbasic) // get min: v = this->m_traits.toi(m_position, m_end, 10); // skip whitespace: - if(v < 0) + if((v < 0) || (v > umax())) { if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) { @@ -1120,7 +1135,7 @@ bool basic_regex_parser::parse_repeat_range(bool isbasic) while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position; return parse_literal(); } - min = v; + min = static_cast(v); // see if we have a comma: if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma) { @@ -1143,7 +1158,7 @@ bool basic_regex_parser::parse_repeat_range(bool isbasic) } // get the value if any: v = this->m_traits.toi(m_position, m_end, 10); - max = (v >= 0) ? (std::size_t)v : (std::numeric_limits::max)(); + max = ((v >= 0) && (v < umax())) ? (std::size_t)v : (std::numeric_limits::max)(); } else { @@ -1665,19 +1680,19 @@ digraph basic_regex_parser::get_next_set_literal(basic_cha // does a value fit in the specified charT type? // template -bool valid_value(charT, int v, const mpl::true_&) +bool valid_value(charT, boost::intmax_t v, const mpl::true_&) { return (v >> (sizeof(charT) * CHAR_BIT)) == 0; } template -bool valid_value(charT, int, const mpl::false_&) +bool valid_value(charT, boost::intmax_t, const mpl::false_&) { return true; // v will alsways fit in a charT } template -bool valid_value(charT c, int v) +bool valid_value(charT c, boost::intmax_t v) { - return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(int))>()); + return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(boost::intmax_t))>()); } template @@ -1753,10 +1768,10 @@ charT basic_regex_parser::unescape_character() fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence."); return result; } - int i = this->m_traits.toi(m_position, m_end, 16); + boost::intmax_t i = this->m_traits.toi(m_position, m_end, 16); if((m_position == m_end) || (i < 0) - || ((std::numeric_limits::is_specialized) && (i > (int)(std::numeric_limits::max)())) + || ((std::numeric_limits::is_specialized) && (i > (boost::intmax_t)(std::numeric_limits::max)())) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace)) { // Rewind to start of escape: @@ -1771,7 +1786,7 @@ charT basic_regex_parser::unescape_character() else { std::ptrdiff_t len = (std::min)(static_cast(2), static_cast(m_end - m_position)); - int i = this->m_traits.toi(m_position, m_position + len, 16); + boost::intmax_t i = this->m_traits.toi(m_position, m_position + len, 16); if((i < 0) || !valid_value(charT(0), i)) { @@ -1790,7 +1805,7 @@ charT basic_regex_parser::unescape_character() // followed by up to 3 octal digits: std::ptrdiff_t len = (std::min)(::boost::BOOST_REGEX_DETAIL_NS::distance(m_position, m_end), static_cast(4)); const charT* bp = m_position; - int val = this->m_traits.toi(bp, bp + 1, 8); + boost::intmax_t val = this->m_traits.toi(bp, bp + 1, 8); if(val != 0) { // Rewind to start of escape: @@ -1801,7 +1816,7 @@ charT basic_regex_parser::unescape_character() return result; } val = this->m_traits.toi(m_position, m_position + len, 8); - if(val < 0) + if((val < 0) || (val > (boost::intmax_t)(std::numeric_limits::max)())) { // Rewind to start of escape: --m_position; @@ -1874,7 +1889,7 @@ bool basic_regex_parser::parse_backref() { BOOST_ASSERT(m_position != m_end); const charT* pc = m_position; - int i = this->m_traits.toi(pc, pc + 1, 10); + boost::intmax_t i = this->m_traits.toi(pc, pc + 1, 10); if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs))) { // not a backref at all but an octal escape sequence: @@ -1996,7 +2011,7 @@ bool basic_regex_parser::parse_perl_extension() int max_mark = m_max_mark; m_mark_reset = -1; m_max_mark = m_mark_count; - int v; + boost::intmax_t v; // // select the actual extension used: // diff --git a/include/boost/regex/v4/cpp_regex_traits.hpp b/include/boost/regex/v4/cpp_regex_traits.hpp index 709663a3..b7b32d8a 100644 --- a/include/boost/regex/v4/cpp_regex_traits.hpp +++ b/include/boost/regex/v4/cpp_regex_traits.hpp @@ -1027,11 +1027,11 @@ public: return m_pimpl->isctype(c, f); #endif } - int toi(const charT*& p1, const charT* p2, int radix)const; + boost::intmax_t toi(const charT*& p1, const charT* p2, int radix)const; int value(charT c, int radix)const { const charT* pc = &c; - return toi(pc, pc + 1, radix); + return (int)toi(pc, pc + 1, radix); } locale_type imbue(locale_type l) { @@ -1069,7 +1069,7 @@ private: template -int cpp_regex_traits::toi(const charT*& first, const charT* last, int radix)const +boost::intmax_t cpp_regex_traits::toi(const charT*& first, const charT* last, int radix)const { BOOST_REGEX_DETAIL_NS::parser_buf sbuf; // buffer for parsing numbers. std::basic_istream is(&sbuf); // stream for parsing numbers. @@ -1082,7 +1082,7 @@ int cpp_regex_traits::toi(const charT*& first, const charT* last, int rad if(std::abs(radix) == 16) is >> std::hex; else if(std::abs(radix) == 8) is >> std::oct; else is >> std::dec; - int val; + boost::intmax_t val; if(is >> val) { first = first + ((last - first) - sbuf.in_avail()); diff --git a/include/boost/regex/v4/instances.hpp b/include/boost/regex/v4/instances.hpp index f66b237c..05ac71a6 100644 --- a/include/boost/regex/v4/instances.hpp +++ b/include/boost/regex/v4/instances.hpp @@ -157,7 +157,7 @@ bool cpp_regex_traits_implementation::isctype(const BOOST_RE #endif } // namespace template BOOST_REGEX_DECL -int cpp_regex_traits::toi(const BOOST_REGEX_CHAR_T*& first, const BOOST_REGEX_CHAR_T* last, int radix)const; +boost::intmax_t cpp_regex_traits::toi(const BOOST_REGEX_CHAR_T*& first, const BOOST_REGEX_CHAR_T* last, int radix)const; template BOOST_REGEX_DECL std::string cpp_regex_traits::catalog_name(const std::string& name); template BOOST_REGEX_DECL diff --git a/include/boost/regex/v4/perl_matcher_common.hpp b/include/boost/regex/v4/perl_matcher_common.hpp index f3949ccf..e60581df 100644 --- a/include/boost/regex/v4/perl_matcher_common.hpp +++ b/include/boost/regex/v4/perl_matcher_common.hpp @@ -206,7 +206,7 @@ bool perl_matcher::match_imp() search_base = base; state_count = 0; m_match_flags |= regex_constants::match_all; - m_presult->set_size((m_match_flags & match_nosubs) ? 1 : 1 + re.mark_count(), search_base, last); + m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast(1u + re.mark_count()), search_base, last); m_presult->set_base(base); m_presult->set_named_subs(this->re.get_named_subs()); if(m_match_flags & match_posix) @@ -268,7 +268,7 @@ bool perl_matcher::find_imp() // reset our state machine: search_base = position = base; pstate = re.get_first_state(); - m_presult->set_size((m_match_flags & match_nosubs) ? 1 : 1 + re.mark_count(), base, last); + m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast(1u + re.mark_count()), base, last); m_presult->set_base(base); m_presult->set_named_subs(this->re.get_named_subs()); m_match_flags |= regex_constants::match_init; @@ -287,13 +287,13 @@ bool perl_matcher::find_imp() ++position; } // reset $` start: - m_presult->set_size((m_match_flags & match_nosubs) ? 1 : 1 + re.mark_count(), search_base, last); + m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast(1u + re.mark_count()), search_base, last); //if((base != search_base) && (base == backstop)) // m_match_flags |= match_prev_avail; } if(m_match_flags & match_posix) { - m_result.set_size(1 + re.mark_count(), base, last); + m_result.set_size(static_cast(1u + re.mark_count()), base, last); m_result.set_base(base); } diff --git a/include/boost/regex/v4/regex_format.hpp b/include/boost/regex/v4/regex_format.hpp index e9006a7b..f0a0a11e 100644 --- a/include/boost/regex/v4/regex_format.hpp +++ b/include/boost/regex/v4/regex_format.hpp @@ -86,7 +86,7 @@ struct trivial_format_traits } int toi(const charT*& p1, const charT* p2, int radix)const { - return global_toi(p1, p2, radix, *this); + return (int)global_toi(p1, p2, radix, *this); } }; @@ -165,7 +165,7 @@ private: std::vector v(i, j); const char_type* start = &v[0]; const char_type* pos = start; - int r = m_traits.toi(pos, &v[0] + v.size(), base); + int r = (int)m_traits.toi(pos, &v[0] + v.size(), base); std::advance(i, pos - start); return r; } diff --git a/include/boost/regex/v4/regex_traits.hpp b/include/boost/regex/v4/regex_traits.hpp index 45a4bdf6..5d427706 100644 --- a/include/boost/regex/v4/regex_traits.hpp +++ b/include/boost/regex/v4/regex_traits.hpp @@ -109,7 +109,7 @@ struct default_wrapper : public BaseT { return ((c & 0x7f) == c) ? get_default_escape_syntax_type(static_cast(c)) : ::boost::regex_constants::escape_type_identity; } - int toi(const char_type*& p1, const char_type* p2, int radix)const + boost::intmax_t toi(const char_type*& p1, const char_type* p2, int radix)const { return ::boost::BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this); } diff --git a/include/boost/regex/v4/regex_traits_defaults.hpp b/include/boost/regex/v4/regex_traits_defaults.hpp index 18218837..2a2cf21d 100644 --- a/include/boost/regex/v4/regex_traits_defaults.hpp +++ b/include/boost/regex/v4/regex_traits_defaults.hpp @@ -304,13 +304,13 @@ int global_value(charT c) return -1; } template -int global_toi(const charT*& p1, const charT* p2, int radix, const traits& t) +boost::intmax_t global_toi(const charT*& p1, const charT* p2, int radix, const traits& t) { (void)t; // warning suppression - int next_value = t.value(*p1, radix); + boost::intmax_t next_value = t.value(*p1, radix); if((p1 == p2) || (next_value < 0) || (next_value >= radix)) return -1; - int result = 0; + boost::intmax_t result = 0; while(p1 != p2) { next_value = t.value(*p1, radix); diff --git a/include/boost/regex/v4/w32_regex_traits.hpp b/include/boost/regex/v4/w32_regex_traits.hpp index 560cc217..bf996d61 100644 --- a/include/boost/regex/v4/w32_regex_traits.hpp +++ b/include/boost/regex/v4/w32_regex_traits.hpp @@ -640,13 +640,13 @@ public: return true; return false; } - int toi(const charT*& p1, const charT* p2, int radix)const + boost::intmax_t toi(const charT*& p1, const charT* p2, int radix)const { return ::boost::BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this); } int value(charT c, int radix)const { - int result = ::boost::BOOST_REGEX_DETAIL_NS::global_value(c); + int result = (int)::boost::BOOST_REGEX_DETAIL_NS::global_value(c); return result < radix ? result : -1; } locale_type imbue(locale_type l) diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index 06198b04..6617d188 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -153,6 +153,7 @@ test-suite regex [ link concepts/concept_check.cpp ../build//boost_regex ] [ link concepts/icu_concept_check.cpp ../build//boost_regex ] [ link concepts/range_concept_check.cpp ../build//boost_regex ] + [ run concepts/test_bug_11988.cpp ../build//boost_regex ] [ run # sources