Allow types wider than int in \x{} expressions (for char32_t etc.).

Fixes: https://svn.boost.org/trac/boost/ticket/11988.
This commit is contained in:
jzmaddock
2016-02-17 18:58:05 +00:00
parent 05dc9f4a44
commit 955d077d2b
10 changed files with 50 additions and 34 deletions

View File

@ -152,7 +152,7 @@ public:
char_class_type lookup_classname(const char_type* p1, const char_type* p2) const; char_class_type lookup_classname(const char_type* p1, const char_type* p2) const;
string_type lookup_collatename(const char_type* p1, const char_type* p2) const; string_type lookup_collatename(const char_type* p1, const char_type* p2) const;
bool isctype(char_type c, char_class_type f) const; bool isctype(char_type c, char_class_type f) const;
int toi(const char_type*& p1, const char_type* p2, int radix)const boost::intmax_t toi(const char_type*& p1, const char_type* p2, int radix)const
{ {
return BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this); return BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this);
} }

View File

@ -38,6 +38,21 @@ namespace BOOST_REGEX_DETAIL_NS{
#pragma warning(disable:4244 4800) #pragma warning(disable:4244 4800)
#endif #endif
// Tag-dispatch overload of umax(): chosen by the zero-argument umax() when
// boost::intmax_t has fewer value digits than std::size_t.
// Returns the largest boost::intmax_t value, or INT_MAX when
// std::numeric_limits<boost::intmax_t> reports that it is not specialized.
inline boost::intmax_t umax(mpl::false_ const&)
{
// Get out clause here, just in case numeric_limits is unspecialized:
return std::numeric_limits<boost::intmax_t>::is_specialized ? (std::numeric_limits<boost::intmax_t>::max)() : INT_MAX;
}
// Tag-dispatch overload of umax(): chosen by the zero-argument umax() when
// boost::intmax_t has at least as many value digits as std::size_t.
// Returns the maximum std::size_t value, converted to boost::intmax_t.
inline boost::intmax_t umax(mpl::true_ const&)
{
return (std::numeric_limits<std::size_t>::max)();
}
// Upper bound against which a parsed boost::intmax_t is compared before it is
// stored in a std::size_t (see the repeat-range checks that call umax()).
// Dispatches at compile time on whether boost::intmax_t has at least as many
// value digits as std::size_t: if so the result is std::size_t's maximum,
// otherwise it is the maximum taken from the mpl::false_ overload.
inline boost::intmax_t umax()
{
return umax(mpl::bool_<std::numeric_limits<boost::intmax_t>::digits >= std::numeric_limits<std::size_t>::digits>());
}
template <class charT, class traits> template <class charT, class traits>
class basic_regex_parser : public basic_regex_creator<charT, traits> class basic_regex_parser : public basic_regex_creator<charT, traits>
{ {
@ -868,7 +883,7 @@ escape_type_class_jump:
return false; return false;
} }
const charT* pc = m_position; const charT* pc = m_position;
int i = this->m_traits.toi(pc, m_end, 10); boost::intmax_t i = this->m_traits.toi(pc, m_end, 10);
if((i < 0) && syn_end) if((i < 0) && syn_end)
{ {
// Check for a named capture, get the leftmost one if there is more than one: // Check for a named capture, get the leftmost one if there is more than one:
@ -1075,7 +1090,7 @@ bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
// parse a repeat-range: // parse a repeat-range:
// //
std::size_t min, max; std::size_t min, max;
int v; boost::intmax_t v;
// skip whitespace: // skip whitespace:
while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
++m_position; ++m_position;
@ -1094,7 +1109,7 @@ bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
// get min: // get min:
v = this->m_traits.toi(m_position, m_end, 10); v = this->m_traits.toi(m_position, m_end, 10);
// skip whitespace: // skip whitespace:
if(v < 0) if((v < 0) || (v > umax()))
{ {
if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
{ {
@ -1120,7 +1135,7 @@ bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position; while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
return parse_literal(); return parse_literal();
} }
min = v; min = static_cast<std::size_t>(v);
// see if we have a comma: // see if we have a comma:
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma) if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
{ {
@ -1143,7 +1158,7 @@ bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
} }
// get the value if any: // get the value if any:
v = this->m_traits.toi(m_position, m_end, 10); v = this->m_traits.toi(m_position, m_end, 10);
max = (v >= 0) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)(); max = ((v >= 0) && (v < umax())) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
} }
else else
{ {
@ -1665,19 +1680,19 @@ digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_cha
// does a value fit in the specified charT type? // does a value fit in the specified charT type?
// //
template <class charT> template <class charT>
bool valid_value(charT, int v, const mpl::true_&) bool valid_value(charT, boost::intmax_t v, const mpl::true_&)
{ {
return (v >> (sizeof(charT) * CHAR_BIT)) == 0; return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
} }
template <class charT> template <class charT>
bool valid_value(charT, int, const mpl::false_&) bool valid_value(charT, boost::intmax_t, const mpl::false_&)
{ {
return true; // v will always fit in a charT return true; // v will always fit in a charT
} }
template <class charT> template <class charT>
bool valid_value(charT c, int v) bool valid_value(charT c, boost::intmax_t v)
{ {
return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(int))>()); return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(boost::intmax_t))>());
} }
template <class charT, class traits> template <class charT, class traits>
@ -1753,10 +1768,10 @@ charT basic_regex_parser<charT, traits>::unescape_character()
fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence."); fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
return result; return result;
} }
int i = this->m_traits.toi(m_position, m_end, 16); boost::intmax_t i = this->m_traits.toi(m_position, m_end, 16);
if((m_position == m_end) if((m_position == m_end)
|| (i < 0) || (i < 0)
|| ((std::numeric_limits<charT>::is_specialized) && (i > (int)(std::numeric_limits<charT>::max)())) || ((std::numeric_limits<charT>::is_specialized) && (i > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
|| (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace)) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
{ {
// Rewind to start of escape: // Rewind to start of escape:
@ -1771,7 +1786,7 @@ charT basic_regex_parser<charT, traits>::unescape_character()
else else
{ {
std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position)); std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position));
int i = this->m_traits.toi(m_position, m_position + len, 16); boost::intmax_t i = this->m_traits.toi(m_position, m_position + len, 16);
if((i < 0) if((i < 0)
|| !valid_value(charT(0), i)) || !valid_value(charT(0), i))
{ {
@ -1790,7 +1805,7 @@ charT basic_regex_parser<charT, traits>::unescape_character()
// followed by up to 3 octal digits: // followed by up to 3 octal digits:
std::ptrdiff_t len = (std::min)(::boost::BOOST_REGEX_DETAIL_NS::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4)); std::ptrdiff_t len = (std::min)(::boost::BOOST_REGEX_DETAIL_NS::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
const charT* bp = m_position; const charT* bp = m_position;
int val = this->m_traits.toi(bp, bp + 1, 8); boost::intmax_t val = this->m_traits.toi(bp, bp + 1, 8);
if(val != 0) if(val != 0)
{ {
// Rewind to start of escape: // Rewind to start of escape:
@ -1801,7 +1816,7 @@ charT basic_regex_parser<charT, traits>::unescape_character()
return result; return result;
} }
val = this->m_traits.toi(m_position, m_position + len, 8); val = this->m_traits.toi(m_position, m_position + len, 8);
if(val < 0) if((val < 0) || (val > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
{ {
// Rewind to start of escape: // Rewind to start of escape:
--m_position; --m_position;
@ -1874,7 +1889,7 @@ bool basic_regex_parser<charT, traits>::parse_backref()
{ {
BOOST_ASSERT(m_position != m_end); BOOST_ASSERT(m_position != m_end);
const charT* pc = m_position; const charT* pc = m_position;
int i = this->m_traits.toi(pc, pc + 1, 10); boost::intmax_t i = this->m_traits.toi(pc, pc + 1, 10);
if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs))) if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
{ {
// not a backref at all but an octal escape sequence: // not a backref at all but an octal escape sequence:
@ -1996,7 +2011,7 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
int max_mark = m_max_mark; int max_mark = m_max_mark;
m_mark_reset = -1; m_mark_reset = -1;
m_max_mark = m_mark_count; m_max_mark = m_mark_count;
int v; boost::intmax_t v;
// //
// select the actual extension used: // select the actual extension used:
// //

View File

@ -1027,11 +1027,11 @@ public:
return m_pimpl->isctype(c, f); return m_pimpl->isctype(c, f);
#endif #endif
} }
int toi(const charT*& p1, const charT* p2, int radix)const; boost::intmax_t toi(const charT*& p1, const charT* p2, int radix)const;
int value(charT c, int radix)const int value(charT c, int radix)const
{ {
const charT* pc = &c; const charT* pc = &c;
return toi(pc, pc + 1, radix); return (int)toi(pc, pc + 1, radix);
} }
locale_type imbue(locale_type l) locale_type imbue(locale_type l)
{ {
@ -1069,7 +1069,7 @@ private:
template <class charT> template <class charT>
int cpp_regex_traits<charT>::toi(const charT*& first, const charT* last, int radix)const boost::intmax_t cpp_regex_traits<charT>::toi(const charT*& first, const charT* last, int radix)const
{ {
BOOST_REGEX_DETAIL_NS::parser_buf<charT> sbuf; // buffer for parsing numbers. BOOST_REGEX_DETAIL_NS::parser_buf<charT> sbuf; // buffer for parsing numbers.
std::basic_istream<charT> is(&sbuf); // stream for parsing numbers. std::basic_istream<charT> is(&sbuf); // stream for parsing numbers.
@ -1082,7 +1082,7 @@ int cpp_regex_traits<charT>::toi(const charT*& first, const charT* last, int rad
if(std::abs(radix) == 16) is >> std::hex; if(std::abs(radix) == 16) is >> std::hex;
else if(std::abs(radix) == 8) is >> std::oct; else if(std::abs(radix) == 8) is >> std::oct;
else is >> std::dec; else is >> std::dec;
int val; boost::intmax_t val;
if(is >> val) if(is >> val)
{ {
first = first + ((last - first) - sbuf.in_avail()); first = first + ((last - first) - sbuf.in_avail());

View File

@ -157,7 +157,7 @@ bool cpp_regex_traits_implementation<BOOST_REGEX_CHAR_T>::isctype(const BOOST_RE
#endif #endif
} // namespace } // namespace
template BOOST_REGEX_DECL template BOOST_REGEX_DECL
int cpp_regex_traits<BOOST_REGEX_CHAR_T>::toi(const BOOST_REGEX_CHAR_T*& first, const BOOST_REGEX_CHAR_T* last, int radix)const; boost::intmax_t cpp_regex_traits<BOOST_REGEX_CHAR_T>::toi(const BOOST_REGEX_CHAR_T*& first, const BOOST_REGEX_CHAR_T* last, int radix)const;
template BOOST_REGEX_DECL template BOOST_REGEX_DECL
std::string cpp_regex_traits<BOOST_REGEX_CHAR_T>::catalog_name(const std::string& name); std::string cpp_regex_traits<BOOST_REGEX_CHAR_T>::catalog_name(const std::string& name);
template BOOST_REGEX_DECL template BOOST_REGEX_DECL

View File

@ -206,7 +206,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_imp()
search_base = base; search_base = base;
state_count = 0; state_count = 0;
m_match_flags |= regex_constants::match_all; m_match_flags |= regex_constants::match_all;
m_presult->set_size((m_match_flags & match_nosubs) ? 1 : 1 + re.mark_count(), search_base, last); m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), search_base, last);
m_presult->set_base(base); m_presult->set_base(base);
m_presult->set_named_subs(this->re.get_named_subs()); m_presult->set_named_subs(this->re.get_named_subs());
if(m_match_flags & match_posix) if(m_match_flags & match_posix)
@ -268,7 +268,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::find_imp()
// reset our state machine: // reset our state machine:
search_base = position = base; search_base = position = base;
pstate = re.get_first_state(); pstate = re.get_first_state();
m_presult->set_size((m_match_flags & match_nosubs) ? 1 : 1 + re.mark_count(), base, last); m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), base, last);
m_presult->set_base(base); m_presult->set_base(base);
m_presult->set_named_subs(this->re.get_named_subs()); m_presult->set_named_subs(this->re.get_named_subs());
m_match_flags |= regex_constants::match_init; m_match_flags |= regex_constants::match_init;
@ -287,13 +287,13 @@ bool perl_matcher<BidiIterator, Allocator, traits>::find_imp()
++position; ++position;
} }
// reset $` start: // reset $` start:
m_presult->set_size((m_match_flags & match_nosubs) ? 1 : 1 + re.mark_count(), search_base, last); m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), search_base, last);
//if((base != search_base) && (base == backstop)) //if((base != search_base) && (base == backstop))
// m_match_flags |= match_prev_avail; // m_match_flags |= match_prev_avail;
} }
if(m_match_flags & match_posix) if(m_match_flags & match_posix)
{ {
m_result.set_size(1 + re.mark_count(), base, last); m_result.set_size(static_cast<typename results_type::size_type>(1u + re.mark_count()), base, last);
m_result.set_base(base); m_result.set_base(base);
} }

View File

@ -86,7 +86,7 @@ struct trivial_format_traits
} }
int toi(const charT*& p1, const charT* p2, int radix)const int toi(const charT*& p1, const charT* p2, int radix)const
{ {
return global_toi(p1, p2, radix, *this); return (int)global_toi(p1, p2, radix, *this);
} }
}; };
@ -165,7 +165,7 @@ private:
std::vector<char_type> v(i, j); std::vector<char_type> v(i, j);
const char_type* start = &v[0]; const char_type* start = &v[0];
const char_type* pos = start; const char_type* pos = start;
int r = m_traits.toi(pos, &v[0] + v.size(), base); int r = (int)m_traits.toi(pos, &v[0] + v.size(), base);
std::advance(i, pos - start); std::advance(i, pos - start);
return r; return r;
} }

View File

@ -109,7 +109,7 @@ struct default_wrapper : public BaseT
{ {
return ((c & 0x7f) == c) ? get_default_escape_syntax_type(static_cast<char>(c)) : ::boost::regex_constants::escape_type_identity; return ((c & 0x7f) == c) ? get_default_escape_syntax_type(static_cast<char>(c)) : ::boost::regex_constants::escape_type_identity;
} }
int toi(const char_type*& p1, const char_type* p2, int radix)const boost::intmax_t toi(const char_type*& p1, const char_type* p2, int radix)const
{ {
return ::boost::BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this); return ::boost::BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this);
} }

View File

@ -304,13 +304,13 @@ int global_value(charT c)
return -1; return -1;
} }
template <class charT, class traits> template <class charT, class traits>
int global_toi(const charT*& p1, const charT* p2, int radix, const traits& t) boost::intmax_t global_toi(const charT*& p1, const charT* p2, int radix, const traits& t)
{ {
(void)t; // warning suppression (void)t; // warning suppression
int next_value = t.value(*p1, radix); boost::intmax_t next_value = t.value(*p1, radix);
if((p1 == p2) || (next_value < 0) || (next_value >= radix)) if((p1 == p2) || (next_value < 0) || (next_value >= radix))
return -1; return -1;
int result = 0; boost::intmax_t result = 0;
while(p1 != p2) while(p1 != p2)
{ {
next_value = t.value(*p1, radix); next_value = t.value(*p1, radix);

View File

@ -640,13 +640,13 @@ public:
return true; return true;
return false; return false;
} }
int toi(const charT*& p1, const charT* p2, int radix)const boost::intmax_t toi(const charT*& p1, const charT* p2, int radix)const
{ {
return ::boost::BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this); return ::boost::BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this);
} }
int value(charT c, int radix)const int value(charT c, int radix)const
{ {
int result = ::boost::BOOST_REGEX_DETAIL_NS::global_value(c); int result = (int)::boost::BOOST_REGEX_DETAIL_NS::global_value(c);
return result < radix ? result : -1; return result < radix ? result : -1;
} }
locale_type imbue(locale_type l) locale_type imbue(locale_type l)

View File

@ -153,6 +153,7 @@ test-suite regex
[ link concepts/concept_check.cpp ../build//boost_regex ] [ link concepts/concept_check.cpp ../build//boost_regex ]
[ link concepts/icu_concept_check.cpp ../build//boost_regex ] [ link concepts/icu_concept_check.cpp ../build//boost_regex ]
[ link concepts/range_concept_check.cpp ../build//boost_regex ] [ link concepts/range_concept_check.cpp ../build//boost_regex ]
[ run concepts/test_bug_11988.cpp ../build//boost_regex ]
[ run [ run
# sources # sources