mirror of
https://github.com/boostorg/regex.git
synced 2025-07-17 14:22:08 +02:00
Allow types wider than int in \x{} expressions (for char32_t etc).
Fixes: https://svn.boost.org/trac/boost/ticket/11988.
This commit is contained in:
@ -152,7 +152,7 @@ public:
|
|||||||
char_class_type lookup_classname(const char_type* p1, const char_type* p2) const;
|
char_class_type lookup_classname(const char_type* p1, const char_type* p2) const;
|
||||||
string_type lookup_collatename(const char_type* p1, const char_type* p2) const;
|
string_type lookup_collatename(const char_type* p1, const char_type* p2) const;
|
||||||
bool isctype(char_type c, char_class_type f) const;
|
bool isctype(char_type c, char_class_type f) const;
|
||||||
int toi(const char_type*& p1, const char_type* p2, int radix)const
|
boost::intmax_t toi(const char_type*& p1, const char_type* p2, int radix)const
|
||||||
{
|
{
|
||||||
return BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this);
|
return BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this);
|
||||||
}
|
}
|
||||||
|
@ -38,6 +38,21 @@ namespace BOOST_REGEX_DETAIL_NS{
|
|||||||
#pragma warning(disable:4244 4800)
|
#pragma warning(disable:4244 4800)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Overload selected by the zero-argument umax() when boost::intmax_t has
// fewer numeric_limits<>::digits than std::size_t. Returns the largest
// boost::intmax_t value, or INT_MAX if numeric_limits is not specialized
// for boost::intmax_t. The tag argument is used only for overload selection.
inline boost::intmax_t umax(mpl::false_ const&)
|
||||||
|
{
|
||||||
|
// Get out clause here, just in case numeric_limits is unspecialized:
|
||||||
|
return std::numeric_limits<boost::intmax_t>::is_specialized ? (std::numeric_limits<boost::intmax_t>::max)() : INT_MAX;
|
||||||
|
}
|
||||||
|
// Overload selected by the zero-argument umax() when boost::intmax_t has at
// least as many numeric_limits<>::digits as std::size_t. Returns the largest
// std::size_t value, converted to boost::intmax_t by the return statement.
// The tag argument is used only for overload selection.
inline boost::intmax_t umax(mpl::true_ const&)
|
||||||
|
{
|
||||||
|
return (std::numeric_limits<std::size_t>::max)();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Upper limit used when range-checking values parsed as boost::intmax_t
// before they are stored in a std::size_t. Compares the digit counts of the
// two types at compile time and forwards to the matching tagged overload:
// mpl::true_ when boost::intmax_t is at least as wide as std::size_t,
// mpl::false_ otherwise.
inline boost::intmax_t umax()
|
||||||
|
{
|
||||||
|
return umax(mpl::bool_<std::numeric_limits<boost::intmax_t>::digits >= std::numeric_limits<std::size_t>::digits>());
|
||||||
|
}
|
||||||
|
|
||||||
template <class charT, class traits>
|
template <class charT, class traits>
|
||||||
class basic_regex_parser : public basic_regex_creator<charT, traits>
|
class basic_regex_parser : public basic_regex_creator<charT, traits>
|
||||||
{
|
{
|
||||||
@ -868,7 +883,7 @@ escape_type_class_jump:
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const charT* pc = m_position;
|
const charT* pc = m_position;
|
||||||
int i = this->m_traits.toi(pc, m_end, 10);
|
boost::intmax_t i = this->m_traits.toi(pc, m_end, 10);
|
||||||
if((i < 0) && syn_end)
|
if((i < 0) && syn_end)
|
||||||
{
|
{
|
||||||
// Check for a named capture, get the leftmost one if there is more than one:
|
// Check for a named capture, get the leftmost one if there is more than one:
|
||||||
@ -1075,7 +1090,7 @@ bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
|
|||||||
// parse a repeat-range:
|
// parse a repeat-range:
|
||||||
//
|
//
|
||||||
std::size_t min, max;
|
std::size_t min, max;
|
||||||
int v;
|
boost::intmax_t v;
|
||||||
// skip whitespace:
|
// skip whitespace:
|
||||||
while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
|
while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
|
||||||
++m_position;
|
++m_position;
|
||||||
@ -1094,7 +1109,7 @@ bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
|
|||||||
// get min:
|
// get min:
|
||||||
v = this->m_traits.toi(m_position, m_end, 10);
|
v = this->m_traits.toi(m_position, m_end, 10);
|
||||||
// skip whitespace:
|
// skip whitespace:
|
||||||
if(v < 0)
|
if((v < 0) || (v > umax()))
|
||||||
{
|
{
|
||||||
if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
|
if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
|
||||||
{
|
{
|
||||||
@ -1120,7 +1135,7 @@ bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
|
|||||||
while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
|
while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
|
||||||
return parse_literal();
|
return parse_literal();
|
||||||
}
|
}
|
||||||
min = v;
|
min = static_cast<std::size_t>(v);
|
||||||
// see if we have a comma:
|
// see if we have a comma:
|
||||||
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
|
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
|
||||||
{
|
{
|
||||||
@ -1143,7 +1158,7 @@ bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
|
|||||||
}
|
}
|
||||||
// get the value if any:
|
// get the value if any:
|
||||||
v = this->m_traits.toi(m_position, m_end, 10);
|
v = this->m_traits.toi(m_position, m_end, 10);
|
||||||
max = (v >= 0) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
|
max = ((v >= 0) && (v < umax())) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -1665,19 +1680,19 @@ digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_cha
|
|||||||
// does a value fit in the specified charT type?
|
// does a value fit in the specified charT type?
|
||||||
//
|
//
|
||||||
template <class charT>
|
template <class charT>
|
||||||
bool valid_value(charT, int v, const mpl::true_&)
|
bool valid_value(charT, boost::intmax_t v, const mpl::true_&)
|
||||||
{
|
{
|
||||||
return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
|
return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
|
||||||
}
|
}
|
||||||
template <class charT>
|
template <class charT>
|
||||||
bool valid_value(charT, int, const mpl::false_&)
|
bool valid_value(charT, boost::intmax_t, const mpl::false_&)
|
||||||
{
|
{
|
||||||
return true; // v will always fit in a charT
|
return true; // v will always fit in a charT
|
||||||
}
|
}
|
||||||
template <class charT>
|
template <class charT>
|
||||||
bool valid_value(charT c, int v)
|
bool valid_value(charT c, boost::intmax_t v)
|
||||||
{
|
{
|
||||||
return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(int))>());
|
return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(boost::intmax_t))>());
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class charT, class traits>
|
template <class charT, class traits>
|
||||||
@ -1753,10 +1768,10 @@ charT basic_regex_parser<charT, traits>::unescape_character()
|
|||||||
fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
|
fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
int i = this->m_traits.toi(m_position, m_end, 16);
|
boost::intmax_t i = this->m_traits.toi(m_position, m_end, 16);
|
||||||
if((m_position == m_end)
|
if((m_position == m_end)
|
||||||
|| (i < 0)
|
|| (i < 0)
|
||||||
|| ((std::numeric_limits<charT>::is_specialized) && (i > (int)(std::numeric_limits<charT>::max)()))
|
|| ((std::numeric_limits<charT>::is_specialized) && (i > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
|
||||||
|| (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
|
|| (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
|
||||||
{
|
{
|
||||||
// Rewind to start of escape:
|
// Rewind to start of escape:
|
||||||
@ -1771,7 +1786,7 @@ charT basic_regex_parser<charT, traits>::unescape_character()
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position));
|
std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position));
|
||||||
int i = this->m_traits.toi(m_position, m_position + len, 16);
|
boost::intmax_t i = this->m_traits.toi(m_position, m_position + len, 16);
|
||||||
if((i < 0)
|
if((i < 0)
|
||||||
|| !valid_value(charT(0), i))
|
|| !valid_value(charT(0), i))
|
||||||
{
|
{
|
||||||
@ -1790,7 +1805,7 @@ charT basic_regex_parser<charT, traits>::unescape_character()
|
|||||||
// followed by up to 3 octal digits:
|
// followed by up to 3 octal digits:
|
||||||
std::ptrdiff_t len = (std::min)(::boost::BOOST_REGEX_DETAIL_NS::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
|
std::ptrdiff_t len = (std::min)(::boost::BOOST_REGEX_DETAIL_NS::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
|
||||||
const charT* bp = m_position;
|
const charT* bp = m_position;
|
||||||
int val = this->m_traits.toi(bp, bp + 1, 8);
|
boost::intmax_t val = this->m_traits.toi(bp, bp + 1, 8);
|
||||||
if(val != 0)
|
if(val != 0)
|
||||||
{
|
{
|
||||||
// Rewind to start of escape:
|
// Rewind to start of escape:
|
||||||
@ -1801,7 +1816,7 @@ charT basic_regex_parser<charT, traits>::unescape_character()
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
val = this->m_traits.toi(m_position, m_position + len, 8);
|
val = this->m_traits.toi(m_position, m_position + len, 8);
|
||||||
if(val < 0)
|
if((val < 0) || (val > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
|
||||||
{
|
{
|
||||||
// Rewind to start of escape:
|
// Rewind to start of escape:
|
||||||
--m_position;
|
--m_position;
|
||||||
@ -1874,7 +1889,7 @@ bool basic_regex_parser<charT, traits>::parse_backref()
|
|||||||
{
|
{
|
||||||
BOOST_ASSERT(m_position != m_end);
|
BOOST_ASSERT(m_position != m_end);
|
||||||
const charT* pc = m_position;
|
const charT* pc = m_position;
|
||||||
int i = this->m_traits.toi(pc, pc + 1, 10);
|
boost::intmax_t i = this->m_traits.toi(pc, pc + 1, 10);
|
||||||
if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
|
if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
|
||||||
{
|
{
|
||||||
// not a backref at all but an octal escape sequence:
|
// not a backref at all but an octal escape sequence:
|
||||||
@ -1996,7 +2011,7 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
|||||||
int max_mark = m_max_mark;
|
int max_mark = m_max_mark;
|
||||||
m_mark_reset = -1;
|
m_mark_reset = -1;
|
||||||
m_max_mark = m_mark_count;
|
m_max_mark = m_mark_count;
|
||||||
int v;
|
boost::intmax_t v;
|
||||||
//
|
//
|
||||||
// select the actual extension used:
|
// select the actual extension used:
|
||||||
//
|
//
|
||||||
|
@ -1027,11 +1027,11 @@ public:
|
|||||||
return m_pimpl->isctype(c, f);
|
return m_pimpl->isctype(c, f);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
int toi(const charT*& p1, const charT* p2, int radix)const;
|
boost::intmax_t toi(const charT*& p1, const charT* p2, int radix)const;
|
||||||
int value(charT c, int radix)const
|
int value(charT c, int radix)const
|
||||||
{
|
{
|
||||||
const charT* pc = &c;
|
const charT* pc = &c;
|
||||||
return toi(pc, pc + 1, radix);
|
return (int)toi(pc, pc + 1, radix);
|
||||||
}
|
}
|
||||||
locale_type imbue(locale_type l)
|
locale_type imbue(locale_type l)
|
||||||
{
|
{
|
||||||
@ -1069,7 +1069,7 @@ private:
|
|||||||
|
|
||||||
|
|
||||||
template <class charT>
|
template <class charT>
|
||||||
int cpp_regex_traits<charT>::toi(const charT*& first, const charT* last, int radix)const
|
boost::intmax_t cpp_regex_traits<charT>::toi(const charT*& first, const charT* last, int radix)const
|
||||||
{
|
{
|
||||||
BOOST_REGEX_DETAIL_NS::parser_buf<charT> sbuf; // buffer for parsing numbers.
|
BOOST_REGEX_DETAIL_NS::parser_buf<charT> sbuf; // buffer for parsing numbers.
|
||||||
std::basic_istream<charT> is(&sbuf); // stream for parsing numbers.
|
std::basic_istream<charT> is(&sbuf); // stream for parsing numbers.
|
||||||
@ -1082,7 +1082,7 @@ int cpp_regex_traits<charT>::toi(const charT*& first, const charT* last, int rad
|
|||||||
if(std::abs(radix) == 16) is >> std::hex;
|
if(std::abs(radix) == 16) is >> std::hex;
|
||||||
else if(std::abs(radix) == 8) is >> std::oct;
|
else if(std::abs(radix) == 8) is >> std::oct;
|
||||||
else is >> std::dec;
|
else is >> std::dec;
|
||||||
int val;
|
boost::intmax_t val;
|
||||||
if(is >> val)
|
if(is >> val)
|
||||||
{
|
{
|
||||||
first = first + ((last - first) - sbuf.in_avail());
|
first = first + ((last - first) - sbuf.in_avail());
|
||||||
|
@ -157,7 +157,7 @@ bool cpp_regex_traits_implementation<BOOST_REGEX_CHAR_T>::isctype(const BOOST_RE
|
|||||||
#endif
|
#endif
|
||||||
} // namespace
|
} // namespace
|
||||||
template BOOST_REGEX_DECL
|
template BOOST_REGEX_DECL
|
||||||
int cpp_regex_traits<BOOST_REGEX_CHAR_T>::toi(const BOOST_REGEX_CHAR_T*& first, const BOOST_REGEX_CHAR_T* last, int radix)const;
|
boost::intmax_t cpp_regex_traits<BOOST_REGEX_CHAR_T>::toi(const BOOST_REGEX_CHAR_T*& first, const BOOST_REGEX_CHAR_T* last, int radix)const;
|
||||||
template BOOST_REGEX_DECL
|
template BOOST_REGEX_DECL
|
||||||
std::string cpp_regex_traits<BOOST_REGEX_CHAR_T>::catalog_name(const std::string& name);
|
std::string cpp_regex_traits<BOOST_REGEX_CHAR_T>::catalog_name(const std::string& name);
|
||||||
template BOOST_REGEX_DECL
|
template BOOST_REGEX_DECL
|
||||||
|
@ -206,7 +206,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_imp()
|
|||||||
search_base = base;
|
search_base = base;
|
||||||
state_count = 0;
|
state_count = 0;
|
||||||
m_match_flags |= regex_constants::match_all;
|
m_match_flags |= regex_constants::match_all;
|
||||||
m_presult->set_size((m_match_flags & match_nosubs) ? 1 : 1 + re.mark_count(), search_base, last);
|
m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), search_base, last);
|
||||||
m_presult->set_base(base);
|
m_presult->set_base(base);
|
||||||
m_presult->set_named_subs(this->re.get_named_subs());
|
m_presult->set_named_subs(this->re.get_named_subs());
|
||||||
if(m_match_flags & match_posix)
|
if(m_match_flags & match_posix)
|
||||||
@ -268,7 +268,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::find_imp()
|
|||||||
// reset our state machine:
|
// reset our state machine:
|
||||||
search_base = position = base;
|
search_base = position = base;
|
||||||
pstate = re.get_first_state();
|
pstate = re.get_first_state();
|
||||||
m_presult->set_size((m_match_flags & match_nosubs) ? 1 : 1 + re.mark_count(), base, last);
|
m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), base, last);
|
||||||
m_presult->set_base(base);
|
m_presult->set_base(base);
|
||||||
m_presult->set_named_subs(this->re.get_named_subs());
|
m_presult->set_named_subs(this->re.get_named_subs());
|
||||||
m_match_flags |= regex_constants::match_init;
|
m_match_flags |= regex_constants::match_init;
|
||||||
@ -287,13 +287,13 @@ bool perl_matcher<BidiIterator, Allocator, traits>::find_imp()
|
|||||||
++position;
|
++position;
|
||||||
}
|
}
|
||||||
// reset $` start:
|
// reset $` start:
|
||||||
m_presult->set_size((m_match_flags & match_nosubs) ? 1 : 1 + re.mark_count(), search_base, last);
|
m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), search_base, last);
|
||||||
//if((base != search_base) && (base == backstop))
|
//if((base != search_base) && (base == backstop))
|
||||||
// m_match_flags |= match_prev_avail;
|
// m_match_flags |= match_prev_avail;
|
||||||
}
|
}
|
||||||
if(m_match_flags & match_posix)
|
if(m_match_flags & match_posix)
|
||||||
{
|
{
|
||||||
m_result.set_size(1 + re.mark_count(), base, last);
|
m_result.set_size(static_cast<typename results_type::size_type>(1u + re.mark_count()), base, last);
|
||||||
m_result.set_base(base);
|
m_result.set_base(base);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -86,7 +86,7 @@ struct trivial_format_traits
|
|||||||
}
|
}
|
||||||
int toi(const charT*& p1, const charT* p2, int radix)const
|
int toi(const charT*& p1, const charT* p2, int radix)const
|
||||||
{
|
{
|
||||||
return global_toi(p1, p2, radix, *this);
|
return (int)global_toi(p1, p2, radix, *this);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -165,7 +165,7 @@ private:
|
|||||||
std::vector<char_type> v(i, j);
|
std::vector<char_type> v(i, j);
|
||||||
const char_type* start = &v[0];
|
const char_type* start = &v[0];
|
||||||
const char_type* pos = start;
|
const char_type* pos = start;
|
||||||
int r = m_traits.toi(pos, &v[0] + v.size(), base);
|
int r = (int)m_traits.toi(pos, &v[0] + v.size(), base);
|
||||||
std::advance(i, pos - start);
|
std::advance(i, pos - start);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
@ -109,7 +109,7 @@ struct default_wrapper : public BaseT
|
|||||||
{
|
{
|
||||||
return ((c & 0x7f) == c) ? get_default_escape_syntax_type(static_cast<char>(c)) : ::boost::regex_constants::escape_type_identity;
|
return ((c & 0x7f) == c) ? get_default_escape_syntax_type(static_cast<char>(c)) : ::boost::regex_constants::escape_type_identity;
|
||||||
}
|
}
|
||||||
int toi(const char_type*& p1, const char_type* p2, int radix)const
|
boost::intmax_t toi(const char_type*& p1, const char_type* p2, int radix)const
|
||||||
{
|
{
|
||||||
return ::boost::BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this);
|
return ::boost::BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this);
|
||||||
}
|
}
|
||||||
|
@ -304,13 +304,13 @@ int global_value(charT c)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
template <class charT, class traits>
|
template <class charT, class traits>
|
||||||
int global_toi(const charT*& p1, const charT* p2, int radix, const traits& t)
|
boost::intmax_t global_toi(const charT*& p1, const charT* p2, int radix, const traits& t)
|
||||||
{
|
{
|
||||||
(void)t; // warning suppression
|
(void)t; // warning suppression
|
||||||
int next_value = t.value(*p1, radix);
|
boost::intmax_t next_value = t.value(*p1, radix);
|
||||||
if((p1 == p2) || (next_value < 0) || (next_value >= radix))
|
if((p1 == p2) || (next_value < 0) || (next_value >= radix))
|
||||||
return -1;
|
return -1;
|
||||||
int result = 0;
|
boost::intmax_t result = 0;
|
||||||
while(p1 != p2)
|
while(p1 != p2)
|
||||||
{
|
{
|
||||||
next_value = t.value(*p1, radix);
|
next_value = t.value(*p1, radix);
|
||||||
|
@ -640,13 +640,13 @@ public:
|
|||||||
return true;
|
return true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
int toi(const charT*& p1, const charT* p2, int radix)const
|
boost::intmax_t toi(const charT*& p1, const charT* p2, int radix)const
|
||||||
{
|
{
|
||||||
return ::boost::BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this);
|
return ::boost::BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this);
|
||||||
}
|
}
|
||||||
int value(charT c, int radix)const
|
int value(charT c, int radix)const
|
||||||
{
|
{
|
||||||
int result = ::boost::BOOST_REGEX_DETAIL_NS::global_value(c);
|
int result = (int)::boost::BOOST_REGEX_DETAIL_NS::global_value(c);
|
||||||
return result < radix ? result : -1;
|
return result < radix ? result : -1;
|
||||||
}
|
}
|
||||||
locale_type imbue(locale_type l)
|
locale_type imbue(locale_type l)
|
||||||
|
@ -153,6 +153,7 @@ test-suite regex
|
|||||||
[ link concepts/concept_check.cpp ../build//boost_regex ]
|
[ link concepts/concept_check.cpp ../build//boost_regex ]
|
||||||
[ link concepts/icu_concept_check.cpp ../build//boost_regex ]
|
[ link concepts/icu_concept_check.cpp ../build//boost_regex ]
|
||||||
[ link concepts/range_concept_check.cpp ../build//boost_regex ]
|
[ link concepts/range_concept_check.cpp ../build//boost_regex ]
|
||||||
|
[ run concepts/test_bug_11988.cpp ../build//boost_regex ]
|
||||||
|
|
||||||
[ run
|
[ run
|
||||||
# sources
|
# sources
|
||||||
|
Reference in New Issue
Block a user