forked from boostorg/regex
Add further error checking to UTF-8 decoding.
Fixes #7744. [SVN r81614]
This commit is contained in:
@ -520,9 +520,26 @@ public:
|
|||||||
}
|
}
|
||||||
void increment()
|
void increment()
|
||||||
{
|
{
|
||||||
|
// We must not start with a continuation character:
|
||||||
|
if((static_cast<boost::uint8_t>(*m_position) & 0xC0) == 0x80)
|
||||||
|
invalid_sequence();
|
||||||
// skip high surrogate first if there is one:
|
// skip high surrogate first if there is one:
|
||||||
unsigned c = detail::utf8_byte_count(*m_position);
|
unsigned c = detail::utf8_byte_count(*m_position);
|
||||||
|
if(m_value == pending_read)
|
||||||
|
{
|
||||||
|
// Since we haven't read in a value, we need to validate the code points:
|
||||||
|
for(unsigned i = 0; i < c; ++i)
|
||||||
|
{
|
||||||
|
++m_position;
|
||||||
|
// We must have a continuation byte:
|
||||||
|
if((i != c - 1) && ((static_cast<boost::uint8_t>(*m_position) & 0xC0) != 0x80))
|
||||||
|
invalid_sequence();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
std::advance(m_position, c);
|
std::advance(m_position, c);
|
||||||
|
}
|
||||||
m_value = pending_read;
|
m_value = pending_read;
|
||||||
}
|
}
|
||||||
void decrement()
|
void decrement()
|
||||||
@ -589,7 +606,7 @@ private:
|
|||||||
// we must not have a continuation character:
|
// we must not have a continuation character:
|
||||||
if((m_value & 0xC0u) == 0x80u)
|
if((m_value & 0xC0u) == 0x80u)
|
||||||
invalid_sequence();
|
invalid_sequence();
|
||||||
// see how many extra byts we have:
|
// see how many extra bytes we have:
|
||||||
unsigned extra = detail::utf8_trailing_byte_count(*m_position);
|
unsigned extra = detail::utf8_trailing_byte_count(*m_position);
|
||||||
// extract the extra bits, 6 from each extra byte:
|
// extract the extra bits, 6 from each extra byte:
|
||||||
BaseIterator next(m_position);
|
BaseIterator next(m_position);
|
||||||
@ -597,6 +614,9 @@ private:
|
|||||||
{
|
{
|
||||||
++next;
|
++next;
|
||||||
m_value <<= 6;
|
m_value <<= 6;
|
||||||
|
// We must have a continuation byte:
|
||||||
|
if((static_cast<boost::uint8_t>(*next) & 0xC0) != 0x80)
|
||||||
|
invalid_sequence();
|
||||||
m_value += static_cast<boost::uint8_t>(*next) & 0x3Fu;
|
m_value += static_cast<boost::uint8_t>(*next) & 0x3Fu;
|
||||||
}
|
}
|
||||||
// we now need to remove a few of the leftmost bits, but how many depends
|
// we now need to remove a few of the leftmost bits, but how many depends
|
||||||
|
@ -124,8 +124,8 @@ test-suite regex
|
|||||||
../build//boost_regex
|
../build//boost_regex
|
||||||
]
|
]
|
||||||
|
|
||||||
[ run unicode/unicode_iterator_test.cpp ../build//boost_regex : : : <define>TEST_UTF8 : unicode_iterator_test_utf8 ]
|
[ run unicode/unicode_iterator_test.cpp ../build//boost_regex : : : release <define>TEST_UTF8 : unicode_iterator_test_utf8 ]
|
||||||
[ run unicode/unicode_iterator_test.cpp ../build//boost_regex : : : <define>TEST_UTF16 : unicode_iterator_test_utf16 ]
|
[ run unicode/unicode_iterator_test.cpp ../build//boost_regex : : : release <define>TEST_UTF16 : unicode_iterator_test_utf16 ]
|
||||||
[ run static_mutex/static_mutex_test.cpp
|
[ run static_mutex/static_mutex_test.cpp
|
||||||
../../thread/build//boost_thread ../build//boost_regex
|
../../thread/build//boost_thread ../build//boost_regex
|
||||||
]
|
]
|
||||||
|
@ -103,6 +103,11 @@ void spot_checks()
|
|||||||
BOOST_CHECK_THROW(boost::u16_to_u32_iterator<const boost::uint16_t*>(bad_seq2, bad_seq2, bad_seq2 + 5), std::out_of_range);
|
BOOST_CHECK_THROW(boost::u16_to_u32_iterator<const boost::uint16_t*>(bad_seq2, bad_seq2, bad_seq2 + 5), std::out_of_range);
|
||||||
BOOST_CHECK_THROW(boost::u16_to_u32_iterator<const boost::uint16_t*>(bad_seq2 + 1, bad_seq2 + 1, bad_seq2 + 6), std::out_of_range);
|
BOOST_CHECK_THROW(boost::u16_to_u32_iterator<const boost::uint16_t*>(bad_seq2 + 1, bad_seq2 + 1, bad_seq2 + 6), std::out_of_range);
|
||||||
BOOST_CHECK_THROW(boost::u16_to_u32_iterator<const boost::uint16_t*>(bad_seq2 + 1, bad_seq2, bad_seq2 + 6), std::out_of_range);
|
BOOST_CHECK_THROW(boost::u16_to_u32_iterator<const boost::uint16_t*>(bad_seq2 + 1, bad_seq2, bad_seq2 + 6), std::out_of_range);
|
||||||
|
|
||||||
|
boost::uint8_t bad_seq3[5] = { '.', '*', 0xe4, '.', '*' };
|
||||||
|
BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const boost::uint8_t*>(bad_seq3, bad_seq3, bad_seq3 + 5), boost::u8_to_u32_iterator<const boost::uint8_t*>(bad_seq3 + 5, bad_seq3, bad_seq3 + 5)), std::out_of_range);
|
||||||
|
boost::uint8_t bad_seq4[5] = { '.', '*', 0xf6, '.', '*' };
|
||||||
|
BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const boost::uint8_t*>(bad_seq4, bad_seq4, bad_seq4 + 5), boost::u8_to_u32_iterator<const boost::uint8_t*>(bad_seq4 + 5, bad_seq4, bad_seq4 + 5)), std::out_of_range);
|
||||||
}
|
}
|
||||||
|
|
||||||
void test(const std::vector< ::boost::uint32_t>& v)
|
void test(const std::vector< ::boost::uint32_t>& v)
|
||||||
|
Reference in New Issue
Block a user