mirror of
https://github.com/boostorg/regex.git
synced 2026-04-28 18:02:15 +02:00
Add checks for invalid UTF-8 sequences, see: https://svn.boost.org/trac/boost/ticket/9473
This commit is contained in:
@@ -26,6 +26,7 @@ this gets bumped up from v4 to v5.
|
||||
* Fixed bug in collation code that failed if the locale generated collation strings with embedded nul's,
|
||||
see [@https://svn.boost.org/trac/boost/ticket/9451 #9451].
|
||||
* Apply patch for unusual thread usage (no statically initiallized mutexes), see [@https://svn.boost.org/trac/boost/ticket/9461 #9461].
|
||||
* Added better checks for invalid UTF-8 sequences, see [@https://svn.boost.org/trac/boost/ticket/9473 #9473].
|
||||
|
||||
[h4 Boost-1.54]
|
||||
|
||||
|
||||
@@ -59,6 +59,13 @@
|
||||
Fixed bug in collation code that failed if the locale generated collation
|
||||
strings with embedded nul's, see <a href="https://svn.boost.org/trac/boost/ticket/9451" target="_top">#9451</a>.
|
||||
</li>
|
||||
<li class="listitem">
|
||||
Apply patch for unusual thread usage (no statically initiallized mutexes),
|
||||
see <a href="https://svn.boost.org/trac/boost/ticket/9461" target="_top">#9461</a>.
|
||||
</li>
|
||||
<li class="listitem">
|
||||
Added better checks for invalid UTF-8 sequences, see <a href="https://svn.boost.org/trac/boost/ticket/9473" target="_top">#9473</a>.
|
||||
</li>
|
||||
</ul></div>
|
||||
<h5>
|
||||
<a name="boost_regex.background_information.history.h1"></a>
|
||||
|
||||
+1
-1
@@ -198,7 +198,7 @@
|
||||
</p>
|
||||
</div>
|
||||
<table xmlns:rev="http://www.cs.rpi.edu/~gregod/boost/tools/doc/revision" width="100%"><tr>
|
||||
<td align="left"><p><small>Last revised: December 18, 2013 at 17:16:00 GMT</small></p></td>
|
||||
<td align="left"><p><small>Last revised: December 19, 2013 at 10:47:27 GMT</small></p></td>
|
||||
<td align="right"><div class="copyright-footer"></div></td>
|
||||
</tr></table>
|
||||
<hr>
|
||||
|
||||
@@ -629,9 +629,15 @@ private:
|
||||
0x1FFFFFu,
|
||||
};
|
||||
m_value &= masks[extra];
|
||||
// check the result:
|
||||
// check the result is in range:
|
||||
if(m_value > static_cast<U32Type>(0x10FFFFu))
|
||||
invalid_sequence();
|
||||
// The result must not be a surrogate:
|
||||
if((m_value >= static_cast<U32Type>(0xD800)) && (m_value <= static_cast<U32Type>(0xDFFF)))
|
||||
invalid_sequence();
|
||||
// We should not have had an invalidly encoded UTF8 sequence:
|
||||
if((extra > 0) && (m_value <= static_cast<U32Type>(masks[extra - 1])))
|
||||
invalid_sequence();
|
||||
}
|
||||
BaseIterator m_position;
|
||||
mutable U32Type m_value;
|
||||
|
||||
@@ -108,6 +108,46 @@ void spot_checks()
|
||||
BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const boost::uint8_t*>(bad_seq3, bad_seq3, bad_seq3 + 5), boost::u8_to_u32_iterator<const boost::uint8_t*>(bad_seq3 + 5, bad_seq3, bad_seq3 + 5)), std::out_of_range);
|
||||
boost::uint8_t bad_seq4[5] = { '.', '*', 0xf6, '.', '*' };
|
||||
BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const boost::uint8_t*>(bad_seq4, bad_seq4, bad_seq4 + 5), boost::u8_to_u32_iterator<const boost::uint8_t*>(bad_seq4 + 5, bad_seq4, bad_seq4 + 5)), std::out_of_range);
|
||||
|
||||
// Invalid sequences containing surrogate pairs:
|
||||
const char* invalid_pseq = "\xed\xa0\x80"; // single lowest lead surrogate U+D800
|
||||
BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
|
||||
invalid_pseq = "\xed\xb0\x80"; // single lowest trail surrogate U+DC00
|
||||
BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
|
||||
invalid_pseq = "\xed\xb0\x80"; // single lowest trail surrogate U+DC00
|
||||
BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
|
||||
invalid_pseq = "\xed\xbf\xbf"; // single highest trail surrogate U+DFFF
|
||||
BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
|
||||
|
||||
// overlong encodings (created by left-padding with zero bits)
|
||||
invalid_pseq = "\xc0\x80"; // illegal 2-byte encoding of 1-byte character U+0000
|
||||
BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
|
||||
invalid_pseq = "\xe0\x80\x80"; // illegal 3-byte encoding of 1-byte character U+0000
|
||||
BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
|
||||
invalid_pseq = "\xf0\x80\x80\x80"; // illegal 4-byte encoding of 1-byte character U+0000
|
||||
BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
|
||||
|
||||
invalid_pseq = "\xc1\xbf"; // illegal 2-byte encoding of 1-byte character U+007F
|
||||
BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
|
||||
invalid_pseq = "\xe0\x81\xbf"; // illegal 3-byte encoding of 1-byte character U+007F
|
||||
BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
|
||||
invalid_pseq = "\xf0\x80\x81\xbf"; // illegal 4-byte encoding of 1-byte character U+007F
|
||||
BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
|
||||
|
||||
invalid_pseq = "\xe0\x82\x80"; // illegal 3-byte encoding of 2-byte character U+0080
|
||||
BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
|
||||
invalid_pseq = "\xf0\x80\x82\x80"; // illegal 4-byte encoding of 2-byte character U+0080
|
||||
BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
|
||||
|
||||
invalid_pseq = "\xe0\x9f\xbf"; // illegal 3-byte encoding of 2-byte character U+07FF
|
||||
BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
|
||||
invalid_pseq = "\xf0\x80\x9f\xbf"; // illegal 4-byte encoding of 2-byte character U+07FF
|
||||
BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
|
||||
|
||||
invalid_pseq = "\xf0\x80\xa0\x80"; // illegal 4-byte encoding of 3-byte character U+0800
|
||||
BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
|
||||
invalid_pseq = "\xf0\x8f\xbf\xbf"; // illegal 4-byte encoding of 3-byte character U+FFFF
|
||||
BOOST_CHECK_THROW(iterate_over(boost::u8_to_u32_iterator<const char*>(invalid_pseq, invalid_pseq, invalid_pseq + std::strlen(invalid_pseq)), boost::u8_to_u32_iterator<const char*>(invalid_pseq + std::strlen(invalid_pseq), invalid_pseq, invalid_pseq + std::strlen(invalid_pseq))), std::out_of_range);
|
||||
}
|
||||
|
||||
void test(const std::vector< ::boost::uint32_t>& v)
|
||||
|
||||
Reference in New Issue
Block a user