Fixed out-of-bounds access in utf8_codecvt_facet::do_length.

The loop in do_length used to dereference from_end pointer, which
could point to an out-of-bounds memory. Rewritten the loop to
avoid this and also make the logic a bit more clear.

Closes https://github.com/boostorg/detail/pull/21.
This commit is contained in:
Andrey Semashev
2021-03-17 14:12:36 +03:00
parent 99fc546b78
commit 131208d8cc

View File

@@ -188,26 +188,17 @@ int utf8_codecvt_facet::do_length(
#if BOOST_WORKAROUND(__IBMCPP__, BOOST_TESTED_AT(600))
throw()
#endif
{
// RG - this code is confusing! I need a better way to express it.
// and test cases.
// Invariants:
// 1) last_octet_count has the size of the last measured character
// 2) char_count holds the number of characters shown to fit
// within the bounds so far (no greater than max_limit)
// 3) from_next points to the octet 'last_octet_count' before the
// last measured character.
int last_octet_count=0;
std::size_t char_count = 0;
const char* from_next = from;
// Use "<" because the buffer may represent incomplete characters
while (from_next+last_octet_count <= from_end && char_count <= max_limit) {
from_next += last_octet_count;
last_octet_count = (get_octet_count(*from_next));
++char_count;
{
const char * from_next = from;
for (std::size_t char_count = 0u; char_count < max_limit && from_next < from_end; ++char_count) {
unsigned int octet_count = get_octet_count(*from_next);
// The buffer may represent incomplete characters, so terminate early if one is found
if (octet_count > static_cast<std::size_t>(from_end - from_next))
break;
from_next += octet_count;
}
return static_cast<int>(from_next-from);
return static_cast<int>(from_next - from);
}
unsigned int utf8_codecvt_facet::get_octet_count(