From 131208d8ccd82ef69afb9cf0bad1a314bd931d88 Mon Sep 17 00:00:00 2001
From: Andrey Semashev
Date: Wed, 17 Mar 2021 14:12:36 +0300
Subject: [PATCH] Fixed out-of-bounds access in utf8_codecvt_facet::do_length.

The loop in do_length used to dereference the from_end pointer, which could
point to out-of-bounds memory. Rewrote the loop to avoid this and also
make the logic a bit clearer.

Closes https://github.com/boostorg/detail/pull/21.
---
 include/boost/detail/utf8_codecvt_facet.ipp | 29 +++++++-------------
 1 file changed, 10 insertions(+), 19 deletions(-)

diff --git a/include/boost/detail/utf8_codecvt_facet.ipp b/include/boost/detail/utf8_codecvt_facet.ipp
index 24eb61a..564743f 100644
--- a/include/boost/detail/utf8_codecvt_facet.ipp
+++ b/include/boost/detail/utf8_codecvt_facet.ipp
@@ -188,26 +188,17 @@ int utf8_codecvt_facet::do_length(
 #if BOOST_WORKAROUND(__IBMCPP__, BOOST_TESTED_AT(600))
         throw()
 #endif
-{
-    // RG - this code is confusing! I need a better way to express it.
-    // and test cases.
-
-    // Invariants:
-    // 1) last_octet_count has the size of the last measured character
-    // 2) char_count holds the number of characters shown to fit
-    // within the bounds so far (no greater than max_limit)
-    // 3) from_next points to the octet 'last_octet_count' before the
-    // last measured character.
-    int last_octet_count=0;
-    std::size_t char_count = 0;
-    const char* from_next = from;
-    // Use "<" because the buffer may represent incomplete characters
-    while (from_next+last_octet_count <= from_end && char_count <= max_limit) {
-        from_next += last_octet_count;
-        last_octet_count = (get_octet_count(*from_next));
-        ++char_count;
+{
+    const char * from_next = from;
+    for (std::size_t char_count = 0u; char_count < max_limit && from_next < from_end; ++char_count) {
+        unsigned int octet_count = get_octet_count(*from_next);
+        // The buffer may represent incomplete characters, so terminate early if one is found
+        if (octet_count > static_cast<std::size_t>(from_end - from_next))
+            break;
+        from_next += octet_count;
     }
-    return static_cast<int>(from_next-from);
+
+    return static_cast<int>(from_next - from);
 }
 
 unsigned int utf8_codecvt_facet::get_octet_count(