Fix a rare case of incorrect UTF8 validation

fix #1245, fix #1249

The value used to fill the incomplete code point buffer is
changed to a character which no longer causes
utf8_checker::valid() to incorrectly return false.
This commit is contained in:
Boris Sergeev
2018-09-18 14:38:15 -06:00
committed by Vinnie Falco
parent 4e7c260403
commit a4c008907d
4 changed files with 162 additions and 56 deletions

View File

@@ -1,3 +1,9 @@
Version 183:
* Fix a rare case of incorrect UTF8 validation
--------------------------------------------------------------------------------
Version 182: Version 182:
* Silence ubsan false positive * Silence ubsan false positive

View File

@@ -9,6 +9,14 @@
[section Release Notes] [section Release Notes]
[heading Boost 1.69]
[*Fixes]
* ([issue 1245]) Fix a rare case of incorrect UTF8 validation
[heading Boost 1.68]
This version fixes a missing executor work guard in all composed operations This version fixes a missing executor work guard in all composed operations
used in the implementation. Users who are experiencing crashes related to used in the implementation. Users who are experiencing crashes related to
asynchronous completion handlers are encouraged to upgrade. Also included asynchronous completion handlers are encouraged to upgrade. Also included
@@ -16,8 +24,6 @@ is an improved mechanism for generating random numbers used to mask outgoing
websocket frames when operating in the client mode. This resolves a websocket frames when operating in the client mode. This resolves a
vulnerability described in the Beast Hybrid Assessment Report from Bishop Fox. vulnerability described in the Beast Hybrid Assessment Report from Bishop Fox.
[heading Boost 1.68]
[*New Features] [*New Features]
The include directory `<beast/experimental>` contains features which are not The include directory `<beast/experimental>` contains features which are not

View File

@@ -112,7 +112,7 @@ write(std::uint8_t const* in, std::size_t size)
if((p[0] & 0xe0) == 0xc0) if((p[0] & 0xe0) == 0xc0)
{ {
if( (p[1] & 0xc0) != 0x80 || if( (p[1] & 0xc0) != 0x80 ||
(p[0] & 0xfe) == 0xc0) // overlong (p[0] & 0x1e) == 0) // overlong
return false; return false;
p += 2; p += 2;
return true; return true;
@@ -121,8 +121,8 @@ write(std::uint8_t const* in, std::size_t size)
{ {
if( (p[1] & 0xc0) != 0x80 if( (p[1] & 0xc0) != 0x80
|| (p[2] & 0xc0) != 0x80 || (p[2] & 0xc0) != 0x80
|| (p[0] == 0xe0 && (p[1] & 0xe0) == 0x80) // overlong || (p[0] == 0xe0 && (p[1] & 0x20) == 0) // overlong
|| (p[0] == 0xed && (p[1] & 0xe0) == 0xa0) // surrogate || (p[0] == 0xed && (p[1] & 0x20) == 0x20) // surrogate
//|| (p[0] == 0xef && p[1] == 0xbf && (p[2] & 0xfe) == 0xbe) // U+FFFE or U+FFFF //|| (p[0] == 0xef && p[1] == 0xbf && (p[2] & 0xfe) == 0xbe) // U+FFFE or U+FFFF
) )
return false; return false;
@@ -131,10 +131,11 @@ write(std::uint8_t const* in, std::size_t size)
} }
if((p[0] & 0xf8) == 0xf0) if((p[0] & 0xf8) == 0xf0)
{ {
if( (p[1] & 0xc0) != 0x80 if( (p[0] & 0x07) >= 0x05 // invalid F5...FF characters
|| (p[1] & 0xc0) != 0x80
|| (p[2] & 0xc0) != 0x80 || (p[2] & 0xc0) != 0x80
|| (p[3] & 0xc0) != 0x80 || (p[3] & 0xc0) != 0x80
|| (p[0] == 0xf0 && (p[1] & 0xf0) == 0x80) // overlong || (p[0] == 0xf0 && (p[1] & 0x30) == 0) // overlong
|| (p[0] == 0xf4 && p[1] > 0x8f) || p[0] > 0xf4 // > U+10FFFF || (p[0] == 0xf4 && p[1] > 0x8f) || p[0] > 0xf4 // > U+10FFFF
) )
return false; return false;
@@ -146,24 +147,74 @@ write(std::uint8_t const* in, std::size_t size)
auto const fail_fast = auto const fail_fast =
[&]() [&]()
{ {
auto const n = p_ - cp_; if(cp_[0] < 128)
switch(n)
{ {
default: return false;
BOOST_ASSERT(false);
BOOST_FALLTHROUGH;
case 1:
cp_[1] = 0x81;
BOOST_FALLTHROUGH;
case 2:
cp_[2] = 0x81;
BOOST_FALLTHROUGH;
case 3:
cp_[3] = 0x81;
break;
} }
std::uint8_t const* p = cp_;
return ! valid(p); const auto& p = cp_; // alias, only to keep this code similar to valid() above
const auto known_only = p_ - cp_;
if (known_only == 1)
{
if((p[0] & 0xe0) == 0xc0)
{
return ((p[0] & 0x1e) == 0); // overlong
}
if((p[0] & 0xf0) == 0xe0)
{
return false;
}
if((p[0] & 0xf8) == 0xf0)
{
return ((p[0] & 0x07) >= 0x05); // invalid F5...FF characters
}
}
else if (known_only == 2)
{
if((p[0] & 0xe0) == 0xc0)
{
return ((p[1] & 0xc0) != 0x80 ||
(p[0] & 0x1e) == 0); // overlong
}
if((p[0] & 0xf0) == 0xe0)
{
return ( (p[1] & 0xc0) != 0x80
|| (p[0] == 0xe0 && (p[1] & 0x20) == 0) // overlong
|| (p[0] == 0xed && (p[1] & 0x20) == 0x20)); // surrogate
}
if((p[0] & 0xf8) == 0xf0)
{
return ( (p[0] & 0x07) >= 0x05 // invalid F5...FF characters
|| (p[1] & 0xc0) != 0x80
|| (p[0] == 0xf0 && (p[1] & 0x30) == 0) // overlong
|| (p[0] == 0xf4 && p[1] > 0x8f) || p[0] > 0xf4); // > U+10FFFF
}
}
else if (known_only == 3)
{
if((p[0] & 0xe0) == 0xc0)
{
return ( (p[1] & 0xc0) != 0x80
|| (p[0] & 0x1e) == 0); // overlong
}
if((p[0] & 0xf0) == 0xe0)
{
return ( (p[1] & 0xc0) != 0x80
|| (p[2] & 0xc0) != 0x80
|| (p[0] == 0xe0 && (p[1] & 0x20) == 0) // overlong
|| (p[0] == 0xed && (p[1] & 0x20) == 0x20)); // surrogate
//|| (p[0] == 0xef && p[1] == 0xbf && (p[2] & 0xfe) == 0xbe) // U+FFFE or U+FFFF
}
if((p[0] & 0xf8) == 0xf0)
{
return ( (p[0] & 0x07) >= 0x05 // invalid F5...FF characters
|| (p[1] & 0xc0) != 0x80
|| (p[2] & 0xc0) != 0x80
|| (p[0] == 0xf0 && (p[1] & 0x30) == 0) // overlong
|| (p[0] == 0xf4 && p[1] > 0x8f) || p[0] > 0xf4); // > U+10FFFF
}
}
return true;
}; };
auto const needed = auto const needed =
[](std::uint8_t const v) [](std::uint8_t const v)

View File

@@ -58,15 +58,9 @@ public:
// three byte sequences // three byte sequences
for(unsigned char c = 224; c < 240; ++c) for(unsigned char c = 224; c < 240; ++c)
{ {
// fail fast
utf8_checker u; utf8_checker u;
if (c == 224) BEAST_EXPECT(u.write(&c, 1));
BEAST_EXPECT(! u.write(&c, 1)); BEAST_EXPECT(! u.finish());
else
{
BEAST_EXPECT(u.write(&c, 1));
BEAST_EXPECT(! u.finish());
}
} }
// four byte sequences // four byte sequences
@@ -74,13 +68,8 @@ public:
{ {
// fail fast // fail fast
utf8_checker u; utf8_checker u;
if (c == 240) BEAST_EXPECT(u.write(&c, 1));
BEAST_EXPECT(! u.write(&c, 1)); BEAST_EXPECT(! u.finish());
else
{
BEAST_EXPECT(u.write(&c, 1));
BEAST_EXPECT(! u.finish());
}
} }
// invalid lead bytes // invalid lead bytes
@@ -167,8 +156,11 @@ public:
BEAST_EXPECT(u.write(buf, 3)); BEAST_EXPECT(u.write(buf, 3));
BEAST_EXPECT(u.finish()); BEAST_EXPECT(u.finish());
// Segmented sequence // Segmented sequence
if (i == 224) if (i == 224)
BEAST_EXPECT(! u.write(buf, 1)); {
BEAST_EXPECT(u.write(buf, 1));
BEAST_EXPECT(!u.finish());
}
else else
{ {
BEAST_EXPECT(u.write(buf, 1)); BEAST_EXPECT(u.write(buf, 1));
@@ -270,8 +262,10 @@ public:
} }
// Segmented sequence second byte invalid // Segmented sequence second byte invalid
if (i == 224) if (i == 224) {
BEAST_EXPECT(! u.write(buf, 1)); BEAST_EXPECT(u.write(buf, 1));
BEAST_EXPECT(!u.finish());
}
else else
{ {
BEAST_EXPECT(u.write(buf, 1)); BEAST_EXPECT(u.write(buf, 1));
@@ -311,13 +305,8 @@ public:
BEAST_EXPECT(u.write(buf, 4)); BEAST_EXPECT(u.write(buf, 4));
BEAST_EXPECT(u.finish()); BEAST_EXPECT(u.finish());
// Segmented sequence // Segmented sequence
if (i == 240) BEAST_EXPECT(u.write(buf, 1));
BEAST_EXPECT(! u.write(buf, 1)); BEAST_EXPECT(u.write(&buf[1], 3));
else
{
BEAST_EXPECT(u.write(buf, 1));
BEAST_EXPECT(u.write(&buf[1], 3));
}
u.reset(); u.reset();
// Segmented sequence // Segmented sequence
BEAST_EXPECT(u.write(buf, 2)); BEAST_EXPECT(u.write(buf, 2));
@@ -424,13 +413,9 @@ public:
} }
// Segmented sequence second byte invalid // Segmented sequence second byte invalid
if (i == 240) BEAST_EXPECT(u.write(buf, 1));
BEAST_EXPECT(! u.write(buf, 1)); BEAST_EXPECT(! u.write(&buf[1], 1));
else
{
BEAST_EXPECT(u.write(buf, 1));
BEAST_EXPECT(! u.write(&buf[1], 1));
}
u.reset(); u.reset();
} }
@@ -534,6 +519,53 @@ public:
} }
} }
void
AutodeskTests()
{
std::vector<std::vector<std::uint8_t>> const data{
{ 's','t','a','r','t', 0xE0 },
{ 0xA6, 0x81, 'e','n','d' } };
utf8_checker u;
for(auto const& s : data)
{
std::size_t n = s.size();
buffers_suffix<boost::asio::const_buffer> cb{boost::asio::const_buffer(s.data(), n)};
multi_buffer b;
while(n)
{
auto const amount = (std::min)(n, std::size_t(3)/*size*/);
b.commit(boost::asio::buffer_copy(b.prepare(amount), cb));
cb.consume(amount);
n -= amount;
}
BEAST_EXPECT(u.write(b.data()));
}
BEAST_EXPECT(u.finish());
}
// Writes every element of `data`, in order, to a single utf8_checker and
// expects the return value of write() for data[i] to equal result[i].
// The checker is not reset between elements. Each element is first copied
// into a multi_buffer in pieces of at most three bytes.
//
// data    The byte sequences to write.
// result  The expected write() result for each element of data; must
//         contain exactly data.size() entries.
void
AutobahnTest(std::vector<std::vector<std::uint8_t>>&& data, std::vector<bool> result)
{
    BEAST_EXPECT(data.size() == result.size());
    // The mismatch has been reported above; stop here so the loop below
    // never indexes result out of range.
    if(data.size() != result.size())
        return;
    utf8_checker u;
    for(std::size_t i = 0; i < data.size(); ++i)
    {
        auto const& s = data[i];
        std::size_t n = s.size();
        buffers_suffix<boost::asio::const_buffer> cb{boost::asio::const_buffer(s.data(), n)};
        multi_buffer b;
        // Copy the element into b, at most three bytes per step
        while(n)
        {
            auto const amount = (std::min)(n, std::size_t(3)/*size*/);
            b.commit(boost::asio::buffer_copy(b.prepare(amount), cb));
            cb.consume(amount);
            n -= amount;
        }
        BEAST_EXPECT(u.write(b.data()) == result[i]);
    }
}
void void
run() override run() override
{ {
@@ -543,6 +575,17 @@ public:
testFourByteSequence(); testFourByteSequence();
testWithStreamBuffer(); testWithStreamBuffer();
testBranches(); testBranches();
AutodeskTests();
// 6.4.2
AutobahnTest(std::vector<std::vector<std::uint8_t>>{
{ 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5, 0xF4 },
{ 0x90 }, { 0x80, 0x80, 0x65, 0x64, 0x69, 0x74, 0x65, 0x64 } },
{ true, false, false});
// 6.4.4
AutobahnTest(std::vector<std::vector<std::uint8_t>>{
{ 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5, 0xF4 },
{ 0x90 } },
{ true, false });
} }
}; };