Fix a rare case of incorrect UTF8 validation

fix #1245, fix #1249

The value used to fill the incomplete code point buffer is
changed to a character which no longer causes
utf8_checker::valid() to incorrectly return false.
This commit is contained in:
Boris Sergeev
2018-09-18 14:38:15 -06:00
committed by Vinnie Falco
parent 4e7c260403
commit a4c008907d
4 changed files with 162 additions and 56 deletions

View File

@@ -1,3 +1,9 @@
Version 183:
* Fix a rare case of incorrect UTF8 validation
--------------------------------------------------------------------------------
Version 182:
* Silence ubsan false positive

View File

@@ -9,6 +9,14 @@
[section Release Notes]
[heading Boost 1.69]
[*Fixes]
* ([issue 1245]) Fix a rare case of incorrect UTF8 validation
[heading Boost 1.68]
This version fixes a missing executor work guard in all composed operations
used in the implementation. Users who are experiencing crashes related to
asynchronous completion handlers are encouraged to upgrade. Also included
@@ -16,8 +24,6 @@ is an improved mechanism for generating random numbers used to mask outgoing
websocket frames when operating in the client mode. This resolves a
vulnerability described in the Beast Hybrid Assessment Report from Bishop Fox.
[heading Boost 1.68]
[*New Features]
The include directory `<beast/experimental>` contains features which are not

View File

@@ -112,7 +112,7 @@ write(std::uint8_t const* in, std::size_t size)
if((p[0] & 0xe0) == 0xc0)
{
if( (p[1] & 0xc0) != 0x80 ||
(p[0] & 0xfe) == 0xc0) // overlong
(p[0] & 0x1e) == 0) // overlong
return false;
p += 2;
return true;
@@ -121,8 +121,8 @@ write(std::uint8_t const* in, std::size_t size)
{
if( (p[1] & 0xc0) != 0x80
|| (p[2] & 0xc0) != 0x80
|| (p[0] == 0xe0 && (p[1] & 0xe0) == 0x80) // overlong
|| (p[0] == 0xed && (p[1] & 0xe0) == 0xa0) // surrogate
|| (p[0] == 0xe0 && (p[1] & 0x20) == 0) // overlong
|| (p[0] == 0xed && (p[1] & 0x20) == 0x20) // surrogate
//|| (p[0] == 0xef && p[1] == 0xbf && (p[2] & 0xfe) == 0xbe) // U+FFFE or U+FFFF
)
return false;
@@ -131,10 +131,11 @@ write(std::uint8_t const* in, std::size_t size)
}
if((p[0] & 0xf8) == 0xf0)
{
if( (p[1] & 0xc0) != 0x80
if( (p[0] & 0x07) >= 0x05 // invalid F5...FF characters
|| (p[1] & 0xc0) != 0x80
|| (p[2] & 0xc0) != 0x80
|| (p[3] & 0xc0) != 0x80
|| (p[0] == 0xf0 && (p[1] & 0xf0) == 0x80) // overlong
|| (p[0] == 0xf0 && (p[1] & 0x30) == 0) // overlong
|| (p[0] == 0xf4 && p[1] > 0x8f) || p[0] > 0xf4 // > U+10FFFF
)
return false;
@@ -146,24 +147,74 @@ write(std::uint8_t const* in, std::size_t size)
auto const fail_fast =
[&]()
{
auto const n = p_ - cp_;
switch(n)
if(cp_[0] < 128)
{
default:
BOOST_ASSERT(false);
BOOST_FALLTHROUGH;
case 1:
cp_[1] = 0x81;
BOOST_FALLTHROUGH;
case 2:
cp_[2] = 0x81;
BOOST_FALLTHROUGH;
case 3:
cp_[3] = 0x81;
break;
return false;
}
std::uint8_t const* p = cp_;
return ! valid(p);
const auto& p = cp_; // alias, only to keep this code similar to valid() above
const auto known_only = p_ - cp_;
if (known_only == 1)
{
if((p[0] & 0xe0) == 0xc0)
{
return ((p[0] & 0x1e) == 0); // overlong
}
if((p[0] & 0xf0) == 0xe0)
{
return false;
}
if((p[0] & 0xf8) == 0xf0)
{
return ((p[0] & 0x07) >= 0x05); // invalid F5...FF characters
}
}
else if (known_only == 2)
{
if((p[0] & 0xe0) == 0xc0)
{
return ((p[1] & 0xc0) != 0x80 ||
(p[0] & 0x1e) == 0); // overlong
}
if((p[0] & 0xf0) == 0xe0)
{
return ( (p[1] & 0xc0) != 0x80
|| (p[0] == 0xe0 && (p[1] & 0x20) == 0) // overlong
|| (p[0] == 0xed && (p[1] & 0x20) == 0x20)); // surrogate
}
if((p[0] & 0xf8) == 0xf0)
{
return ( (p[0] & 0x07) >= 0x05 // invalid F5...FF characters
|| (p[1] & 0xc0) != 0x80
|| (p[0] == 0xf0 && (p[1] & 0x30) == 0) // overlong
|| (p[0] == 0xf4 && p[1] > 0x8f) || p[0] > 0xf4); // > U+10FFFF
}
}
else if (known_only == 3)
{
if((p[0] & 0xe0) == 0xc0)
{
return ( (p[1] & 0xc0) != 0x80
|| (p[0] & 0x1e) == 0); // overlong
}
if((p[0] & 0xf0) == 0xe0)
{
return ( (p[1] & 0xc0) != 0x80
|| (p[2] & 0xc0) != 0x80
|| (p[0] == 0xe0 && (p[1] & 0x20) == 0) // overlong
|| (p[0] == 0xed && (p[1] & 0x20) == 0x20)); // surrogate
//|| (p[0] == 0xef && p[1] == 0xbf && (p[2] & 0xfe) == 0xbe) // U+FFFE or U+FFFF
}
if((p[0] & 0xf8) == 0xf0)
{
return ( (p[0] & 0x07) >= 0x05 // invalid F5...FF characters
|| (p[1] & 0xc0) != 0x80
|| (p[2] & 0xc0) != 0x80
|| (p[0] == 0xf0 && (p[1] & 0x30) == 0) // overlong
|| (p[0] == 0xf4 && p[1] > 0x8f) || p[0] > 0xf4); // > U+10FFFF
}
}
return true;
};
auto const needed =
[](std::uint8_t const v)

View File

@@ -58,15 +58,9 @@ public:
// three byte sequences
for(unsigned char c = 224; c < 240; ++c)
{
// fail fast
utf8_checker u;
if (c == 224)
BEAST_EXPECT(! u.write(&c, 1));
else
{
BEAST_EXPECT(u.write(&c, 1));
BEAST_EXPECT(! u.finish());
}
BEAST_EXPECT(u.write(&c, 1));
BEAST_EXPECT(! u.finish());
}
// four byte sequences
@@ -74,13 +68,8 @@ public:
{
// fail fast
utf8_checker u;
if (c == 240)
BEAST_EXPECT(! u.write(&c, 1));
else
{
BEAST_EXPECT(u.write(&c, 1));
BEAST_EXPECT(! u.finish());
}
BEAST_EXPECT(u.write(&c, 1));
BEAST_EXPECT(! u.finish());
}
// invalid lead bytes
@@ -167,8 +156,11 @@ public:
BEAST_EXPECT(u.write(buf, 3));
BEAST_EXPECT(u.finish());
// Segmented sequence
if (i == 224)
BEAST_EXPECT(! u.write(buf, 1));
if (i == 224)
{
BEAST_EXPECT(u.write(buf, 1));
BEAST_EXPECT(!u.finish());
}
else
{
BEAST_EXPECT(u.write(buf, 1));
@@ -270,8 +262,10 @@ public:
}
// Segmented sequence second byte invalid
if (i == 224)
BEAST_EXPECT(! u.write(buf, 1));
if (i == 224) {
BEAST_EXPECT(u.write(buf, 1));
BEAST_EXPECT(!u.finish());
}
else
{
BEAST_EXPECT(u.write(buf, 1));
@@ -311,13 +305,8 @@ public:
BEAST_EXPECT(u.write(buf, 4));
BEAST_EXPECT(u.finish());
// Segmented sequence
if (i == 240)
BEAST_EXPECT(! u.write(buf, 1));
else
{
BEAST_EXPECT(u.write(buf, 1));
BEAST_EXPECT(u.write(&buf[1], 3));
}
BEAST_EXPECT(u.write(buf, 1));
BEAST_EXPECT(u.write(&buf[1], 3));
u.reset();
// Segmented sequence
BEAST_EXPECT(u.write(buf, 2));
@@ -424,13 +413,9 @@ public:
}
// Segmented sequence second byte invalid
if (i == 240)
BEAST_EXPECT(! u.write(buf, 1));
else
{
BEAST_EXPECT(u.write(buf, 1));
BEAST_EXPECT(! u.write(&buf[1], 1));
}
BEAST_EXPECT(u.write(buf, 1));
BEAST_EXPECT(! u.write(&buf[1], 1));
u.reset();
}
@@ -534,6 +519,53 @@ public:
}
}
void
AutodeskTests()
{
std::vector<std::vector<std::uint8_t>> const data{
{ 's','t','a','r','t', 0xE0 },
{ 0xA6, 0x81, 'e','n','d' } };
utf8_checker u;
for(auto const& s : data)
{
std::size_t n = s.size();
buffers_suffix<boost::asio::const_buffer> cb{boost::asio::const_buffer(s.data(), n)};
multi_buffer b;
while(n)
{
auto const amount = (std::min)(n, std::size_t(3)/*size*/);
b.commit(boost::asio::buffer_copy(b.prepare(amount), cb));
cb.consume(amount);
n -= amount;
}
BEAST_EXPECT(u.write(b.data()));
}
BEAST_EXPECT(u.finish());
}
// Feeds every element of `data`, in order, to a single utf8_checker and
// expects the i-th write() to return `result[i]`.
//
// data    Byte sequences to validate; each one is copied into a fresh
//         multi_buffer in pieces of at most 3 bytes before being written.
// result  Expected return value of write() for the corresponding element
//         of `data`; must have the same number of elements as `data`.
void
AutobahnTest(std::vector<std::vector<std::uint8_t>>&& data, std::vector<bool> result)
{
    BEAST_EXPECT(data.size() == result.size());
    // The mismatch has just been reported; stop here instead of indexing
    // past the end of `result` in the loop below.
    if(data.size() != result.size())
        return;
    // The checker is shared across all elements so that a code point may
    // span two consecutive elements of `data`.
    utf8_checker u;
    for(std::size_t i = 0; i < data.size(); ++i)
    {
        auto const& s = data[i];
        std::size_t n = s.size();
        buffers_suffix<boost::asio::const_buffer> cb{boost::asio::const_buffer(s.data(), n)};
        multi_buffer b;
        while(n)
        {
            // Stage at most 3 bytes per prepare/commit round.
            auto const amount = (std::min)(n, std::size_t(3)/*size*/);
            b.commit(boost::asio::buffer_copy(b.prepare(amount), cb));
            cb.consume(amount);
            n -= amount;
        }
        BEAST_EXPECT(u.write(b.data()) == result[i]);
    }
}
void
run() override
{
@@ -543,6 +575,17 @@ public:
testFourByteSequence();
testWithStreamBuffer();
testBranches();
AutodeskTests();
// 6.4.2
AutobahnTest(std::vector<std::vector<std::uint8_t>>{
{ 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5, 0xF4 },
{ 0x90 }, { 0x80, 0x80, 0x65, 0x64, 0x69, 0x74, 0x65, 0x64 } },
{ true, false, false});
// 6.4.4
AutobahnTest(std::vector<std::vector<std::uint8_t>>{
{ 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5, 0xF4 },
{ 0x90 } },
{ true, false });
}
};