Files
boost_beast/include/beast/websocket/detail/utf8_checker.hpp

322 lines
9.1 KiB
C++
Raw Normal View History

2017-07-20 08:01:46 -07:00
//
2017-02-06 20:07:03 -05:00
// Copyright (c) 2013-2017 Vinnie Falco (vinnie dot falco at gmail dot com)
2017-07-20 08:01:46 -07:00
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef BEAST_WEBSOCKET_DETAIL_UTF8_CHECKER_HPP
#define BEAST_WEBSOCKET_DETAIL_UTF8_CHECKER_HPP
#include <beast/core/type_traits.hpp>
2017-07-20 08:01:46 -07:00
#include <boost/asio/buffer.hpp>
2016-10-28 19:43:30 -04:00
#include <boost/assert.hpp>
#include <algorithm>
2017-07-20 08:01:46 -07:00
#include <cstdint>
2016-10-28 19:43:30 -04:00
2017-07-20 08:01:46 -07:00
namespace beast {
namespace websocket {
namespace detail {
2016-10-28 19:43:30 -04:00
/* This is a modified work.
2017-07-20 08:01:46 -07:00
2016-10-28 19:43:30 -04:00
Original version and license:
https://www.cl.cam.ac.uk/~mgk25/ucs/utf8_check.c
2017-07-20 08:01:46 -07:00
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject
to the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. *
2016-10-28 19:43:30 -04:00
Additional changes:
Optimized for predominantly 7-bit content, 2016
https://github.com/uWebSockets/uWebSockets/blob/755bd362649c06abff102f18e273c5792c51c1a0/src/WebSocketProtocol.h#L198
Copyright (c) 2016 Alex Hultman and contributors
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgement in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
/** A UTF8 validator.
This validator can be used to check if a buffer containing UTF8 text is
valid. The write function may be called incrementally with segmented UTF8
sequences. The finish function determines if all processed text is valid.
2017-07-20 08:01:46 -07:00
*/
template<class = void>
class utf8_checker_t
{
2016-10-28 19:43:30 -04:00
std::size_t need_ = 0;
std::uint8_t* p_ = have_;
std::uint8_t have_[4];
2017-07-20 08:01:46 -07:00
public:
2016-10-28 19:43:30 -04:00
/** Prepare to process text as valid utf8
*/
2017-07-20 08:01:46 -07:00
void
reset();
2016-10-28 19:43:30 -04:00
/** Check that all processed text is valid utf8
*/
2017-07-20 08:01:46 -07:00
bool
2016-10-28 19:43:30 -04:00
finish();
/** Check if text is valid UTF8
2017-07-20 08:01:46 -07:00
2016-10-28 19:43:30 -04:00
@return `true` if the text is valid utf8 or false otherwise.
*/
2017-07-20 08:01:46 -07:00
bool
2016-10-28 19:43:30 -04:00
write(std::uint8_t const* in, std::size_t size);
/** Check if text is valid UTF8
2017-07-20 08:01:46 -07:00
2016-10-28 19:43:30 -04:00
@return `true` if the text is valid utf8 or false otherwise.
*/
template<class ConstBufferSequence>
2017-07-20 08:01:46 -07:00
bool
2016-10-28 19:43:30 -04:00
write(ConstBufferSequence const& bs);
2017-07-20 08:01:46 -07:00
};
// Return the validator to its initial state: nothing staged and no
// continuation bytes outstanding.
template<class _>
void
utf8_checker_t<_>::
reset()
{
    p_ = &have_[0];
    need_ = 0;
}
// Report whether the text seen so far ended on a code point boundary,
// then reset the validator.
template<class _>
bool
utf8_checker_t<_>::
finish()
{
    // Sample the state first; reset() clears need_.
    bool const complete = (0 == need_);
    reset();
    return complete;
}
template<class _>
2016-10-28 19:43:30 -04:00
template<class ConstBufferSequence>
2017-07-20 08:01:46 -07:00
bool
utf8_checker_t<_>::
write(ConstBufferSequence const& bs)
2017-07-20 08:01:46 -07:00
{
static_assert(is_const_buffer_sequence<ConstBufferSequence>::value,
2016-10-28 19:43:30 -04:00
"ConstBufferSequence requirements not met");
2017-07-20 08:01:46 -07:00
using boost::asio::buffer_cast;
using boost::asio::buffer_size;
2017-06-13 11:53:06 -07:00
for(boost::asio::const_buffer b : bs)
2016-10-28 19:43:30 -04:00
if(! write(buffer_cast<std::uint8_t const*>(b),
2017-07-20 08:01:46 -07:00
buffer_size(b)))
return false;
return true;
}
/*  Validate the next run of bytes of a UTF-8 stream.

    Processing happens in four stages:

    1. If a previous call left a code point unfinished (need_ > 0), its
       missing bytes are taken from the front of the input and the
       completed code point is validated out of have_.

    2. While at least 8 bytes remain, a machine word whose bytes all
       have the high bit clear is skipped in one step; otherwise one
       code point is validated.

    3. While at least 4 bytes remain, code points are validated one at
       a time (4 bytes is the longest sequence `valid` may read).

    4. The last few bytes are checked against the length implied by
       their lead byte. A code point that runs past the end of the
       input is staged in have_, need_ records how many bytes are
       missing, and only the staged prefix is validated.

    @param in Pointer to the first byte to check.

    @param size Number of bytes available at `in`.

    @return `false` as soon as an invalid sequence is found, otherwise
    `true`. On `false` the staged state is left as it was at the point
    of failure; this function does not reset it.
*/
template<class _>
bool
utf8_checker_t<_>::
write(std::uint8_t const* in, std::size_t size)
{
    static_assert(sizeof(std::size_t) <= 8,
        "word-at-a-time scan assumes at most 8-byte words");

    // Validate one whole code point at `p` and step past it. The
    // caller guarantees that every byte of the sequence announced by
    // p[0] is readable.
    auto const valid =
        [](std::uint8_t const*& p)
        {
            if(p[0] < 128)
            {
                ++p;
                return true;
            }
            if((p[0] & 0xe0) == 0xc0)
            {
                // 0xC0 and 0xC1 could only encode values below 0x80,
                // which would be an overlong form.
                if(p[0] < 0xc2 ||
                    (p[1] & 0xc0) != 0x80)
                    return false;
                p += 2;
                return true;
            }
            if((p[0] & 0xf0) == 0xe0)
            {
                if( (p[1] & 0xc0) != 0x80 ||
                    (p[2] & 0xc0) != 0x80 ||
                    (p[0] == 224 && p[1] < 160) || // overlong
                    (p[0] == 237 && p[1] > 159))   // surrogate range
                    return false;
                p += 3;
                return true;
            }
            if((p[0] & 0xf8) == 0xf0)
            {
                if( p[0] > 244 ||                  // above U+10FFFF
                    (p[1] & 0xc0) != 0x80 ||
                    (p[2] & 0xc0) != 0x80 ||
                    (p[3] & 0xc0) != 0x80 ||
                    (p[0] == 240 && p[1] < 144) || // overlong
                    (p[0] == 244 && p[1] > 143))   // above U+10FFFF
                    return false;
                p += 4;
                return true;
            }
            return false;
        };

    // Validate the incomplete code point staged in [have_, p_),
    // looking only at the bytes that have actually arrived.
    auto const valid_have =
        [&]()
        {
            if((have_[0] & 0xe0) == 0xc0)
                return have_[0] >= 0xc2;
            if((have_[0] & 0xf0) == 0xe0)
            {
                if( p_ - have_ > 1 &&
                    ((have_[1] & 0xc0) != 0x80 ||
                    (have_[0] == 224 && have_[1] < 160) ||
                    (have_[0] == 237 && have_[1] > 159)))
                    return false;
                return true;
            }
            if((have_[0] & 0xf8) == 0xf0)
            {
                auto const staged = p_ - have_;
                if(staged > 2 && (have_[2] & 0xc0) != 0x80)
                    return false;
                if( staged > 1 &&
                    ((have_[1] & 0xc0) != 0x80 ||
                    (have_[0] == 240 && have_[1] < 144) ||
                    (have_[0] == 244 && have_[1] > 143)))
                    return false;
            }
            return true;
        };

    // Total length of the sequence introduced by `lead`, or 0 when
    // `lead` cannot start a sequence.
    auto const needed =
        [](std::uint8_t const lead)
        {
            if(lead < 128)
                return 1;
            if(lead < 194)
                return 0;
            if(lead < 224)
                return 2;
            if(lead < 240)
                return 3;
            if(lead < 245)
                return 4;
            return 0;
        };

    auto const end = in + size;

    // Stage 1: finish a code point left over from the previous call
    if(need_ > 0)
    {
        auto n = (std::min)(size, need_);
        need_ -= n;
        while(n--)
            *p_++ = *in++;
        if(need_ > 0)
        {
            // Still incomplete, so the input must be used up
            BOOST_ASSERT(in == end);
            return valid_have();
        }
        std::uint8_t const* p = &have_[0];
        if(! valid(p))
            return false;
        p_ = have_;
    }

    // Stage 2: word-at-a-time scan. The loop bounds are expressed as
    // remaining byte counts so that no pointer before the start of the
    // buffer is ever formed, even for very short inputs.
    auto constexpr mask = static_cast<
        std::size_t>(0x8080808080808080 &
            ~std::size_t{0});
    while(end - in >= 8)
    {
#if BEAST_WEBSOCKET_NO_UNALIGNED_READ
        auto constexpr align = sizeof(std::size_t) - 1;
        bool const aligned = (reinterpret_cast<
            std::uintptr_t>(in) & align) == 0;
        if(aligned &&
            (*reinterpret_cast<
                std::size_t const*>(in) & mask) == 0)
            in += sizeof(std::size_t);
        else if(! valid(in))
            return false;
#else
        if((*reinterpret_cast<
                std::size_t const*>(in) & mask) == 0)
            in += sizeof(std::size_t);
        else if(! valid(in))
            return false;
#endif
    }

    // Stage 3: at least 4 readable bytes, so `valid` cannot overrun
    while(end - in >= 4)
        if(! valid(in))
            return false;

    // Stage 4: the final bytes, where a code point may be cut short
    while(in != end)
    {
        auto n = end - in;
        auto const need = needed(*in);
        if(need == 0)
            return false;
        if(need <= n)
        {
            if(! valid(in))
                return false;
            continue;
        }
        // Stage the prefix and remember how much is outstanding
        need_ = need - n;
        while(n--)
            *p_++ = *in++;
        return valid_have();
    }
    return true;
}
/// The validator instantiated with its default template argument.
using utf8_checker = utf8_checker_t<>;

/** Check whether a complete character buffer is valid UTF-8.

    The buffer is treated as the entire text, so a code point that is
    cut off at the end makes the check fail.

    @param p Pointer to the first character.

    @param n Number of characters at `p`.

    @return `true` if the whole buffer is valid UTF-8.
*/
template<class = void>
bool
check_utf8(char const* p, std::size_t n)
{
    utf8_checker c;
    // <cstdint> only guarantees the std-qualified name
    if(! c.write(reinterpret_cast<std::uint8_t const*>(p), n))
        return false;
    return c.finish();
}
} // detail
} // websocket
} // beast
#endif