basic_parser optimizations:

fix #185, fix #489

* SSE4.2 is detected

* basic_parser uses SSE4.2 if available

* basic_parser first tries to parse the header from the initial
  buffer. If it does not find the end of the header, it switches
  to a strategy of waiting for the end of the header to arrive,
  which defeats slow loris attacks. Coincidentally, this is also
  faster than the previous algorithm.
This commit is contained in:
Vinnie Falco
2017-06-30 08:46:22 -07:00
parent c94821384d
commit e608acb791
11 changed files with 1733 additions and 1375 deletions

View File

@@ -1,3 +1,11 @@
Version 73:
HTTP:
* basic_parser optimizations
--------------------------------------------------------------------------------
Version 72:
HTTP:
@@ -14,6 +22,7 @@ WebSocket:
* Add websocket-server-async example
--------------------------------------------------------------------------------
Version 71:

View File

@@ -116,7 +116,7 @@ endfunction()
if ("${VARIANT}" STREQUAL "coverage")
if (MSVC)
else()
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2 -fprofile-arcs -ftest-coverage")
set (CMAKE_BUILD_TYPE RELWITHDEBINFO)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lgcov")
endif()
@@ -125,7 +125,7 @@ elseif ("${VARIANT}" STREQUAL "ubasan")
if (MSVC)
else()
set (CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -DBEAST_NO_SLOW_TESTS=1 -funsigned-char -fno-omit-frame-pointer -fsanitize=address,undefined -fsanitize-blacklist=${PROJECT_SOURCE_DIR}/scripts/blacklist.supp")
"${CMAKE_CXX_FLAGS} -DBEAST_NO_SLOW_TESTS=1 -msse4.2 -funsigned-char -fno-omit-frame-pointer -fsanitize=address,undefined -fsanitize-blacklist=${PROJECT_SOURCE_DIR}/scripts/blacklist.supp")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address,undefined")
set (CMAKE_BUILD_TYPE RELWITHDEBINFO)
endif()

View File

@@ -55,7 +55,7 @@ if [ os.name ] = MACOSX
variant coverage :
release
:
<cxxflags>"-fprofile-arcs -ftest-coverage"
<cxxflags>"-msse4.2 -fprofile-arcs -ftest-coverage"
<linkflags>"-lgcov"
;
@@ -63,7 +63,7 @@ variant ubasan
:
release
:
<cxxflags>"-funsigned-char -fno-omit-frame-pointer -fsanitize=address,undefined -fsanitize-blacklist=scripts/blacklist.supp"
<cxxflags>"-msse4.2 -funsigned-char -fno-omit-frame-pointer -fsanitize=address,undefined -fsanitize-blacklist=scripts/blacklist.supp"
<linkflags>"-fsanitize=address,undefined"
;

View File

@@ -0,0 +1,108 @@
//
// Copyright (c) 2017 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef BEAST_DETAIL_CPU_INFO_HPP
#define BEAST_DETAIL_CPU_INFO_HPP
// Decide whether the CPUID / SSE4.2 code paths are compiled in.
// A value already supplied by the build (e.g. -DBEAST_NO_INTRINSICS=1)
// takes precedence. Otherwise the macro is set to 0 (intrinsics enabled)
// for MSVC and for compilers targeting 32-bit x86 (__i386__), and to 1
// (intrinsics disabled) everywhere else.
// NOTE(review): the middle PIC/GCC clause is subsumed by the trailing
// defined(__i386__) test, and x86-64 GCC/Clang builds do not define
// __i386__, so they end up with intrinsics disabled - confirm intent.
#ifndef BEAST_NO_INTRINSICS
# if defined(_MSC_VER) || \
(defined(__i386__) && defined(__PIC__) && \
defined(__GNUC__) && ! defined(__clang__)) || \
defined(__i386__)
# define BEAST_NO_INTRINSICS 0
# else
# define BEAST_NO_INTRINSICS 1
# endif
#endif
#if ! BEAST_NO_INTRINSICS
#include <cstdint>
#if defined(_MSC_VER)
#include <intrin.h> // __cpuid
#endif
namespace beast {
namespace detail {
/* Portions from Boost,
Copyright Andrey Semashev 2007 - 2015.
*/
/** Execute the x86 `cpuid` instruction for a single leaf.

    @param id The leaf (function number) to query; it is loaded into EAX
    before the instruction executes.

    @param eax Receives the value left in EAX.
    @param ebx Receives the value left in EBX.
    @param ecx Receives the value left in ECX.
    @param edx Receives the value left in EDX.

    On the GCC/Clang code paths ECX is zeroed on entry so that leaves
    which take a sub-leaf index always query sub-leaf 0.
*/
template<class = void>
void
cpuid(
    std::uint32_t id,
    std::uint32_t& eax,
    std::uint32_t& ebx,
    std::uint32_t& ecx,
    std::uint32_t& edx)
{
#if defined(_MSC_VER)
    int regs[4];
    __cpuid(regs, static_cast<int>(id));
    eax = static_cast<std::uint32_t>(regs[0]);
    ebx = static_cast<std::uint32_t>(regs[1]);
    ecx = static_cast<std::uint32_t>(regs[2]);
    edx = static_cast<std::uint32_t>(regs[3]);
#elif defined(__i386__) && defined(__PIC__) && \
    defined(__GNUC__) && ! defined(__clang__)
    // We have to backup ebx in 32 bit PIC code because it is reserved by the ABI
    std::uint32_t ebx_backup;
    // Every register operand below is read-write ("+"), so each one must
    // hold a defined value on entry; in particular EAX selects the leaf.
    eax = id;
    ebx = 0;
    ecx = 0;
    edx = 0;
    __asm__ __volatile__
    (
        "movl %%ebx, %0\n\t"
        "movl %1, %%ebx\n\t"
        "cpuid\n\t"
        "movl %%ebx, %1\n\t"
        "movl %0, %%ebx\n\t"
        : "=m" (ebx_backup), "+m"(ebx), "+a"(eax), "+c"(ecx), "+d"(edx)
    );
#elif defined(__i386__) || defined(__x86_64__)
    // EBX is an output operand here, so it must not also appear in the
    // clobber list (compilers reject an operand/clobber overlap).
    __asm__ __volatile__
    (
        "cpuid"
        : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
        : "a"(id), "c"(0)
    );
#else
# error Unknown compiler!
#endif
}
// Records which optional instruction set extensions the
// executing processor reports through CPUID.
struct cpu_info
{
    // Set when CPUID leaf 1 reports bit 20 of ECX
    bool sse42 = false;

    cpu_info();
};

// Queries the processor; `sse42` stays false when
// CPUID leaf 1 is not available.
inline
cpu_info::
cpu_info()
{
    std::uint32_t const sse42_mask = 1 << 20;
    std::uint32_t eax, ebx, ecx, edx;

    // Leaf 0 returns the highest supported leaf in EAX
    cpuid(0, eax, ebx, ecx, edx);
    if(eax < 1)
        return;

    cpuid(1, eax, ebx, ecx, edx);
    sse42 = (ecx & sse42_mask) != 0;
}
// Returns a reference to a single shared cpu_info object.
// It is a function-local static, so it is constructed
// (and the CPU queried) the first time this is called.
template<class = void>
cpu_info const&
get_cpu_info()
{
    static cpu_info const instance{};
    return instance;
}
} // detail
} // beast
#endif
#endif

View File

@@ -75,9 +75,6 @@ class basic_parser
template<bool OtherIsRequest, class OtherDerived>
friend class basic_parser;
// limit on the size of the obs-fold buffer
static std::size_t constexpr max_obs_fold = 4096;
// limit on the size of the stack flat buffer
static std::size_t constexpr max_stack_buffer = 8192;
@@ -129,6 +126,7 @@ class basic_parser
std::size_t skip_ = 0; // resume search here
std::uint32_t
header_limit_ = 8192; // max header size
unsigned short status_; // response status
state state_ = // initial state
state::nothing_yet;
unsigned f_ = 0; // flags
@@ -212,7 +210,7 @@ public:
bool
is_header_done() const
{
return state_ > state::header;
return state_ > state::fields;
}
/** Returns `true` if the message is an upgrade message.
@@ -312,6 +310,9 @@ public:
input. If the end of the header is not found within the
limit of the header size, the error @ref error::header_limit
is returned by @ref put.
Setting the limit after any header octets have been parsed
results in undefined behavior.
*/
void
header_limit(std::uint32_t v)
@@ -452,15 +453,31 @@ private:
error_code& ec);
void
parse_header(char const*& p,
std::size_t n, error_code& ec);
maybe_need_more(
char const* p, std::size_t n,
error_code& ec);
void
parse_header(char const*& p, char const* term,
parse_start_line(
char const*& p, char const* last,
error_code& ec, std::true_type);
void
parse_header(char const*& p, char const* term,
parse_start_line(
char const*& p, char const* last,
error_code& ec, std::false_type);
void
parse_fields(
char const*& p, char const* last,
error_code& ec);
void
finish_header(
error_code& ec, std::true_type);
void
finish_header(
error_code& ec, std::false_type);
void
@@ -479,10 +496,6 @@ private:
parse_chunk_body(char const*& p,
std::size_t n, error_code& ec);
void
parse_fields(char const*& p,
char const* last, error_code& ec);
void
do_field(field f,
string_view value, error_code& ec);

View File

@@ -8,9 +8,12 @@
#ifndef BEAST_HTTP_DETAIL_BASIC_PARSER_HPP
#define BEAST_HTTP_DETAIL_BASIC_PARSER_HPP
#include <beast/core/static_string.hpp>
#include <beast/core/string.hpp>
#include <beast/core/detail/cpu_info.hpp>
#include <beast/http/error.hpp>
#include <beast/http/detail/rfc7230.hpp>
#include <boost/config.hpp>
#include <boost/version.hpp>
#include <algorithm>
#include <cstddef>
@@ -47,25 +50,41 @@
* IN THE SOFTWARE.
*/
#if ! BEAST_NO_INTRINSICS
# ifdef BOOST_MSVC
# include <nmmintrin.h>
# else
# include <x86intrin.h>
# endif
#endif
namespace beast {
namespace http {
namespace detail {
#if __GNUC__ >= 3
# define BEAST_LIKELY(x) __builtin_expect(!!(x), 1)
# define BEAST_UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
#define BEAST_LIKELY(x) (x)
#define BEAST_UNLIKELY(x) (x)
#endif
class basic_parser_base
{
protected:
#if ! BEAST_NO_INTRINSICS
bool sse42_;
basic_parser_base()
: sse42_(beast::detail::get_cpu_info().sse42)
{
}
#endif
// limit on the size of the obs-fold buffer
//
// https://stackoverflow.com/questions/686217/maximum-on-http-header-values
//
static std::size_t constexpr max_obs_fold = 4096;
enum class state
{
nothing_yet = 0,
header,
start_line,
fields,
body0,
body,
body_to_eof0,
@@ -104,59 +123,6 @@ protected:
return tab[static_cast<unsigned char>(c)];
}
static
bool
is_value_char(char c)
{
// any OCTET except CTLs and LWS
static bool constexpr tab[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 32
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 48
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 64
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 96
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // 112
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 128
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 144
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 160
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 176
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 192
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 208
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 224
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 240
};
return tab[static_cast<unsigned char>(c)];
}
static
inline
bool
is_text(char c)
{
// VCHAR / SP / HT / obs-text
static bool constexpr tab[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, // 0
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 32
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 48
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 64
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 96
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // 112
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 128
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 144
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 160
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 176
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 192
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 208
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 224
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 240
};
return tab[static_cast<unsigned char>(c)];
}
static
inline
bool
@@ -197,7 +163,37 @@ protected:
bool
is_print(char c)
{
return static_cast<unsigned char>(c-33) < 94;
return static_cast<unsigned char>(c-32) < 95;
}
// Advances `it` past any leading run of SP / HTAB characters and
// returns the position of the first other character, or `end`
// when the whole range is whitespace.
template<class FwdIt>
static
FwdIt
trim_front(FwdIt it, FwdIt const& end)
{
    for(; it != end; ++it)
    {
        auto const ch = *it;
        if(! (ch == ' ' || ch == '\t'))
            return it;
    }
    return it;
}
// Moves `it` backwards towards `first` while the character just
// before it is SP or HTAB, and returns the resulting position
// (one past the last non-whitespace character, or `first`).
template<class RanIt>
static
RanIt
trim_back(
    RanIt it, RanIt const& first)
{
    for(; it != first; --it)
    {
        auto const prev = it[-1];
        if(! (prev == ' ' || prev == '\t'))
            break;
    }
    return it;
}
static
@@ -208,28 +204,214 @@ protected:
std::size_t>(last - first)};
}
template<class = void>
static
bool
strieq(string_view s1,
string_view s2)
//--------------------------------------------------------------------------
std::pair<char const*, bool>
find_fast(
char const* buf,
char const* buf_end,
char const* ranges,
size_t ranges_size)
{
if(s1.size() != s2.size())
return false;
auto p1 = s1.data();
auto p2 = s2.data();
for(auto n = s1.size(); n--; ++p1, ++p2)
if(*p1 != tolower(*p2))
return false;
return true;
bool found = false;
#if ! BEAST_NO_INTRINSICS
if(BOOST_LIKELY(sse42_))
{
if(BOOST_LIKELY(buf_end - buf >= 16))
{
__m128i ranges16 = _mm_loadu_si128((__m128i const*)ranges);
std::size_t left = (buf_end - buf) & ~15;
do
{
__m128i b16 = _mm_loadu_si128((__m128i const*)buf);
int r = _mm_cmpestri(ranges16, ranges_size, b16, 16,
_SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
if(BOOST_UNLIKELY(r != 16))
{
buf += r;
found = true;
break;
}
buf += 16;
left -= 16;
}
while(BOOST_LIKELY(left != 0));
}
}
template<std::size_t N>
bool
strieq(const char (&s1)[N],
string_view s2)
#else
boost::ignore_unused(buf_end, ranges, ranges_size);
#endif
return {buf, found};
}
// VFALCO Can SIMD help this?
static
char const*
find_eol(
char const* it, char const* last,
error_code& ec)
{
return strieq({s1, N-1}, s2);
for(;;)
{
if(it == last)
{
ec.assign(0, ec.category());
return nullptr;
}
if(*it == '\r')
{
if(++it == last)
{
ec.assign(0, ec.category());
return nullptr;
}
if(*it != '\n')
{
ec = error::bad_line_ending;
return nullptr;
}
ec.assign(0, ec.category());
return ++it;
}
// VFALCO Should we handle the legacy case
// for lines terminated with a single '\n'?
++it;
}
}
// Searches [p, last) for the first CRLF CRLF sequence.
// Returns a pointer to the octet following the sequence,
// or nullptr when the range does not contain one.
//
// The window is examined from its last octet backwards so
// that the scan can skip ahead by more than one position
// whenever the trailing octets rule out a match.
//
// VFALCO Can SIMD help this?
static
char const*
find_eom(char const* p, char const* last)
{
    while(last - p >= 4)
    {
        if(p[3] == '\n')
        {
            if(p[2] == '\r' && p[1] == '\n' && p[0] == '\r')
                return p + 4;
            // A trailing CRLF may be the first half of the
            // terminator; otherwise nothing here can match.
            p += (p[2] == '\r') ? 2 : 4;
        }
        else
        {
            // A trailing CR may begin the terminator, so
            // advance by only one position in that case.
            p += (p[3] == '\r') ? 1 : 4;
        }
    }
    return nullptr;
}
//--------------------------------------------------------------------------
/*  Scan forward from `p` to the CRLF that ends the current line.

    HTAB and octets that are not control characters are skipped.
    The scan stops at the first control character other than HTAB
    (0x00-0x08, 0x0A-0x1F) or at DEL (0x7F).

    On success the return value points just past the CRLF and
    `token_last` points at the CR, i.e. one past the last octet
    of the scanned text.

    Other outcomes:
      - end of input reached first: `ec` is error::need_more
      - CR not followed by LF:      `ec` is error::bad_line_ending
      - any other stopping octet:   nullptr is returned and `ec`
                                    is left unchanged
*/
char const*
parse_token_to_eol(
    char const* p,
    char const* last,
    char const*& token_last,
    error_code& ec)
{
#if ! BEAST_NO_INTRINSICS
    // Inclusive [lo, hi] byte ranges which stop the scan. The array
    // is explicitly 16 bytes (zero padded) because find_fast loads a
    // full 128-bit vector from it; an array sized by the literal
    // alone would be read out of bounds.
    static char const ranges1[16] =
        "\0\010"    // 0x00-0x08: control characters below HT
        "\012\037"  // 0x0A-0x1F: control characters above HT
        "\177\177"  // 0x7F: DEL
        ;
    // Number of meaningful bytes in ranges1 (three pairs)
    std::size_t const ranges1_size = 6;
    bool found;
    std::tie(p, found) = find_fast(
        p, last, ranges1, ranges1_size);
    if(found)
        goto found_control;
#else
    /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
    while(BOOST_LIKELY(last - p >= 8))
    {
#define BEAST_PARSE_TOKEN_TO_EOL_REPEAT() \
    do \
    { \
        if(BOOST_UNLIKELY( \
            ! is_print(*p))) \
            goto non_printable; \
        ++p; \
    } \
    while(0);
        BEAST_PARSE_TOKEN_TO_EOL_REPEAT();
        BEAST_PARSE_TOKEN_TO_EOL_REPEAT();
        BEAST_PARSE_TOKEN_TO_EOL_REPEAT();
        BEAST_PARSE_TOKEN_TO_EOL_REPEAT();
        BEAST_PARSE_TOKEN_TO_EOL_REPEAT();
        BEAST_PARSE_TOKEN_TO_EOL_REPEAT();
        BEAST_PARSE_TOKEN_TO_EOL_REPEAT();
        BEAST_PARSE_TOKEN_TO_EOL_REPEAT();
#undef BEAST_PARSE_TOKEN_TO_EOL_REPEAT
        continue;
    non_printable:
        // Stop only on a control character other than HT, or on DEL;
        // any other non-printable octet is skipped.
        if((BOOST_LIKELY((unsigned char)*p < '\040') &&
            BOOST_LIKELY(*p != '\011')) ||
            BOOST_UNLIKELY(*p == '\177'))
            goto found_control;
        ++p;
    }
#endif
    // Handle whatever the fast path above left over, one octet at a time
    for(;; ++p)
    {
        if(p >= last)
        {
            ec = error::need_more;
            return p;
        }
        if(BOOST_UNLIKELY(! is_print(*p)))
            if((BOOST_LIKELY(static_cast<
                    unsigned char>(*p) < '\040') &&
                BOOST_LIKELY(*p != '\011')) ||
                BOOST_UNLIKELY(*p == '\177'))
                goto found_control;
    }
found_control:
    if(BOOST_LIKELY(*p == '\r'))
    {
        if(++p >= last)
        {
            ec = error::need_more;
            return last;
        }
        if(*p++ != '\n')
        {
            ec = error::bad_line_ending;
            return last;
        }
        // p is now past the LF; the text ends at the CR
        token_last = p - 2;
    }
#if 0
    // VFALCO This allows `\n` by itself
    //        to terminate a line
    else if(*p == '\n')
    {
        token_last = p;
        ++p;
    }
#endif
    else
    {
        // invalid character
        return nullptr;
    }
    return p;
}
template<class Iter, class Unsigned>
@@ -284,190 +466,361 @@ protected:
}
static
string_view
parse_method(char const*& it)
void
parse_method(
char const*& it, char const* last,
string_view& result, error_code& ec)
{
// parse token SP
auto const first = it;
while(detail::is_tchar(*it))
++it;
return {first, static_cast<
string_view::size_type>(
it - first)};
for(;; ++it)
{
if(it + 1 > last)
{
ec = error::need_more;
return;
}
static
string_view
parse_target(char const*& it)
if(! detail::is_tchar(*it))
break;
}
if(it + 1 > last)
{
auto const first = it;
while(is_pathchar(*it))
++it;
ec = error::need_more;
return;
}
if(*it != ' ')
return {};
return {first, static_cast<
string_view::size_type>(
it - first)};
{
ec = error::bad_method;
return;
}
if(it == first)
{
// cannot be empty
ec = error::bad_method;
return;
}
result = make_string(first, it++);
}
static
string_view
parse_name(char const*& it)
void
parse_target(
char const*& it, char const* last,
string_view& result, error_code& ec)
{
// parse target SP
auto const first = it;
while(to_field_char(*it))
++it;
return {first, static_cast<
string_view::size_type>(
it - first)};
for(;; ++it)
{
if(it + 1 > last)
{
ec = error::need_more;
return;
}
if(! is_pathchar(*it))
break;
}
if(it + 1 > last)
{
ec = error::need_more;
return;
}
if(*it != ' ')
{
ec = error::bad_target;
return;
}
if(it == first)
{
// cannot be empty
ec = error::bad_target;
return;
}
result = make_string(first, it++);
}
static
int
parse_version(char const*& it)
void
parse_version(
char const*& it, char const* last,
int& result, error_code& ec)
{
if(*it != 'H')
return -1;
if(*++it != 'T')
return -1;
if(*++it != 'T')
return -1;
if(*++it != 'P')
return -1;
if(*++it != '/')
return -1;
if(! is_digit(*++it))
return -1;
int v = 10 * (*it - '0');
if(*++it != '.')
return -1;
if(! is_digit(*++it))
return -1;
v += *it++ - '0';
return v;
if(it + 8 > last)
{
ec = error::need_more;
return;
}
static
int
parse_status(char const*& it)
if(*it++ != 'H')
{
int v;
ec = error::bad_version;
return;
}
if(*it++ != 'T')
{
ec = error::bad_version;
return;
}
if(*it++ != 'T')
{
ec = error::bad_version;
return;
}
if(*it++ != 'P')
{
ec = error::bad_version;
return;
}
if(*it++ != '/')
{
ec = error::bad_version;
return;
}
if(! is_digit(*it))
return -1;
v = 100 * (*it - '0');
if(! is_digit(*++it))
return -1;
v += 10 * (*it - '0');
if(! is_digit(*++it))
return -1;
v += (*it++ - '0');
return v;
{
ec = error::bad_version;
return;
}
result = 10 * (*it++ - '0');
if(*it++ != '.')
{
ec = error::bad_version;
return;
}
if(! is_digit(*it))
{
ec = error::bad_version;
return;
}
result += *it++ - '0';
}
static
string_view
parse_reason(char const*& it)
void
parse_status(
char const*& it, char const* last,
unsigned short& result, error_code& ec)
{
// parse 3(digit) SP
if(it + 4 > last)
{
ec = error::need_more;
return;
}
if(! is_digit(*it))
{
ec = error::bad_status;
return;
}
result = 100 * (*it++ - '0');
if(! is_digit(*it))
{
ec = error::bad_status;
return;
}
result += 10 * (*it++ - '0');
if(! is_digit(*it))
{
ec = error::bad_status;
return;
}
result += *it++ - '0';
if(*it++ != ' ')
{
ec = error::bad_status;
return;
}
}
void
parse_reason(
char const*& it, char const* last,
string_view& result, error_code& ec)
{
auto const first = it;
while(*it != '\r')
char const* token_last;
auto p = parse_token_to_eol(
it, last, token_last, ec);
if(ec)
return;
if(! p)
{
if(! is_text(*it))
return {};
++it;
ec = error::bad_reason;
return;
}
return {first, static_cast<
std::size_t>(it - first)};
result = make_string(first, token_last);
it = p;
}
// VFALCO Can SIMD help this?
static
char const*
find_eol(
char const* it, char const* last,
template<std::size_t N>
void
parse_field(
char const*& p,
char const* last,
string_view& name,
string_view& value,
static_string<N>& buf,
error_code& ec)
{
#if 0
// SLOWER
it = reinterpret_cast<char const*>(
std::memchr(it, '\r', last - it));
if(! it)
{
ec.assign(0, ec.category());
return nullptr;
}
if(it + 2 > last)
{
ec.assign(0, ec.category());
return nullptr;
}
if(it[1] != '\n')
{
ec = error::bad_line_ending;
return nullptr;
}
ec.assign(0, ec.category());
return it + 2;
#else
for(;;)
{
if(it == last)
{
ec.assign(0, ec.category());
return nullptr;
}
if(*it == '\r')
{
if(++it == last)
{
ec.assign(0, ec.category());
return nullptr;
}
if(*it != '\n')
{
ec = error::bad_line_ending;
return nullptr;
}
ec.assign(0, ec.category());
return ++it;
}
// VFALCO Should we handle the legacy case
// for lines terminated with a single '\n'?
++it;
}
#endif
}
/* header-field = field-name ":" OWS field-value OWS
// VFALCO Can SIMD help this?
static
char const*
find_eom(char const* p, char const* last)
field-name = token
field-value = *( field-content / obs-fold )
field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
field-vchar = VCHAR / obs-text
obs-fold = CRLF 1*( SP / HTAB )
; obsolete line folding
; see Section 3.2.4
token = 1*<any CHAR except CTLs or separators>
CHAR = <any US-ASCII character (octets 0 - 127)>
sep = "(" | ")" | "<" | ">" | "@"
| "," | ";" | ":" | "\" | <">
| "/" | "[" | "]" | "?" | "="
| "{" | "}" | SP | HT
*/
static char const* is_token =
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
"\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
"\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
"\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
// name
BOOST_ALIGNMENT(16) static const char ranges1[] =
"\x00 " /* control chars and up to SP */
"\"\"" /* 0x22 */
"()" /* 0x28,0x29 */
",," /* 0x2c */
"//" /* 0x2f */
":@" /* 0x3a-0x40 */
"[]" /* 0x5b-0x5d */
"{\377"; /* 0x7b-0xff */
auto first = p;
bool found;
std::tie(p, found) = find_fast(
p, last, ranges1, sizeof(ranges1)-1);
if(! found && p >= last)
{
ec = error::need_more;
return;
}
for(;;)
{
if(*p == ':')
break;
if(! is_token[static_cast<
unsigned char>(*p)])
{
ec = error::bad_field;
return;
}
++p;
if(p >= last)
{
ec = error::need_more;
return;
}
}
if(p == first)
{
// empty name
ec = error::bad_field;
return;
}
name = make_string(first, p);
++p; // eat ':'
char const* token_last;
for(;;)
{
// eat leading ' ' and '\t'
for(;;++p)
{
if(p + 1 > last)
{
ec = error::need_more;
return;
}
if(! (*p == ' ' || *p == '\t'))
break;
}
// parse to CRLF
first = p;
p = parse_token_to_eol(p, last, token_last, ec);
if(ec)
return;
if(! p)
{
ec = error::bad_value;
return;
}
// Look 1 char past the CRLF to handle obs-fold.
if(p + 1 > last)
{
ec = error::need_more;
return;
}
token_last =
trim_back(token_last, first);
if(*p != ' ' && *p != '\t')
{
value = make_string(first, token_last);
return;
}
++p;
if(token_last != first)
break;
}
buf.resize(0);
buf.append(first, token_last);
BOOST_ASSERT(! buf.empty());
try
{
for(;;)
{
if(p + 4 > last)
return nullptr;
if(p[3] != '\n')
// eat leading ' ' and '\t'
for(;;++p)
{
if(p[3] == '\r')
if(p + 1 > last)
{
ec = error::need_more;
return;
}
if(! (*p == ' ' || *p == '\t'))
break;
}
// parse to CRLF
first = p;
p = parse_token_to_eol(p, last, token_last, ec);
if(ec)
return;
// Look 1 char past the CRLF to handle obs-fold.
if(p + 1 > last)
{
ec = error::need_more;
return;
}
token_last = trim_back(token_last, first);
if(first != token_last)
{
buf.push_back(' ');
buf.append(first, token_last);
}
if(*p != ' ' && *p != '\t')
{
value = {buf.data(), buf.size()};
return;
}
++p;
else
p += 4;
}
else if(p[2] != '\r')
}
catch(std::length_error const&)
{
p += 4;
}
else if(p[1] != '\n')
{
p += 2;
}
else if(p[0] != '\r')
{
p += 2;
}
else
{
return p + 4;
}
ec = error::header_limit;
return;
}
}
};

View File

@@ -167,42 +167,6 @@ is_qpchar(char c)
return tab[static_cast<unsigned char>(c)];
}
// converts to lower case,
// returns 0 if not a valid token char
//
inline
char
to_field_char(char c)
{
/* token = 1*<any CHAR except CTLs or separators>
CHAR = <any US-ASCII character (octets 0 - 127)>
sep = "(" | ")" | "<" | ">" | "@"
| "," | ";" | ":" | "\" | <">
| "/" | "[" | "]" | "?" | "="
| "{" | "}" | SP | HT
*/
static char constexpr tab[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, '!', 0, '#', '$', '%', '&', '\'', 0, 0, '*', '+', 0, '-', '.', 0,
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0, 0, 0, 0, 0, 0,
0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, '^', '_',
'`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, '|', 0, '~', 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
BOOST_STATIC_ASSERT(sizeof(tab) == 256);
return tab[static_cast<unsigned char>(c)];
}
// converts to lower case,
// returns 0 if not a valid text char
//

View File

@@ -21,40 +21,6 @@
namespace beast {
namespace http {
namespace detail {
template<class FwdIt>
inline
FwdIt
skip_ows2(FwdIt it, FwdIt const& end)
{
while(it != end)
{
if(*it != ' ' && *it != '\t')
break;
++it;
}
return it;
}
template<class RanIt>
inline
RanIt
skip_ows_rev2(
RanIt it, RanIt const& first)
{
while(it != first)
{
auto const c = it[-1];
if(c != ' ' && c != '\t')
break;
--it;
}
return it;
}
} // detail
template<bool isRequest, class Derived>
basic_parser<isRequest, Derived>::
basic_parser()
@@ -177,6 +143,7 @@ put(boost::asio::const_buffers_1 const& buffer,
auto n = buffer_size(*buffer.begin());
auto const p0 = p;
auto const p1 = p0 + n;
ec.assign(0, ec.category());
loop:
switch(state_)
{
@@ -186,16 +153,67 @@ loop:
ec = error::need_more;
return 0;
}
state_ = state::header;
state_ = state::start_line;
BEAST_FALLTHROUGH;
case state::header:
parse_header(p, n, ec);
case state::start_line:
{
maybe_need_more(p, n, ec);
if(ec)
goto done;
parse_start_line(p, p + std::min<std::size_t>(
header_limit_, n), ec, is_request{});
if(ec)
{
if(ec == error::need_more)
{
if(n >= header_limit_)
{
ec = error::header_limit;
goto done;
}
if(p + 3 <= p1)
skip_ = static_cast<
std::size_t>(p1 - p - 3);
}
goto done;
}
BOOST_ASSERT(! is_done());
n = static_cast<std::size_t>(p1 - p);
if(p >= p1)
{
ec = error::need_more;
goto done;
}
BEAST_FALLTHROUGH;
}
case state::fields:
maybe_need_more(p, n, ec);
if(ec)
goto done;
parse_fields(p, p + std::min<std::size_t>(
header_limit_, n), ec);
if(ec)
{
if(ec == error::need_more)
{
if(n >= header_limit_)
{
ec = error::header_limit;
goto done;
}
if(p + 3 <= p1)
skip_ = static_cast<
std::size_t>(p1 - p - 3);
}
goto done;
}
finish_header(ec, is_request{});
break;
case state::body0:
BOOST_ASSERT(! skip_);
impl().on_body(content_length(), ec);
if(ec)
goto done;
@@ -203,12 +221,14 @@ loop:
BEAST_FALLTHROUGH;
case state::body:
BOOST_ASSERT(! skip_);
parse_body(p, n, ec);
if(ec)
goto done;
break;
case state::body_to_eof0:
BOOST_ASSERT(! skip_);
impl().on_body(content_length(), ec);
if(ec)
goto done;
@@ -216,6 +236,7 @@ loop:
BEAST_FALLTHROUGH;
case state::body_to_eof:
BOOST_ASSERT(! skip_);
parse_body_to_eof(p, n, ec);
if(ec)
goto done;
@@ -259,7 +280,8 @@ basic_parser<isRequest, Derived>::
put_eof(error_code& ec)
{
BOOST_ASSERT(got_some());
if(state_ == state::header)
if( state_ == state::start_line ||
state_ == state::fields)
{
ec = error::partial_message;
return;
@@ -300,9 +322,12 @@ template<bool isRequest, class Derived>
inline
void
basic_parser<isRequest, Derived>::
parse_header(char const*& p,
std::size_t n, error_code& ec)
maybe_need_more(
char const* p, std::size_t n,
error_code& ec)
{
if(skip_ == 0)
return;
if( n > header_limit_)
n = header_limit_;
if(n < skip_ + 4)
@@ -320,73 +345,57 @@ parse_header(char const*& p,
ec = error::header_limit;
return;
}
ec = http::error::need_more;
ec = error::need_more;
return;
}
skip_ = 0;
parse_header(p, term, ec,
std::integral_constant<bool, isRequest>{});
if(ec)
return;
impl().on_header(ec);
if(ec)
return;
if(state_ == state::complete)
{
impl().on_complete(ec);
if(ec)
return;
}
}
template<bool isRequest, class Derived>
inline
void
basic_parser<isRequest, Derived>::
parse_header(char const*& p, char const* term,
parse_start_line(
char const*& in, char const* last,
error_code& ec, std::true_type)
{
/*
request-line = method SP request-target SP HTTP-version CRLF
method = token
*/
auto const method = parse_method(p);
if(method.empty())
{
ec = error::bad_method;
return;
}
if(*p++ != ' ')
{
ec = error::bad_method;
return;
}
auto p = in;
auto const target = parse_target(p);
if(target.empty())
{
ec = error::bad_target;
string_view method;
parse_method(p, last, method, ec);
if(ec)
return;
}
if(*p++ != ' ')
{
ec = error::bad_target;
return;
}
auto const version = parse_version(p);
if(version < 0)
string_view target;
parse_target(p, last, target, ec);
if(ec)
return;
int version;
parse_version(p, last, version, ec);
if(ec)
return;
if(version < 10 || version > 11)
{
ec = error::bad_version;
return;
}
if(! parse_crlf(p))
if(p + 2 > last)
{
ec = error::need_more;
return;
}
if(p[0] != '\r' || p[1] != '\n')
{
ec = error::bad_version;
return;
}
p += 2;
if(version >= 11)
f_ |= flagHTTP11;
@@ -396,11 +405,114 @@ parse_header(char const*& p, char const* term,
if(ec)
return;
parse_fields(p, term, ec);
in = p;
state_ = state::fields;
}
template<bool isRequest, class Derived>
inline
void
basic_parser<isRequest, Derived>::
parse_start_line(
char const*& in, char const* last,
error_code& ec, std::false_type)
{
/*
status-line = HTTP-version SP status-code SP reason-phrase CRLF
status-code = 3*DIGIT
reason-phrase = *( HTAB / SP / VCHAR / obs-text )
*/
auto p = in;
int version;
parse_version(p, last, version, ec);
if(ec)
return;
BOOST_ASSERT(p == term);
if(version < 10 || version > 11)
{
ec = error::bad_version;
return;
}
// SP
if(p + 1 > last)
{
ec = error::need_more;
return;
}
if(*p++ != ' ')
{
ec = error::bad_version;
return;
}
parse_status(p, last, status_, ec);
if(ec)
return;
// parse reason CRLF
string_view reason;
parse_reason(p, last, reason, ec);
if(ec)
return;
if(version >= 11)
f_ |= flagHTTP11;
impl().on_response(
status_, reason, version, ec);
if(ec)
return;
in = p;
state_ = state::fields;
}
// Parses zero or more header fields starting at `in`, stopping at
// the empty line which ends the header.
//
// `in` is advanced past each field once it has been delivered to
// the derived class, and past the final CRLF when the header ends.
// If more input is required `ec` is set to error::need_more and
// `in` is left at the start of the first unconsumed field.
template<bool isRequest, class Derived>
void
basic_parser<isRequest, Derived>::
parse_fields(char const*& in,
    char const* last, error_code& ec)
{
    // Storage used to assemble values which contain obs-fold.
    // https://stackoverflow.com/questions/686217/maximum-on-http-header-values
    static_string<max_obs_fold> buf;
    string_view name;
    string_view value;

    auto p = in;
    while(last - p >= 2)
    {
        if(*p == '\r')
        {
            // CR here must begin the CRLF which terminates the header
            if(p[1] != '\n')
                ec = error::bad_line_ending;
            in = p + 2;
            return;
        }

        parse_field(p, last, name, value, buf, ec);
        if(ec)
            return;

        auto const f = string_to_field(name);
        do_field(f, value, ec);
        if(ec)
            return;

        impl().on_field(f, name, value, ec);
        if(ec)
            return;

        // Commit the field only after it was fully processed
        in = p;
    }
    ec = error::need_more;
}
template<bool isRequest, class Derived>
inline
void
basic_parser<isRequest, Derived>::
finish_header(error_code& ec, std::true_type)
{
// RFC 7230 section 3.3
// https://tools.ietf.org/html/rfc7230#section-3.3
@@ -430,62 +542,31 @@ parse_header(char const*& p, char const* term,
len_ = 0;
state_ = state::complete;
}
impl().on_header(ec);
if(ec)
return;
if(state_ == state::complete)
{
impl().on_complete(ec);
if(ec)
return;
}
}
template<bool isRequest, class Derived>
inline
void
basic_parser<isRequest, Derived>::
parse_header(char const*& p, char const* term,
error_code& ec, std::false_type)
finish_header(error_code& ec, std::false_type)
{
/*
status-line = HTTP-version SP status-code SP reason-phrase CRLF
status-code = 3*DIGIT
reason-phrase = *( HTAB / SP / VCHAR / obs-text )
*/
auto const version = parse_version(p);
if(version < 0 || *p != ' ')
{
ec = error::bad_version;
return;
}
++p;
auto const status = parse_status(p);
if(status < 0 || *p != ' ')
{
ec = error::bad_status;
return;
}
++p;
auto const reason = parse_reason(p);
if(! parse_crlf(p))
{
ec = error::bad_reason;
return;
}
if(version >= 11)
f_ |= flagHTTP11;
impl().on_response(
status, reason, version, ec);
if(ec)
return;
parse_fields(p, term, ec);
if(ec)
return;
BOOST_ASSERT(p == term);
// RFC 7230 section 3.3
// https://tools.ietf.org/html/rfc7230#section-3.3
if( (f_ & flagSkipBody) || // e.g. response to a HEAD request
status / 100 == 1 || // 1xx e.g. Continue
status == 204 || // No Content
status == 304) // Not Modified
status_ / 100 == 1 || // 1xx e.g. Continue
status_ == 204 || // No Content
status_ == 304) // Not Modified
{
state_ = state::complete;
return;
@@ -514,6 +595,16 @@ parse_header(char const*& p, char const* term,
f_ |= flagNeedEOF;
state_ = state::body_to_eof0;
}
impl().on_header(ec);
if(ec)
return;
if(state_ == state::complete)
{
impl().on_complete(ec);
if(ec)
return;
}
}
template<bool isRequest, class Derived>
@@ -712,117 +803,6 @@ parse_chunk_body(char const*& p,
state_ = state::chunk_header;
}
template<bool isRequest, class Derived>
void
basic_parser<isRequest, Derived>::
parse_fields(char const*& p,
char const* last, error_code& ec)
{
/* header-field = field-name ":" OWS field-value OWS
field-name = token
field-value = *( field-content / obs-fold )
field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
field-vchar = VCHAR / obs-text
obs-fold = CRLF 1*( SP / HTAB )
; obsolete line folding
; see Section 3.2.4
*/
for(;;)
{
auto term = find_eol(p, last, ec);
if(ec)
return;
BOOST_ASSERT(term);
if(p == term - 2)
{
p = term;
break;
}
auto const name = parse_name(p);
if(name.empty())
{
ec = error::bad_field;
return;
}
if(*p++ != ':')
{
ec = error::bad_field;
return;
}
if(*term != ' ' &&
*term != '\t')
{
auto it2 = term - 2;
p = detail::skip_ows2(p, it2);
it2 = detail::skip_ows_rev2(it2, p);
auto const f = string_to_field(name);
auto const value = make_string(p, it2);
do_field(f, value, ec);
if(ec)
return;
impl().on_field(f, name, value, ec);
if(ec)
return;
p = term;
}
else
{
// obs-fold
for(;;)
{
auto const it2 = term - 2;
p = detail::skip_ows2(p, it2);
if(p != it2)
break;
p = term;
if(*p != ' ' && *p != '\t')
break;
term = find_eol(p, last, ec);
if(ec)
return;
}
// https://stackoverflow.com/questions/686217/maximum-on-http-header-values
static_string<max_obs_fold> s;
try
{
if(p != term)
{
s.append(p, term - 2);
p = term;
for(;;)
{
if(*p != ' ' && *p != '\t')
break;
s.push_back(' ');
p = detail::skip_ows2(p, term - 2);
term = find_eol(p, last, ec);
if(ec)
return;
if(p != term - 2)
s.append(p, term - 2);
p = term;
}
}
}
catch(std::length_error const&)
{
ec = error::bad_obs_fold;
return;
}
auto const f = string_to_field(name);
string_view const value{s.data(), s.size()};
do_field(f, value, ec);
if(ec)
return;
impl().on_field(f, name, value, ec);
if(ec)
return;
}
}
}
template<bool isRequest, class Derived>
void
basic_parser<isRequest, Derived>::
@@ -842,19 +822,19 @@ do_field(field f,
}
for(auto const& s : list)
{
if(strieq("close", s))
if(iequals({"close", 5}, s))
{
f_ |= flagConnectionClose;
continue;
}
if(strieq("keep-alive", s))
if(iequals({"keep-alive", 10}, s))
{
f_ |= flagConnectionKeepAlive;
continue;
}
if(strieq("upgrade", s))
if(iequals({"upgrade", 7}, s))
{
f_ |= flagConnectionUpgrade;
continue;
@@ -864,16 +844,6 @@ do_field(field f,
return;
}
for(auto p = value.begin();
p != value.end(); ++p)
{
if(! is_text(*p))
{
ec = error::bad_value;
return;
}
}
// Content-Length
if(f == field::content_length)
{
@@ -933,7 +903,7 @@ do_field(field f,
auto const p = std::find_if(v.begin(), v.end(),
[&](typename token_list::value_type const& s)
{
return strieq("chunked", s);
return iequals({"chunked", 7}, s);
});
if(p == v.end())
return;

View File

@@ -259,7 +259,7 @@ public:
false, dynamic_body, fields>>(
Repeat, cres_);
});
#if 1
#if 0
timedTest(Trials, "nodejs_parser",
[&]
{

File diff suppressed because it is too large Load Diff

View File

@@ -32,13 +32,13 @@ public:
std::string path;
std::string reason;
std::string body;
bool got_on_begin = false;
bool got_on_field = false;
bool got_on_header = false;
bool got_on_body = false;
bool got_content_length = false;
bool got_on_chunk = false;
bool got_on_complete = false;
int got_on_begin = 0;
int got_on_field = 0;
int got_on_header = 0;
int got_on_body = 0;
int got_content_length = 0;
int got_on_chunk = 0;
int got_on_complete = 0;
std::unordered_map<
std::string, std::string> fields;
@@ -59,7 +59,7 @@ public:
path = std::string(
path_.data(), path_.size());
version = version_;
got_on_begin = true;
++got_on_begin;
if(fc_)
fc_->fail(ec);
else
@@ -75,7 +75,7 @@ public:
reason = std::string(
reason_.data(), reason_.size());
version = version_;
got_on_begin = true;
++got_on_begin;
if(fc_)
fc_->fail(ec);
else
@@ -86,7 +86,7 @@ public:
on_field(field, string_view name,
string_view value, error_code& ec)
{
got_on_field = true;
++got_on_field;
if(fc_)
fc_->fail(ec);
else
@@ -97,7 +97,7 @@ public:
void
on_header(error_code& ec)
{
got_on_header = true;
++got_on_header;
if(fc_)
fc_->fail(ec);
else
@@ -109,7 +109,7 @@ public:
std::uint64_t> const& content_length_,
error_code& ec)
{
got_on_body = true;
++got_on_body;
got_content_length =
static_cast<bool>(content_length_);
if(fc_)
@@ -134,7 +134,7 @@ public:
on_chunk(std::uint64_t,
string_view, error_code& ec)
{
got_on_chunk = true;
++got_on_chunk;
if(fc_)
fc_->fail(ec);
else
@@ -144,7 +144,7 @@ public:
void
on_complete(error_code& ec)
{
got_on_complete = true;
++got_on_complete;
if(fc_)
fc_->fail(ec);
else