Optimize field lookups

This commit is contained in:
Vinnie Falco
2017-06-26 10:46:17 -07:00
parent 64ff766b23
commit 2d6859831c
5 changed files with 205 additions and 295 deletions

View File

@ -4,6 +4,7 @@ Version 68:
* Small speed up in fields comparisons
* Adjust buffer size in fast server
* Use string_ref in older Boost versions
* Optimize field lookups
API Changes:

View File

@ -67,9 +67,24 @@ iequals(
return false;
auto p1 = lhs.data();
auto p2 = rhs.data();
char a, b;
while(n--)
if(ascii_tolower(*p1) != ascii_tolower(*p2))
{
a = *p1++;
b = *p2++;
if(a != b)
goto slow;
}
return true;
while(n--)
{
slow:
if(ascii_tolower(a) != ascii_tolower(b))
return false;
a = *p1++;
b = *p2++;
}
return true;
}
@ -92,7 +107,7 @@ iequals(
return detail::iequals(lhs, rhs);
}
/** A strictly less predicate for strings, using a case-insensitive comparison.
/** A case-insensitive less predicate for strings.
The case-comparison operation is defined only for low-ASCII characters.
*/
@ -100,8 +115,8 @@ struct iless
{
bool
operator()(
beast::string_view const& lhs,
beast::string_view const& rhs) const
string_view const& lhs,
string_view const& rhs) const
{
using std::begin;
using std::end;
@ -115,7 +130,7 @@ struct iless
}
};
/** A predicate for string equality, using a case-insensitive comparison.
/** A case-insensitive equality predicate for strings.
The case-comparison operation is defined only for low-ASCII characters.
*/
@ -123,8 +138,8 @@ struct iequal
{
bool
operator()(
beast::string_view const& lhs,
beast::string_view const& rhs) const
string_view const& lhs,
string_view const& rhs) const
{
return iequals(lhs, rhs);
}

View File

@ -8,8 +8,11 @@
#ifndef BEAST_HTTP_IMPL_FIELD_IPP
#define BEAST_HTTP_IMPL_FIELD_IPP
#include <beast/core/string.hpp>
#include <algorithm>
#include <array>
#include <unordered_map>
#include <vector>
#include <boost/assert.hpp>
namespace beast {
@ -17,24 +20,70 @@ namespace http {
namespace detail {
class field_strings
struct field_table
{
using array_type =
std::array<string_view, 352>;
array_type v_;
/*  Case-insensitive hash over a field name.

    Only three characters take part: the first, the middle (index
    size/2) and the last, each folded to lower case so that names
    differing only in ASCII case produce the same value.

    Precondition: `s` is not empty. The function indexes s[0] and
    s[size-1] without checking.
*/
struct hash
{
    std::size_t
    operator()(string_view const& s) const
    {
        auto const len = s.size();
        auto const head = beast::detail::ascii_tolower(s[0]);
        auto const mid = beast::detail::ascii_tolower(s[len/2]);
        auto const tail = beast::detail::ascii_tolower(s[len-1]);
        // Recorded measurement from the author:
        //   hist[] = 331, 10, max_load_factor = 0.15f
        // NOTE(review): presumably the bucket-size histogram for this
        // hash (331 buckets holding one name, 10 holding two) - confirm.
        return (head * mid) ^ tail;
    }
};
public:
using const_iterator =
array_type::const_iterator;
struct iequal
{
// assumes inputs have equal length
bool
operator()(
string_view const& lhs,
string_view const& rhs) const
{
auto p1 = lhs.data();
auto p2 = rhs.data();
auto pend = lhs.end();
char a, b;
while(p1 < pend)
{
a = *p1++;
b = *p2++;
if(a != b)
goto slow;
}
return true;
while(p1 < pend)
{
slow:
if( beast::detail::ascii_tolower(a) !=
beast::detail::ascii_tolower(b))
return false;
a = *p1++;
b = *p2++;
}
return true;
}
};
using map_type = std::unordered_map<
string_view, field, hash, iequal>;
array_type by_name_;
std::vector<map_type> by_size_;
/*
From:
https://www.iana.org/assignments/message-headers/message-headers.xhtml
*/
field_strings()
: v_({{
field_table()
: by_name_({{
"<unknown-field>",
"A-IM",
"Accept",
@ -389,61 +438,103 @@ public:
"Xref"
}})
{
// find the longest field length
std::size_t high = 0;
for(auto const& s : by_name_)
if(high < s.size())
high = s.size();
// build by_size map
// skip field::unknown
by_size_.resize(high + 1);
for(auto& map : by_size_)
map.max_load_factor(.15f);
for(std::size_t i = 1;
i < by_name_.size(); ++i)
{
auto const& s = by_name_[i];
by_size_[s.size()].emplace(
s, static_cast<field>(i));
}
#if 0
// This snippet calculates the performance
// of the hash function and map settings
{
std::vector<std::size_t> hist;
for(auto const& map : by_size_)
{
for(std::size_t i = 0; i < map.bucket_count(); ++i)
{
auto const n = map.bucket_size(i);
if(n > 0)
{
if(hist.size() < n)
hist.resize(n);
++hist[n-1];
}
}
}
}
#endif
}
/*  Convert a field name to its `field` enumerator.

    The lookup first selects the map holding names of exactly
    `s.size()` characters, then searches that map. Returns
    field::unknown when the length is beyond the longest known name,
    when no name of that length exists, or when the name is not found.
*/
field
string_to_field(string_view s) const
{
    auto const len = s.size();
    if(len < by_size_.size())
    {
        auto const& candidates = by_size_[len];
        // An empty map is rejected before find() is called.
        if(! candidates.empty())
        {
            auto const pos = candidates.find(s);
            if(pos != candidates.end())
                return pos->second;
        }
    }
    return field::unknown;
}
//
// Deprecated
//
using const_iterator =
array_type::const_iterator;
std::size_t
size() const
{
return v_.size();
return by_name_.size();
}
const_iterator
begin() const
{
return v_.begin();
return by_name_.begin();
}
const_iterator
end() const
{
return v_.end();
return by_name_.end();
}
};
inline
field_strings const&
get_field_strings()
field_table const&
get_field_table()
{
static field_strings const fs;
return fs;
static field_table const tab;
return tab;
}
template<class = void>
string_view
to_string(field f)
{
auto const& v = get_field_strings();
auto const& v = get_field_table();
BOOST_ASSERT(static_cast<unsigned>(f) < v.size());
return v.begin()[static_cast<unsigned>(f)];
}
template<class = void>
field
string_to_field(string_view s)
{
auto const& v = get_field_strings();
auto const it = std::lower_bound(
v.begin(), v.end(), s,
beast::iless{});
if(it == v.end())
return field::unknown;
if(! iequals(s, *it))
return field::unknown;
BOOST_ASSERT(iequals(s, to_string(
static_cast<field>(it - v.begin()))));
return static_cast<field>(it - v.begin());
}
} // detail
inline
@ -457,7 +548,7 @@ inline
field
string_to_field(string_view s)
{
return detail::string_to_field(s);
return detail::get_field_table().string_to_field(s);
}
} // http

View File

@ -211,6 +211,9 @@ public:
void
write_eof(error_code& ec);
void
check_header();
private:
Derived&
impl()
@ -218,231 +221,6 @@ private:
return *static_cast<Derived*>(this);
}
template<class C>
class has_on_start_t
{
template<class T, class R =
decltype(std::declval<T>().on_start(), std::true_type{})>
static R check(int);
template<class>
static std::false_type check(...);
using type = decltype(check<C>(0));
public:
static bool const value = type::value;
};
template<class C>
using has_on_start =
std::integral_constant<bool, has_on_start_t<C>::value>;
void
call_on_start(std::true_type)
{
impl().on_start();
}
void
call_on_start(std::false_type)
{
}
template<class C>
class has_on_field_t
{
template<class T, class R =
decltype(std::declval<T>().on_field(
std::declval<std::string const&>(),
std::declval<std::string const&>()),
std::true_type{})>
static R check(int);
template<class>
static std::false_type check(...);
using type = decltype(check<C>(0));
public:
static bool const value = type::value;
};
template<class C>
using has_on_field =
std::integral_constant<bool, has_on_field_t<C>::value>;
void
call_on_field(std::string const& field,
std::string const& value, std::true_type)
{
impl().on_field(field, value);
}
void
call_on_field(std::string const&, std::string const&,
std::false_type)
{
}
template<class C>
class has_on_headers_complete_t
{
template<class T, class R =
decltype(std::declval<T>().on_headers_complete(
std::declval<error_code&>()), std::true_type{})>
static R check(int);
template<class>
static std::false_type check(...);
using type = decltype(check<C>(0));
public:
static bool const value = type::value;
};
template<class C>
using has_on_headers_complete =
std::integral_constant<bool, has_on_headers_complete_t<C>::value>;
void
call_on_headers_complete(error_code& ec, std::true_type)
{
impl().on_headers_complete(ec);
}
void
call_on_headers_complete(error_code&, std::false_type)
{
}
template<class C>
class has_on_request_t
{
template<class T, class R =
decltype(std::declval<T>().on_request(
std::declval<unsigned>(), std::declval<std::string>(),
std::declval<int>(), std::declval<int>(),
std::declval<bool>(), std::declval<bool>()),
std::true_type{})>
static R check(int);
template<class>
static std::false_type check(...);
using type = decltype(check<C>(0));
public:
static bool const value = type::value;
};
template<class C>
using has_on_request =
std::integral_constant<bool, has_on_request_t<C>::value>;
void
call_on_request(unsigned method, std::string url,
int major, int minor, bool keep_alive, bool upgrade,
std::true_type)
{
impl().on_request(
method, url, major, minor, keep_alive, upgrade);
}
void
call_on_request(unsigned, std::string, int, int, bool, bool,
std::false_type)
{
}
/*  Detects whether `C` has a member function callable as

        c.on_response(int, std::string, int, int, bool, bool)

    `value` is true when the call expression is well-formed and false
    otherwise.

    The previous probe spelled the string argument as
    `std::declval<std::string>` without the call parentheses. That
    passed a function instead of a string, so the expression was never
    well-formed and detection was disabled. The arguments are modelled
    as lvalues because call_on_response forwards its own named
    parameters to on_response.
*/
template<class C>
class has_on_response_t
{
    // Preferred overload: viable only if the call expression compiles.
    template<class T, class R =
        decltype(std::declval<T>().on_response(
            std::declval<int&>(), std::declval<std::string&>(),
            std::declval<int&>(), std::declval<int&>(),
            std::declval<bool&>(), std::declval<bool&>()),
                std::true_type{})>
    static R check(int);
    // Fallback: chosen when the overload above is removed by SFINAE.
    template<class>
    static std::false_type check(...);
    using type = decltype(check<C>(0));
public:
    static bool const value = type::value;
};
template<class C>
using has_on_response =
std::integral_constant<bool, has_on_response_t<C>::value>;
bool
call_on_response(int status, std::string text,
int major, int minor, bool keep_alive, bool upgrade,
std::true_type)
{
return impl().on_response(
status, text, major, minor, keep_alive, upgrade);
}
bool
call_on_response(int, std::string, int, int, bool, bool,
std::false_type)
{
// VFALCO Certainly incorrect
return true;
}
template<class C>
class has_on_body_t
{
template<class T, class R =
decltype(std::declval<T>().on_body(
std::declval<void const*>(), std::declval<std::size_t>(),
std::declval<error_code&>()), std::true_type{})>
static R check(int);
template<class>
static std::false_type check(...);
using type = decltype(check<C>(0));
public:
static bool const value = type::value;
};
template<class C>
using has_on_body =
std::integral_constant<bool, has_on_body_t<C>::value>;
void
call_on_body(void const* data, std::size_t bytes,
error_code& ec, std::true_type)
{
impl().on_body(data, bytes, ec);
}
void
call_on_body(void const*, std::size_t,
error_code&, std::false_type)
{
}
template<class C>
class has_on_complete_t
{
template<class T, class R =
decltype(std::declval<T>().on_complete(), std::true_type{})>
static R check(int);
template<class>
static std::false_type check(...);
using type = decltype(check<C>(0));
public:
static bool const value = type::value;
};
template<class C>
using has_on_complete =
std::integral_constant<bool, has_on_complete_t<C>::value>;
void
call_on_complete(std::true_type)
{
impl().on_complete();
}
void
call_on_complete(std::false_type)
{
}
void
check_header();
static int cb_message_start(http_parser*);
static int cb_url(http_parser*, char const*, std::size_t);
static int cb_status(http_parser*, char const*, std::size_t);
@ -526,7 +304,8 @@ nodejs_basic_parser(nodejs_basic_parser&& other)
template<class Derived>
auto
nodejs_basic_parser<Derived>::operator=(nodejs_basic_parser&& other) ->
nodejs_basic_parser<Derived>::
operator=(nodejs_basic_parser&& other) ->
nodejs_basic_parser&
{
state_ = other.state_;
@ -569,7 +348,8 @@ operator=(nodejs_basic_parser const& other) ->
}
template<class Derived>
nodejs_basic_parser<Derived>::nodejs_basic_parser(bool request) noexcept
nodejs_basic_parser<Derived>::
nodejs_basic_parser(bool request) noexcept
{
state_.data = this;
http_parser_init(&state_, request
@ -579,7 +359,8 @@ nodejs_basic_parser<Derived>::nodejs_basic_parser(bool request) noexcept
template<class Derived>
std::size_t
nodejs_basic_parser<Derived>::write(void const* data,
nodejs_basic_parser<Derived>::
write(void const* data,
std::size_t size, error_code& ec)
{
ec_ = &ec;
@ -596,11 +377,11 @@ nodejs_basic_parser<Derived>::write(void const* data,
template<class Derived>
void
nodejs_basic_parser<Derived>::write_eof(error_code& ec)
nodejs_basic_parser<Derived>::
write_eof(error_code& ec)
{
ec_ = &ec;
http_parser_execute(
&state_, hooks(), nullptr, 0);
http_parser_execute(&state_, hooks(), nullptr, 0);
if(! ec)
ec = detail::make_nodejs_error(
static_cast<int>(state_.http_errno));
@ -608,13 +389,12 @@ nodejs_basic_parser<Derived>::write_eof(error_code& ec)
template<class Derived>
void
nodejs_basic_parser<Derived>::check_header()
nodejs_basic_parser<Derived>::
check_header()
{
if(! value_.empty())
{
//detail::trim(value_);
call_on_field(field_, value_,
has_on_field<Derived>{});
impl().on_field(field_, value_);
field_.clear();
value_.clear();
}
@ -622,7 +402,8 @@ nodejs_basic_parser<Derived>::check_header()
template<class Derived>
int
nodejs_basic_parser<Derived>::cb_message_start(http_parser* p)
nodejs_basic_parser<Derived>::
cb_message_start(http_parser* p)
{
auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data);
t.complete_ = false;
@ -630,13 +411,14 @@ nodejs_basic_parser<Derived>::cb_message_start(http_parser* p)
t.status_.clear();
t.field_.clear();
t.value_.clear();
t.call_on_start(has_on_start<Derived>{});
t.impl().on_start();
return 0;
}
template<class Derived>
int
nodejs_basic_parser<Derived>::cb_url(http_parser* p,
nodejs_basic_parser<Derived>::
cb_url(http_parser* p,
char const* in, std::size_t bytes)
{
auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data);
@ -646,7 +428,8 @@ nodejs_basic_parser<Derived>::cb_url(http_parser* p,
template<class Derived>
int
nodejs_basic_parser<Derived>::cb_status(http_parser* p,
nodejs_basic_parser<Derived>::
cb_status(http_parser* p,
char const* in, std::size_t bytes)
{
auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data);
@ -656,7 +439,8 @@ nodejs_basic_parser<Derived>::cb_status(http_parser* p,
template<class Derived>
int
nodejs_basic_parser<Derived>::cb_header_field(http_parser* p,
nodejs_basic_parser<Derived>::
cb_header_field(http_parser* p,
char const* in, std::size_t bytes)
{
auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data);
@ -667,7 +451,8 @@ nodejs_basic_parser<Derived>::cb_header_field(http_parser* p,
template<class Derived>
int
nodejs_basic_parser<Derived>::cb_header_value(http_parser* p,
nodejs_basic_parser<Derived>::
cb_header_value(http_parser* p,
char const* in, std::size_t bytes)
{
auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data);
@ -677,62 +462,68 @@ nodejs_basic_parser<Derived>::cb_header_value(http_parser* p,
template<class Derived>
int
nodejs_basic_parser<Derived>::cb_headers_complete(http_parser* p)
nodejs_basic_parser<Derived>::
cb_headers_complete(http_parser* p)
{
auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data);
t.check_header();
t.call_on_headers_complete(*t.ec_,
has_on_headers_complete<Derived>{});
t.impl().on_headers_complete(*t.ec_);
if(*t.ec_)
return 1;
bool const keep_alive =
http_should_keep_alive(p) != 0;
if(p->type == http_parser_type::HTTP_REQUEST)
{
t.call_on_request(p->method, t.url_,
t.impl().on_request(p->method, t.url_,
p->http_major, p->http_minor, keep_alive,
p->upgrade, has_on_request<Derived>{});
p->upgrade);
return 0;
}
return t.call_on_response(p->status_code, t.status_,
return t.impl().on_response(p->status_code, t.status_,
p->http_major, p->http_minor, keep_alive,
p->upgrade, has_on_response<Derived>{}) ? 0 : 1;
p->upgrade) ? 0 : 1;
}
template<class Derived>
int
nodejs_basic_parser<Derived>::cb_body(http_parser* p,
nodejs_basic_parser<Derived>::
cb_body(http_parser* p,
char const* in, std::size_t bytes)
{
auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data);
t.call_on_body(in, bytes, *t.ec_, has_on_body<Derived>{});
t.impl().on_body(in, bytes, *t.ec_);
return *t.ec_ ? 1 : 0;
}
template<class Derived>
int
nodejs_basic_parser<Derived>::cb_message_complete(http_parser* p)
nodejs_basic_parser<Derived>::
cb_message_complete(http_parser* p)
{
auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data);
t.complete_ = true;
t.call_on_complete(has_on_complete<Derived>{});
t.impl().on_complete();
return 0;
}
template<class Derived>
int
nodejs_basic_parser<Derived>::cb_chunk_header(http_parser*)
nodejs_basic_parser<Derived>::
cb_chunk_header(http_parser*)
{
return 0;
}
template<class Derived>
int
nodejs_basic_parser<Derived>::cb_chunk_complete(http_parser*)
nodejs_basic_parser<Derived>::
cb_chunk_complete(http_parser*)
{
return 0;
}
//------------------------------------------------------------------------------
/** An HTTP parser.
The parser may only be used once.

View File

@ -224,7 +224,7 @@ public:
void
testSpeed()
{
static std::size_t constexpr Trials = 3;
static std::size_t constexpr Trials = 10;
static std::size_t constexpr Repeat = 500;
creq_ = build_corpus(N/2, std::true_type{});
@ -240,6 +240,15 @@ public:
((Repeat * size_ + 512) / 1024) << "KB in " <<
(Repeat * (creq_.size() + cres_.size())) << " messages";
#if 0
timedTest(Trials, "http::parser",
[&]
{
testParser2<request_parser<dynamic_body>>(Repeat, creq_);
testParser2<response_parser<dynamic_body>>(Repeat, cres_);
});
#endif
#if 1
timedTest(Trials, "http::basic_parser",
[&]
{
@ -250,6 +259,7 @@ public:
false, dynamic_body, fields>>(
Repeat, cres_);
});
#if 1
timedTest(Trials, "nodejs_parser",
[&]
{
@ -260,6 +270,8 @@ public:
false, dynamic_body, fields>>(
Repeat, cres_);
});
#endif
#endif
pass();
}