Optimize field lookups

This commit is contained in:
Vinnie Falco
2017-06-26 10:46:17 -07:00
parent 64ff766b23
commit 2d6859831c
5 changed files with 205 additions and 295 deletions

View File

@ -4,6 +4,7 @@ Version 68:
* Small speed up in fields comparisons * Small speed up in fields comparisons
* Adjust buffer size in fast server * Adjust buffer size in fast server
* Use string_ref in older Boost versions * Use string_ref in older Boost versions
* Optimize field lookups
API Changes: API Changes:

View File

@ -67,9 +67,24 @@ iequals(
return false; return false;
auto p1 = lhs.data(); auto p1 = lhs.data();
auto p2 = rhs.data(); auto p2 = rhs.data();
char a, b;
while(n--) while(n--)
if(ascii_tolower(*p1) != ascii_tolower(*p2)) {
a = *p1++;
b = *p2++;
if(a != b)
goto slow;
}
return true;
while(n--)
{
slow:
if(ascii_tolower(a) != ascii_tolower(b))
return false; return false;
a = *p1++;
b = *p2++;
}
return true; return true;
} }
@ -92,7 +107,7 @@ iequals(
return detail::iequals(lhs, rhs); return detail::iequals(lhs, rhs);
} }
/** A strictly less predicate for strings, using a case-insensitive comparison. /** A case-insensitive less predicate for strings.
The case-comparison operation is defined only for low-ASCII characters. The case-comparison operation is defined only for low-ASCII characters.
*/ */
@ -100,8 +115,8 @@ struct iless
{ {
bool bool
operator()( operator()(
beast::string_view const& lhs, string_view const& lhs,
beast::string_view const& rhs) const string_view const& rhs) const
{ {
using std::begin; using std::begin;
using std::end; using std::end;
@ -115,7 +130,7 @@ struct iless
} }
}; };
/** A predicate for string equality, using a case-insensitive comparison. /** A case-insensitive equality predicate for strings.
The case-comparison operation is defined only for low-ASCII characters. The case-comparison operation is defined only for low-ASCII characters.
*/ */
@ -123,8 +138,8 @@ struct iequal
{ {
bool bool
operator()( operator()(
beast::string_view const& lhs, string_view const& lhs,
beast::string_view const& rhs) const string_view const& rhs) const
{ {
return iequals(lhs, rhs); return iequals(lhs, rhs);
} }

View File

@ -8,8 +8,11 @@
#ifndef BEAST_HTTP_IMPL_FIELD_IPP #ifndef BEAST_HTTP_IMPL_FIELD_IPP
#define BEAST_HTTP_IMPL_FIELD_IPP #define BEAST_HTTP_IMPL_FIELD_IPP
#include <beast/core/string.hpp>
#include <algorithm> #include <algorithm>
#include <array> #include <array>
#include <unordered_map>
#include <vector>
#include <boost/assert.hpp> #include <boost/assert.hpp>
namespace beast { namespace beast {
@ -17,24 +20,70 @@ namespace http {
namespace detail { namespace detail {
class field_strings struct field_table
{ {
using array_type = using array_type =
std::array<string_view, 352>; std::array<string_view, 352>;
array_type v_; struct hash
{
// Case-insensitive hash of a field name, built from just three
// sampled characters: the first, the middle and the last.
//
// The key must be non-empty: with a zero-length view both s[0]
// and s[len-1] index outside the string.
std::size_t
operator()(string_view const& s) const
{
    auto const len = s.size();
    auto const head = beast::detail::ascii_tolower(s[0]);
    auto const mid = beast::detail::ascii_tolower(s[len/2]);
    auto const tail = beast::detail::ascii_tolower(s[len-1]);
    // hist[] = 331, 10, max_load_factor = 0.15f
    // (presumably the bucket-occupancy histogram measured for
    // this mixing function)
    return (head * mid) ^ tail;
}
};
public: struct iequal
using const_iterator = {
array_type::const_iterator; // assumes inputs have equal length
// Case-insensitive equality for two keys of the same length.
//
// Precondition: lhs.size() == rhs.size(). Only the length of lhs
// bounds the scan, so rhs must provide at least that many chars.
//
// The comparison runs in two phases. While the inputs match byte
// for byte, no case folding is done. After the first differing
// byte every remaining pair is compared through ascii_tolower().
//
// Every pair, including the final one, is checked before
// returning true, and no character at or beyond the end of
// either view is ever read.
bool
operator()(
    string_view const& lhs,
    string_view const& rhs) const
{
    auto p1 = lhs.data();
    auto p2 = rhs.data();
    auto const pend = p1 + lhs.size();
    char a, b;
    while(p1 < pend)
    {
        a = *p1++;
        b = *p2++;
        if(a != b)
            goto slow;
    }
    return true;

slow:
    // Invariant: (a, b) is a loaded pair that has not yet been
    // accepted, and p1/p2 point just past it.
    for(;;)
    {
        if( beast::detail::ascii_tolower(a) !=
            beast::detail::ascii_tolower(b))
            return false;
        // Test for the end only after the current pair has been
        // compared and before loading another one.
        if(p1 == pend)
            return true;
        a = *p1++;
        b = *p2++;
    }
}
};
using map_type = std::unordered_map<
string_view, field, hash, iequal>;
array_type by_name_;
std::vector<map_type> by_size_;
/* /*
From: From:
https://www.iana.org/assignments/message-headers/message-headers.xhtml https://www.iana.org/assignments/message-headers/message-headers.xhtml
*/ */
field_strings() field_table()
: v_({{ : by_name_({{
"<unknown-field>", "<unknown-field>",
"A-IM", "A-IM",
"Accept", "Accept",
@ -389,61 +438,103 @@ public:
"Xref" "Xref"
}}) }})
{ {
// find the longest field length
std::size_t high = 0;
for(auto const& s : by_name_)
if(high < s.size())
high = s.size();
// build by_size map
// skip field::unknown
by_size_.resize(high + 1);
for(auto& map : by_size_)
map.max_load_factor(.15f);
for(std::size_t i = 1;
i < by_name_.size(); ++i)
{
auto const& s = by_name_[i];
by_size_[s.size()].emplace(
s, static_cast<field>(i));
}
#if 0
// This snippet calculates the performance
// of the hash function and map settings
{
std::vector<std::size_t> hist;
for(auto const& map : by_size_)
{
for(std::size_t i = 0; i < map.bucket_count(); ++i)
{
auto const n = map.bucket_size(i);
if(n > 0)
{
if(hist.size() < n)
hist.resize(n);
++hist[n-1];
}
}
}
}
#endif
} }
// Map a field name to its `field` enumerator.
//
// Candidates are first narrowed by length: by_size_[n] holds the
// known names that are exactly n characters long. Returns
// field::unknown when the length is out of range, when no known
// name has that length, or when the per-length map has no match.
field
string_to_field(string_view s) const
{
    auto const len = s.size();
    if(len < by_size_.size())
    {
        auto const& candidates = by_size_[len];
        // An empty map is rejected before find() so the hash
        // function is never run for a length with no entries.
        if(! candidates.empty())
        {
            auto const pos = candidates.find(s);
            if(pos != candidates.end())
                return pos->second;
        }
    }
    return field::unknown;
}
//
// Deprecated
//
using const_iterator =
array_type::const_iterator;
std::size_t std::size_t
size() const size() const
{ {
return v_.size(); return by_name_.size();
} }
const_iterator const_iterator
begin() const begin() const
{ {
return v_.begin(); return by_name_.begin();
} }
const_iterator const_iterator
end() const end() const
{ {
return v_.end(); return by_name_.end();
} }
}; };
inline inline
field_strings const& field_table const&
get_field_strings() get_field_table()
{ {
static field_strings const fs; static field_table const tab;
return fs; return tab;
} }
template<class = void> template<class = void>
string_view string_view
to_string(field f) to_string(field f)
{ {
auto const& v = get_field_strings(); auto const& v = get_field_table();
BOOST_ASSERT(static_cast<unsigned>(f) < v.size()); BOOST_ASSERT(static_cast<unsigned>(f) < v.size());
return v.begin()[static_cast<unsigned>(f)]; return v.begin()[static_cast<unsigned>(f)];
} }
// Look up a field name by binary search over the table of field
// strings, using the case-insensitive ordering beast::iless.
//
// Returns field::unknown when the search runs off the end of the
// table or the element found is not a case-insensitive match.
// std::lower_bound requires the table to be ordered consistently
// with beast::iless.
template<class = void>
field
string_to_field(string_view s)
{
    auto const& table = get_field_strings();
    auto const first = table.begin();
    auto const last = table.end();
    auto const pos = std::lower_bound(
        first, last, s, beast::iless{});
    if(pos == last || ! iequals(s, *pos))
        return field::unknown;
    // The position in the table is the enumerator value.
    auto const f = static_cast<field>(pos - first);
    BOOST_ASSERT(iequals(s, to_string(f)));
    return f;
}
} // detail } // detail
inline inline
@ -457,7 +548,7 @@ inline
field field
string_to_field(string_view s) string_to_field(string_view s)
{ {
return detail::string_to_field(s); return detail::get_field_table().string_to_field(s);
} }
} // http } // http

View File

@ -211,6 +211,9 @@ public:
void void
write_eof(error_code& ec); write_eof(error_code& ec);
void
check_header();
private: private:
Derived& Derived&
impl() impl()
@ -218,231 +221,6 @@ private:
return *static_cast<Derived*>(this); return *static_cast<Derived*>(this);
} }
template<class C>
class has_on_start_t
{
template<class T, class R =
decltype(std::declval<T>().on_start(), std::true_type{})>
static R check(int);
template<class>
static std::false_type check(...);
using type = decltype(check<C>(0));
public:
static bool const value = type::value;
};
template<class C>
using has_on_start =
std::integral_constant<bool, has_on_start_t<C>::value>;
void
call_on_start(std::true_type)
{
impl().on_start();
}
void
call_on_start(std::false_type)
{
}
template<class C>
class has_on_field_t
{
template<class T, class R =
decltype(std::declval<T>().on_field(
std::declval<std::string const&>(),
std::declval<std::string const&>()),
std::true_type{})>
static R check(int);
template<class>
static std::false_type check(...);
using type = decltype(check<C>(0));
public:
static bool const value = type::value;
};
template<class C>
using has_on_field =
std::integral_constant<bool, has_on_field_t<C>::value>;
void
call_on_field(std::string const& field,
std::string const& value, std::true_type)
{
impl().on_field(field, value);
}
void
call_on_field(std::string const&, std::string const&,
std::false_type)
{
}
template<class C>
class has_on_headers_complete_t
{
template<class T, class R =
decltype(std::declval<T>().on_headers_complete(
std::declval<error_code&>()), std::true_type{})>
static R check(int);
template<class>
static std::false_type check(...);
using type = decltype(check<C>(0));
public:
static bool const value = type::value;
};
template<class C>
using has_on_headers_complete =
std::integral_constant<bool, has_on_headers_complete_t<C>::value>;
void
call_on_headers_complete(error_code& ec, std::true_type)
{
impl().on_headers_complete(ec);
}
void
call_on_headers_complete(error_code&, std::false_type)
{
}
template<class C>
class has_on_request_t
{
template<class T, class R =
decltype(std::declval<T>().on_request(
std::declval<unsigned>(), std::declval<std::string>(),
std::declval<int>(), std::declval<int>(),
std::declval<bool>(), std::declval<bool>()),
std::true_type{})>
static R check(int);
template<class>
static std::false_type check(...);
using type = decltype(check<C>(0));
public:
static bool const value = type::value;
};
template<class C>
using has_on_request =
std::integral_constant<bool, has_on_request_t<C>::value>;
void
call_on_request(unsigned method, std::string url,
int major, int minor, bool keep_alive, bool upgrade,
std::true_type)
{
impl().on_request(
method, url, major, minor, keep_alive, upgrade);
}
void
call_on_request(unsigned, std::string, int, int, bool, bool,
std::false_type)
{
}
// Detects whether `C` has a member function callable as
//
//     c.on_response(int, std::string, int, int, bool, bool)
//
// `value` is true when that call expression is well-formed and
// false otherwise.
//
// The std::string argument must be produced by *calling*
// std::declval<std::string>(). Naming the function without the
// call parentheses passes a function instead of a string, which
// makes the probe expression ill-formed for every C and the
// detection fail unconditionally.
//
// NOTE(review): on_response must be accessible from this nested
// class for the probe to succeed -- confirm for derived parsers
// that keep their callbacks private.
template<class C>
class has_on_response_t
{
    // Preferred overload: viable only if the call is well-formed
    template<class T, class R =
        decltype(std::declval<T>().on_response(
            std::declval<int>(), std::declval<std::string>(),
            std::declval<int>(), std::declval<int>(),
            std::declval<bool>(), std::declval<bool>()),
                std::true_type{})>
    static R check(int);
    // Fallback: chosen when the overload above is removed by SFINAE
    template<class>
    static std::false_type check(...);
    using type = decltype(check<C>(0));
public:
    static bool const value = type::value;
};
template<class C>
using has_on_response =
std::integral_constant<bool, has_on_response_t<C>::value>;
bool
call_on_response(int status, std::string text,
int major, int minor, bool keep_alive, bool upgrade,
std::true_type)
{
return impl().on_response(
status, text, major, minor, keep_alive, upgrade);
}
bool
call_on_response(int, std::string, int, int, bool, bool,
std::false_type)
{
// VFALCO Certainly incorrect
return true;
}
template<class C>
class has_on_body_t
{
template<class T, class R =
decltype(std::declval<T>().on_body(
std::declval<void const*>(), std::declval<std::size_t>(),
std::declval<error_code&>()), std::true_type{})>
static R check(int);
template<class>
static std::false_type check(...);
using type = decltype(check<C>(0));
public:
static bool const value = type::value;
};
template<class C>
using has_on_body =
std::integral_constant<bool, has_on_body_t<C>::value>;
void
call_on_body(void const* data, std::size_t bytes,
error_code& ec, std::true_type)
{
impl().on_body(data, bytes, ec);
}
void
call_on_body(void const*, std::size_t,
error_code&, std::false_type)
{
}
template<class C>
class has_on_complete_t
{
template<class T, class R =
decltype(std::declval<T>().on_complete(), std::true_type{})>
static R check(int);
template<class>
static std::false_type check(...);
using type = decltype(check<C>(0));
public:
static bool const value = type::value;
};
template<class C>
using has_on_complete =
std::integral_constant<bool, has_on_complete_t<C>::value>;
void
call_on_complete(std::true_type)
{
impl().on_complete();
}
void
call_on_complete(std::false_type)
{
}
void
check_header();
static int cb_message_start(http_parser*); static int cb_message_start(http_parser*);
static int cb_url(http_parser*, char const*, std::size_t); static int cb_url(http_parser*, char const*, std::size_t);
static int cb_status(http_parser*, char const*, std::size_t); static int cb_status(http_parser*, char const*, std::size_t);
@ -526,7 +304,8 @@ nodejs_basic_parser(nodejs_basic_parser&& other)
template<class Derived> template<class Derived>
auto auto
nodejs_basic_parser<Derived>::operator=(nodejs_basic_parser&& other) -> nodejs_basic_parser<Derived>::
operator=(nodejs_basic_parser&& other) ->
nodejs_basic_parser& nodejs_basic_parser&
{ {
state_ = other.state_; state_ = other.state_;
@ -569,7 +348,8 @@ operator=(nodejs_basic_parser const& other) ->
} }
template<class Derived> template<class Derived>
nodejs_basic_parser<Derived>::nodejs_basic_parser(bool request) noexcept nodejs_basic_parser<Derived>::
nodejs_basic_parser(bool request) noexcept
{ {
state_.data = this; state_.data = this;
http_parser_init(&state_, request http_parser_init(&state_, request
@ -579,7 +359,8 @@ nodejs_basic_parser<Derived>::nodejs_basic_parser(bool request) noexcept
template<class Derived> template<class Derived>
std::size_t std::size_t
nodejs_basic_parser<Derived>::write(void const* data, nodejs_basic_parser<Derived>::
write(void const* data,
std::size_t size, error_code& ec) std::size_t size, error_code& ec)
{ {
ec_ = &ec; ec_ = &ec;
@ -596,11 +377,11 @@ nodejs_basic_parser<Derived>::write(void const* data,
template<class Derived> template<class Derived>
void void
nodejs_basic_parser<Derived>::write_eof(error_code& ec) nodejs_basic_parser<Derived>::
write_eof(error_code& ec)
{ {
ec_ = &ec; ec_ = &ec;
http_parser_execute( http_parser_execute(&state_, hooks(), nullptr, 0);
&state_, hooks(), nullptr, 0);
if(! ec) if(! ec)
ec = detail::make_nodejs_error( ec = detail::make_nodejs_error(
static_cast<int>(state_.http_errno)); static_cast<int>(state_.http_errno));
@ -608,13 +389,12 @@ nodejs_basic_parser<Derived>::write_eof(error_code& ec)
template<class Derived> template<class Derived>
void void
nodejs_basic_parser<Derived>::check_header() nodejs_basic_parser<Derived>::
check_header()
{ {
if(! value_.empty()) if(! value_.empty())
{ {
//detail::trim(value_); impl().on_field(field_, value_);
call_on_field(field_, value_,
has_on_field<Derived>{});
field_.clear(); field_.clear();
value_.clear(); value_.clear();
} }
@ -622,7 +402,8 @@ nodejs_basic_parser<Derived>::check_header()
template<class Derived> template<class Derived>
int int
nodejs_basic_parser<Derived>::cb_message_start(http_parser* p) nodejs_basic_parser<Derived>::
cb_message_start(http_parser* p)
{ {
auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data); auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data);
t.complete_ = false; t.complete_ = false;
@ -630,13 +411,14 @@ nodejs_basic_parser<Derived>::cb_message_start(http_parser* p)
t.status_.clear(); t.status_.clear();
t.field_.clear(); t.field_.clear();
t.value_.clear(); t.value_.clear();
t.call_on_start(has_on_start<Derived>{}); t.impl().on_start();
return 0; return 0;
} }
template<class Derived> template<class Derived>
int int
nodejs_basic_parser<Derived>::cb_url(http_parser* p, nodejs_basic_parser<Derived>::
cb_url(http_parser* p,
char const* in, std::size_t bytes) char const* in, std::size_t bytes)
{ {
auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data); auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data);
@ -646,7 +428,8 @@ nodejs_basic_parser<Derived>::cb_url(http_parser* p,
template<class Derived> template<class Derived>
int int
nodejs_basic_parser<Derived>::cb_status(http_parser* p, nodejs_basic_parser<Derived>::
cb_status(http_parser* p,
char const* in, std::size_t bytes) char const* in, std::size_t bytes)
{ {
auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data); auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data);
@ -656,7 +439,8 @@ nodejs_basic_parser<Derived>::cb_status(http_parser* p,
template<class Derived> template<class Derived>
int int
nodejs_basic_parser<Derived>::cb_header_field(http_parser* p, nodejs_basic_parser<Derived>::
cb_header_field(http_parser* p,
char const* in, std::size_t bytes) char const* in, std::size_t bytes)
{ {
auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data); auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data);
@ -667,7 +451,8 @@ nodejs_basic_parser<Derived>::cb_header_field(http_parser* p,
template<class Derived> template<class Derived>
int int
nodejs_basic_parser<Derived>::cb_header_value(http_parser* p, nodejs_basic_parser<Derived>::
cb_header_value(http_parser* p,
char const* in, std::size_t bytes) char const* in, std::size_t bytes)
{ {
auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data); auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data);
@ -677,62 +462,68 @@ nodejs_basic_parser<Derived>::cb_header_value(http_parser* p,
template<class Derived> template<class Derived>
int int
nodejs_basic_parser<Derived>::cb_headers_complete(http_parser* p) nodejs_basic_parser<Derived>::
cb_headers_complete(http_parser* p)
{ {
auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data); auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data);
t.check_header(); t.check_header();
t.call_on_headers_complete(*t.ec_, t.impl().on_headers_complete(*t.ec_);
has_on_headers_complete<Derived>{});
if(*t.ec_) if(*t.ec_)
return 1; return 1;
bool const keep_alive = bool const keep_alive =
http_should_keep_alive(p) != 0; http_should_keep_alive(p) != 0;
if(p->type == http_parser_type::HTTP_REQUEST) if(p->type == http_parser_type::HTTP_REQUEST)
{ {
t.call_on_request(p->method, t.url_, t.impl().on_request(p->method, t.url_,
p->http_major, p->http_minor, keep_alive, p->http_major, p->http_minor, keep_alive,
p->upgrade, has_on_request<Derived>{}); p->upgrade);
return 0; return 0;
} }
return t.call_on_response(p->status_code, t.status_, return t.impl().on_response(p->status_code, t.status_,
p->http_major, p->http_minor, keep_alive, p->http_major, p->http_minor, keep_alive,
p->upgrade, has_on_response<Derived>{}) ? 0 : 1; p->upgrade) ? 0 : 1;
} }
template<class Derived> template<class Derived>
int int
nodejs_basic_parser<Derived>::cb_body(http_parser* p, nodejs_basic_parser<Derived>::
cb_body(http_parser* p,
char const* in, std::size_t bytes) char const* in, std::size_t bytes)
{ {
auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data); auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data);
t.call_on_body(in, bytes, *t.ec_, has_on_body<Derived>{}); t.impl().on_body(in, bytes, *t.ec_);
return *t.ec_ ? 1 : 0; return *t.ec_ ? 1 : 0;
} }
template<class Derived> template<class Derived>
int int
nodejs_basic_parser<Derived>::cb_message_complete(http_parser* p) nodejs_basic_parser<Derived>::
cb_message_complete(http_parser* p)
{ {
auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data); auto& t = *reinterpret_cast<nodejs_basic_parser*>(p->data);
t.complete_ = true; t.complete_ = true;
t.call_on_complete(has_on_complete<Derived>{}); t.impl().on_complete();
return 0; return 0;
} }
template<class Derived> template<class Derived>
int int
nodejs_basic_parser<Derived>::cb_chunk_header(http_parser*) nodejs_basic_parser<Derived>::
cb_chunk_header(http_parser*)
{ {
return 0; return 0;
} }
template<class Derived> template<class Derived>
int int
nodejs_basic_parser<Derived>::cb_chunk_complete(http_parser*) nodejs_basic_parser<Derived>::
cb_chunk_complete(http_parser*)
{ {
return 0; return 0;
} }
//------------------------------------------------------------------------------
/** A HTTP parser. /** A HTTP parser.
The parser may only be used once. The parser may only be used once.

View File

@ -224,7 +224,7 @@ public:
void void
testSpeed() testSpeed()
{ {
static std::size_t constexpr Trials = 3; static std::size_t constexpr Trials = 10;
static std::size_t constexpr Repeat = 500; static std::size_t constexpr Repeat = 500;
creq_ = build_corpus(N/2, std::true_type{}); creq_ = build_corpus(N/2, std::true_type{});
@ -240,6 +240,15 @@ public:
((Repeat * size_ + 512) / 1024) << "KB in " << ((Repeat * size_ + 512) / 1024) << "KB in " <<
(Repeat * (creq_.size() + cres_.size())) << " messages"; (Repeat * (creq_.size() + cres_.size())) << " messages";
#if 0
timedTest(Trials, "http::parser",
[&]
{
testParser2<request_parser<dynamic_body>>(Repeat, creq_);
testParser2<response_parser<dynamic_body>>(Repeat, cres_);
});
#endif
#if 1
timedTest(Trials, "http::basic_parser", timedTest(Trials, "http::basic_parser",
[&] [&]
{ {
@ -250,6 +259,7 @@ public:
false, dynamic_body, fields>>( false, dynamic_body, fields>>(
Repeat, cres_); Repeat, cres_);
}); });
#if 1
timedTest(Trials, "nodejs_parser", timedTest(Trials, "nodejs_parser",
[&] [&]
{ {
@ -260,6 +270,8 @@ public:
false, dynamic_body, fields>>( false, dynamic_body, fields>>(
Repeat, cres_); Repeat, cres_);
}); });
#endif
#endif
pass(); pass();
} }