diff --git a/CHANGELOG.md b/CHANGELOG.md index 45372099..1c9bdcdd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ Version 68: * Small speed up in fields comparisons * Adjust buffer size in fast server * Use string_ref in older Boost versions +* Optimize field lookups API Changes: diff --git a/include/beast/core/string.hpp b/include/beast/core/string.hpp index eeb7c59c..e4ec367e 100644 --- a/include/beast/core/string.hpp +++ b/include/beast/core/string.hpp @@ -67,9 +67,24 @@ iequals( return false; auto p1 = lhs.data(); auto p2 = rhs.data(); + char a, b; while(n--) - if(ascii_tolower(*p1) != ascii_tolower(*p2)) + { + a = *p1++; + b = *p2++; + if(a != b) + goto slow; // exact compare failed, finish case-insensitively + } + return true; + + while(n--) + { + a = *p1++; // fetch a pair only after n-- confirmed one remains (no read past the end) + b = *p2++; + slow: // entered with (a, b) holding the first pair that differed exactly + if(ascii_tolower(a) != ascii_tolower(b)) return false; + } return true; } @@ -92,7 +107,7 @@ iequals( return detail::iequals(lhs, rhs); } -/** A strictly less predicate for strings, using a case-insensitive comparison. +/** A case-insensitive less predicate for strings. The case-comparison operation is defined only for low-ASCII characters. */ @@ -100,8 +115,8 @@ struct iless { bool operator()( - beast::string_view const& lhs, - beast::string_view const& rhs) const + string_view const& lhs, + string_view const& rhs) const { using std::begin; using std::end; @@ -115,7 +130,7 @@ struct iless } }; -/** A predicate for string equality, using a case-insensitive comparison. +/** A case-insensitive equality predicate for strings. The case-comparison operation is defined only for low-ASCII characters. 
*/ @@ -123,8 +138,8 @@ struct iequal { bool operator()( - beast::string_view const& lhs, - beast::string_view const& rhs) const + string_view const& lhs, + string_view const& rhs) const { return iequals(lhs, rhs); } diff --git a/include/beast/http/impl/field.ipp b/include/beast/http/impl/field.ipp index b2ddb48a..6d4f2c50 100644 --- a/include/beast/http/impl/field.ipp +++ b/include/beast/http/impl/field.ipp @@ -8,8 +8,11 @@ #ifndef BEAST_HTTP_IMPL_FIELD_IPP #define BEAST_HTTP_IMPL_FIELD_IPP +#include #include #include +#include +#include #include namespace beast { @@ -17,24 +20,70 @@ namespace http { namespace detail { -class field_strings +struct field_table { using array_type = std::array; - array_type v_; + struct hash + { + std::size_t + operator()(string_view const& s) const + { + auto const n = s.size(); + return + beast::detail::ascii_tolower(s[0]) * + beast::detail::ascii_tolower(s[n/2]) ^ + beast::detail::ascii_tolower(s[n-1]); // hist[] = 331, 10, max_load_factor = 0.15f + } + }; -public: - using const_iterator = - array_type::const_iterator; + struct iequal + { + // assumes inputs have equal length + bool + operator()( + string_view const& lhs, + string_view const& rhs) const + { + auto p1 = lhs.data(); + auto p2 = rhs.data(); + auto pend = lhs.end(); + char a, b; + while(p1 < pend) + { + a = *p1++; + b = *p2++; + if(a != b) + goto slow; + } + return true; + while(p1 < pend) + { + a = *p1++; // fetch before comparing so the final pair is checked and *pend is never read + b = *p2++; + slow: // entered with (a, b) holding the first pair that differed exactly + if( beast::detail::ascii_tolower(a) != + beast::detail::ascii_tolower(b)) + return false; + } + return true; + } + }; + + using map_type = std::unordered_map< + string_view, field, hash, iequal>; + + array_type by_name_; + std::vector by_size_; /* From: https://www.iana.org/assignments/message-headers/message-headers.xhtml */ - field_strings() - : v_({{ + field_table() + : by_name_({{ "", "A-IM", "Accept", @@ -389,61 +438,103 @@ public: "Xref" }}) { + // find the longest field length + std::size_t high = 0; + for(auto const& s : by_name_) + 
if(high < s.size()) + high = s.size(); + // build by_size map + // skip field::unknown + by_size_.resize(high + 1); + for(auto& map : by_size_) + map.max_load_factor(.15f); + for(std::size_t i = 1; + i < by_name_.size(); ++i) + { + auto const& s = by_name_[i]; + by_size_[s.size()].emplace( + s, static_cast(i)); + } + +#if 0 + // This snippet calculates the performance + // of the hash function and map settings + { + std::vector hist; + for(auto const& map : by_size_) + { + for(std::size_t i = 0; i < map.bucket_count(); ++i) + { + auto const n = map.bucket_size(i); + if(n > 0) + { + if(hist.size() < n) + hist.resize(n); + ++hist[n-1]; + } + } + } + } +#endif } + field + string_to_field(string_view s) const + { + if(s.size() >= by_size_.size()) + return field::unknown; + auto const& map = by_size_[s.size()]; + if(map.empty()) + return field::unknown; + auto it = map.find(s); + if(it == map.end()) + return field::unknown; + return it->second; + } + + // + // Deprecated + // + + using const_iterator = + array_type::const_iterator; + std::size_t size() const { - return v_.size(); + return by_name_.size(); } const_iterator begin() const { - return v_.begin(); + return by_name_.begin(); } const_iterator end() const { - return v_.end(); + return by_name_.end(); } }; inline -field_strings const& -get_field_strings() +field_table const& +get_field_table() { - static field_strings const fs; - return fs; + static field_table const tab; + return tab; } template string_view to_string(field f) { - auto const& v = get_field_strings(); + auto const& v = get_field_table(); BOOST_ASSERT(static_cast(f) < v.size()); return v.begin()[static_cast(f)]; } -template -field -string_to_field(string_view s) -{ - auto const& v = get_field_strings(); - auto const it = std::lower_bound( - v.begin(), v.end(), s, - beast::iless{}); - if(it == v.end()) - return field::unknown; - if(! 
iequals(s, *it)) - return field::unknown; - BOOST_ASSERT(iequals(s, to_string( - static_cast(it - v.begin())))); - return static_cast(it - v.begin()); -} - } // detail inline @@ -457,7 +548,7 @@ inline field string_to_field(string_view s) { - return detail::string_to_field(s); + return detail::get_field_table().string_to_field(s); } } // http diff --git a/test/benchmarks/nodejs_parser.hpp b/test/benchmarks/nodejs_parser.hpp index f1a76b63..043c5834 100644 --- a/test/benchmarks/nodejs_parser.hpp +++ b/test/benchmarks/nodejs_parser.hpp @@ -211,6 +211,9 @@ public: void write_eof(error_code& ec); + void + check_header(); + private: Derived& impl() @@ -218,231 +221,6 @@ private: return *static_cast(this); } - template - class has_on_start_t - { - template().on_start(), std::true_type{})> - static R check(int); - template - static std::false_type check(...); - using type = decltype(check(0)); - public: - static bool const value = type::value; - }; - template - using has_on_start = - std::integral_constant::value>; - - void - call_on_start(std::true_type) - { - impl().on_start(); - } - - void - call_on_start(std::false_type) - { - } - - template - class has_on_field_t - { - template().on_field( - std::declval(), - std::declval()), - std::true_type{})> - static R check(int); - template - static std::false_type check(...); - using type = decltype(check(0)); - public: - static bool const value = type::value; - }; - template - using has_on_field = - std::integral_constant::value>; - - void - call_on_field(std::string const& field, - std::string const& value, std::true_type) - { - impl().on_field(field, value); - } - - void - call_on_field(std::string const&, std::string const&, - std::false_type) - { - } - - template - class has_on_headers_complete_t - { - template().on_headers_complete( - std::declval()), std::true_type{})> - static R check(int); - template - static std::false_type check(...); - using type = decltype(check(0)); - public: - static bool const value = 
type::value; - }; - template - using has_on_headers_complete = - std::integral_constant::value>; - - void - call_on_headers_complete(error_code& ec, std::true_type) - { - impl().on_headers_complete(ec); - } - - void - call_on_headers_complete(error_code&, std::false_type) - { - } - - template - class has_on_request_t - { - template().on_request( - std::declval(), std::declval(), - std::declval(), std::declval(), - std::declval(), std::declval()), - std::true_type{})> - static R check(int); - template - static std::false_type check(...); - using type = decltype(check(0)); - public: - static bool const value = type::value; - }; - template - using has_on_request = - std::integral_constant::value>; - - void - call_on_request(unsigned method, std::string url, - int major, int minor, bool keep_alive, bool upgrade, - std::true_type) - { - impl().on_request( - method, url, major, minor, keep_alive, upgrade); - } - - void - call_on_request(unsigned, std::string, int, int, bool, bool, - std::false_type) - { - } - - template - class has_on_response_t - { - template().on_response( - std::declval(), std::declval, - std::declval(), std::declval(), - std::declval(), std::declval()), - std::true_type{})> - static R check(int); - template - static std::false_type check(...); -#if 0 - using type = decltype(check(0)); -#else - // VFALCO Trait seems broken for http::parser - using type = std::true_type; -#endif - public: - static bool const value = type::value; - }; - template - using has_on_response = - std::integral_constant::value>; - - bool - call_on_response(int status, std::string text, - int major, int minor, bool keep_alive, bool upgrade, - std::true_type) - { - return impl().on_response( - status, text, major, minor, keep_alive, upgrade); - } - - bool - call_on_response(int, std::string, int, int, bool, bool, - std::false_type) - { - // VFALCO Certainly incorrect - return true; - } - - template - class has_on_body_t - { - template().on_body( - std::declval(), std::declval(), 
- std::declval()), std::true_type{})> - static R check(int); - template - static std::false_type check(...); - using type = decltype(check(0)); - public: - static bool const value = type::value; - }; - template - using has_on_body = - std::integral_constant::value>; - - void - call_on_body(void const* data, std::size_t bytes, - error_code& ec, std::true_type) - { - impl().on_body(data, bytes, ec); - } - - void - call_on_body(void const*, std::size_t, - error_code&, std::false_type) - { - } - - template - class has_on_complete_t - { - template().on_complete(), std::true_type{})> - static R check(int); - template - static std::false_type check(...); - using type = decltype(check(0)); - public: - static bool const value = type::value; - }; - template - using has_on_complete = - std::integral_constant::value>; - - void - call_on_complete(std::true_type) - { - impl().on_complete(); - } - - void - call_on_complete(std::false_type) - { - } - - void - check_header(); - static int cb_message_start(http_parser*); static int cb_url(http_parser*, char const*, std::size_t); static int cb_status(http_parser*, char const*, std::size_t); @@ -526,7 +304,8 @@ nodejs_basic_parser(nodejs_basic_parser&& other) template auto -nodejs_basic_parser::operator=(nodejs_basic_parser&& other) -> +nodejs_basic_parser:: +operator=(nodejs_basic_parser&& other) -> nodejs_basic_parser& { state_ = other.state_; @@ -569,7 +348,8 @@ operator=(nodejs_basic_parser const& other) -> } template -nodejs_basic_parser::nodejs_basic_parser(bool request) noexcept +nodejs_basic_parser:: +nodejs_basic_parser(bool request) noexcept { state_.data = this; http_parser_init(&state_, request @@ -579,7 +359,8 @@ nodejs_basic_parser::nodejs_basic_parser(bool request) noexcept template std::size_t -nodejs_basic_parser::write(void const* data, +nodejs_basic_parser:: +write(void const* data, std::size_t size, error_code& ec) { ec_ = &ec; @@ -596,11 +377,11 @@ nodejs_basic_parser::write(void const* data, template void 
-nodejs_basic_parser::write_eof(error_code& ec) +nodejs_basic_parser:: +write_eof(error_code& ec) { ec_ = &ec; - http_parser_execute( - &state_, hooks(), nullptr, 0); + http_parser_execute(&state_, hooks(), nullptr, 0); if(! ec) ec = detail::make_nodejs_error( static_cast(state_.http_errno)); @@ -608,13 +389,12 @@ nodejs_basic_parser::write_eof(error_code& ec) template void -nodejs_basic_parser::check_header() +nodejs_basic_parser:: +check_header() { if(! value_.empty()) { - //detail::trim(value_); - call_on_field(field_, value_, - has_on_field{}); + impl().on_field(field_, value_); field_.clear(); value_.clear(); } @@ -622,7 +402,8 @@ nodejs_basic_parser::check_header() template int -nodejs_basic_parser::cb_message_start(http_parser* p) +nodejs_basic_parser:: +cb_message_start(http_parser* p) { auto& t = *reinterpret_cast(p->data); t.complete_ = false; @@ -630,13 +411,14 @@ nodejs_basic_parser::cb_message_start(http_parser* p) t.status_.clear(); t.field_.clear(); t.value_.clear(); - t.call_on_start(has_on_start{}); + t.impl().on_start(); return 0; } template int -nodejs_basic_parser::cb_url(http_parser* p, +nodejs_basic_parser:: +cb_url(http_parser* p, char const* in, std::size_t bytes) { auto& t = *reinterpret_cast(p->data); @@ -646,7 +428,8 @@ nodejs_basic_parser::cb_url(http_parser* p, template int -nodejs_basic_parser::cb_status(http_parser* p, +nodejs_basic_parser:: +cb_status(http_parser* p, char const* in, std::size_t bytes) { auto& t = *reinterpret_cast(p->data); @@ -656,7 +439,8 @@ nodejs_basic_parser::cb_status(http_parser* p, template int -nodejs_basic_parser::cb_header_field(http_parser* p, +nodejs_basic_parser:: +cb_header_field(http_parser* p, char const* in, std::size_t bytes) { auto& t = *reinterpret_cast(p->data); @@ -667,7 +451,8 @@ nodejs_basic_parser::cb_header_field(http_parser* p, template int -nodejs_basic_parser::cb_header_value(http_parser* p, +nodejs_basic_parser:: +cb_header_value(http_parser* p, char const* in, std::size_t bytes) { 
auto& t = *reinterpret_cast(p->data); @@ -677,62 +462,68 @@ nodejs_basic_parser::cb_header_value(http_parser* p, template int -nodejs_basic_parser::cb_headers_complete(http_parser* p) +nodejs_basic_parser:: +cb_headers_complete(http_parser* p) { auto& t = *reinterpret_cast(p->data); t.check_header(); - t.call_on_headers_complete(*t.ec_, - has_on_headers_complete{}); + t.impl().on_headers_complete(*t.ec_); if(*t.ec_) return 1; bool const keep_alive = http_should_keep_alive(p) != 0; if(p->type == http_parser_type::HTTP_REQUEST) { - t.call_on_request(p->method, t.url_, + t.impl().on_request(p->method, t.url_, p->http_major, p->http_minor, keep_alive, - p->upgrade, has_on_request{}); + p->upgrade); return 0; } - return t.call_on_response(p->status_code, t.status_, + return t.impl().on_response(p->status_code, t.status_, p->http_major, p->http_minor, keep_alive, - p->upgrade, has_on_response{}) ? 0 : 1; + p->upgrade) ? 0 : 1; } template int -nodejs_basic_parser::cb_body(http_parser* p, +nodejs_basic_parser:: +cb_body(http_parser* p, char const* in, std::size_t bytes) { auto& t = *reinterpret_cast(p->data); - t.call_on_body(in, bytes, *t.ec_, has_on_body{}); + t.impl().on_body(in, bytes, *t.ec_); return *t.ec_ ? 1 : 0; } template int -nodejs_basic_parser::cb_message_complete(http_parser* p) +nodejs_basic_parser:: +cb_message_complete(http_parser* p) { auto& t = *reinterpret_cast(p->data); t.complete_ = true; - t.call_on_complete(has_on_complete{}); + t.impl().on_complete(); return 0; } template int -nodejs_basic_parser::cb_chunk_header(http_parser*) +nodejs_basic_parser:: +cb_chunk_header(http_parser*) { return 0; } template int -nodejs_basic_parser::cb_chunk_complete(http_parser*) +nodejs_basic_parser:: +cb_chunk_complete(http_parser*) { return 0; } +//------------------------------------------------------------------------------ + /** A HTTP parser. The parser may only be used once. 
diff --git a/test/benchmarks/parser.cpp b/test/benchmarks/parser.cpp index abe7c6a9..51c32fe9 100644 --- a/test/benchmarks/parser.cpp +++ b/test/benchmarks/parser.cpp @@ -224,7 +224,7 @@ public: void testSpeed() { - static std::size_t constexpr Trials = 3; + static std::size_t constexpr Trials = 10; static std::size_t constexpr Repeat = 500; creq_ = build_corpus(N/2, std::true_type{}); @@ -240,6 +240,15 @@ public: ((Repeat * size_ + 512) / 1024) << "KB in " << (Repeat * (creq_.size() + cres_.size())) << " messages"; +#if 0 + timedTest(Trials, "http::parser", + [&] + { + testParser2>(Repeat, creq_); + testParser2>(Repeat, cres_); + }); +#endif +#if 1 timedTest(Trials, "http::basic_parser", [&] { @@ -250,6 +259,7 @@ public: false, dynamic_body, fields>>( Repeat, cres_); }); +#if 1 timedTest(Trials, "nodejs_parser", [&] { @@ -260,6 +270,8 @@ public: false, dynamic_body, fields>>( Repeat, cres_); }); +#endif +#endif pass(); }