From 4b71aa67e01b2ca0997558f6e07930ab9a178e37 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 12 Jul 2017 18:06:36 -0700 Subject: [PATCH] Refactor chunked parsing (API Change): * parser now has a callback feature for intercepting chunk headers and chunk bodies * The names for basic_parser derived class callbacks have been refactored * basic_parser offers an additional callback for distinguishing chunk body data. Actions Required: * Adjust signatures for required members of user-defined subclasses of basic_parser * Use the new basic_parser chunk callbacks for accessing chunk extensions and chunk bodies. --- CHANGELOG.md | 12 + doc/qbk/04_http/08_chunked_encoding.qbk | 115 +++++ example/doc/http_examples.hpp | 271 +++++++++--- extras/beast/test/fuzz.hpp | 101 +++++ include/beast/core/detail/varint.hpp | 75 ++++ include/beast/http/basic_parser.hpp | 13 +- include/beast/http/chunk_encode.hpp | 38 +- include/beast/http/detail/basic_parser.hpp | 178 +++++++- include/beast/http/detail/rfc7230.hpp | 12 +- include/beast/http/error.hpp | 7 + include/beast/http/impl/basic_parser.ipp | 107 ++--- include/beast/http/impl/chunk_encode.ipp | 470 +++++++++++++++++++-- include/beast/http/impl/error.ipp | 2 + include/beast/http/impl/parser.ipp | 70 ++- include/beast/http/impl/rfc7230.ipp | 8 +- include/beast/http/parser.hpp | 257 +++++++++-- test/benchmarks/parser.cpp | 28 +- test/core/CMakeLists.txt | 1 + test/core/Jamfile | 1 + test/core/detail/varint.cpp | 52 +++ test/http/basic_parser.cpp | 68 ++- test/http/chunk_encode.cpp | 83 ++++ test/http/doc_examples.cpp | 30 +- test/http/error.cpp | 2 + test/http/message_fuzz.hpp | 36 ++ test/http/parser.cpp | 44 -- test/http/test_parser.hpp | 39 +- 27 files changed, 1851 insertions(+), 269 deletions(-) create mode 100644 extras/beast/test/fuzz.hpp create mode 100644 include/beast/core/detail/varint.hpp create mode 100644 test/core/detail/varint.cpp diff --git a/CHANGELOG.md b/CHANGELOG.md index bfb3d9fe..0eacb8d3 100644 --- 
a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,18 @@ Version 81: * multi_buffer ctor is explicit * File is not copy-constructible +API Changes: + +* Refactor basic_parser, chunk parsing: + +Actions Required: + +* Adjust signatures for required members of user-defined + subclasses of basic_parser + +* Use the new basic_parser chunk callbacks for accessing + chunk extensions and chunk bodies. + -------------------------------------------------------------------------------- Version 80: diff --git a/doc/qbk/04_http/08_chunked_encoding.qbk b/doc/qbk/04_http/08_chunked_encoding.qbk index 282d1ae9..315b3075 100644 --- a/doc/qbk/04_http/08_chunked_encoding.qbk +++ b/doc/qbk/04_http/08_chunked_encoding.qbk @@ -144,4 +144,119 @@ also emitting the terminating CRLF (`"\r\n"`): [http_snippet_24] +[heading Parsing Chunks] + +The __parser__ automatically removes the chunked transfer coding when +it is the last encoding in the list. However, it also discards the +chunk extensions and does not provide a way to determine the boundaries +between chunks. Advanced applications which need to access the chunk +extensions or read complete individual chunks may use a callback +interface provided by __parser__: + +[table Chunking Parse Callbacks +[[Name][Description]] +[ + [[link beast.ref.beast__http__parser.on_chunk_header `on_chunk_header`]] + [ + Set a callback to be invoked on each chunk header. + + The callback will be invoked once for every chunk in the message + payload, as well as once for the last chunk. The invocation + happens after the chunk header is available but before any body + octets have been parsed. + + The extensions are provided in raw, validated form, use + [link beast.ref.beast__http__basic_chunk_extensions.parse `chunk_extensions::parse`] + to parse the extensions into a structured container for easier access. + The implementation type-erases the callback without requiring + a dynamic allocation. 
For this reason, the callback object is + passed by a non-constant reference. + + The function object will be called with this equivalent signature: + ``` + void + callback( + std::uint64_t size, // Size of the chunk, zero for the last chunk + string_view extensions, // The chunk-extensions in raw form + error_code& ec); // May be set by the callback to indicate an error + ``` + ] +][ + [[link beast.ref.beast__http__parser.on_chunk_body `on_chunk_body`]] + [ + Set a callback to be invoked on chunk body data. + + The callback will be invoked one or more times to provide + buffers corresponding to the chunk body for the current chunk. + The callback receives the number of octets remaining in this + chunk body including the octets in the buffer provided. + + The callback must return the number of octets actually consumed. + Any octets not consumed will be presented again in a subsequent + invocation of the callback. + The implementation type-erases the callback without requiring + a dynamic allocation. For this reason, the callback object is + passed by a non-constant reference. + + The function object will be called with this equivalent signature: + ``` + std::size_t + callback( + std::uint64_t remain, // Octets remaining in this chunk, includes `body` + string_view body, // A buffer holding some or all of the remainder of the chunk body + error_code& ec); // May be set by the callback to indicate an error + ``` + ] +]] + +This example will read a message header from the stream, and then manually +read each chunk. It recognizes the chunk boundaries and outputs the contents +of each chunk as it comes in. Any chunk extensions are printed, each extension +on its own line. Finally, any trailers promised in the header are printed. 
+ +[example_chunk_parsing] + +Given the HTTP response as input on the left, the output of the function shown +above is shown on the right: + +[table Chunk Parsing Example Output +[[Input][Output]] +[ + [ + ``` + HTTP/1.1 200 OK\r\n + Server: test\r\n + Trailer: Expires, Content-MD5\r\n + Transfer-Encoding: chunked\r\n + \r\n + 5\r\n + First\r\n + d;quality=1.0\r\n + Hello, world!\r\n + e;file=abc.txt;quality=0.7\r\n + The Next Chunk\r\n + 8;last\r\n + Last one\r\n + 0\r\n + Expires: never\r\n + Content-MD5: f4a5c16584f03d90\r\n + \r\n + ``` + ] + [ + ``` + Chunk Body: First + Extension: quality = 1.0 + Chunk Body: Hello, world! + Extension: file = abc.txt + Extension: quality = 0.7 + Chunk Body: The Next Chunk + Extension: last + Chunk Body: Last one + Expires: never + Content-MD5: f4a5c16584f03d90 + ``` + ] +]] + [endsect] diff --git a/example/doc/http_examples.hpp b/example/doc/http_examples.hpp index 7371e15d..cdb3eb40 100644 --- a/example/doc/http_examples.hpp +++ b/example/doc/http_examples.hpp @@ -879,64 +879,84 @@ class custom_parser /// Called after receiving the request-line (isRequest == true). void - on_request( - verb method, // The method verb, verb::unknown if no match - string_view method_str, // The method as a string - string_view target, // The request-target - int version, // The HTTP-version - error_code& ec); // The error returned to the caller, if any + on_request_impl( + verb method, // The method verb, verb::unknown if no match + string_view method_str, // The method as a string + string_view target, // The request-target + int version, // The HTTP-version + error_code& ec); // The error returned to the caller, if any /// Called after receiving the start-line (isRequest == false). 
void - on_response( - int code, // The status-code - string_view reason, // The obsolete reason-phrase - int version, // The HTTP-version - error_code& ec); // The error returned to the caller, if any + on_response_impl( + int code, // The status-code + string_view reason, // The obsolete reason-phrase + int version, // The HTTP-version + error_code& ec); // The error returned to the caller, if any /// Called after receiving a header field. void - on_field( - field f, // The known-field enumeration constant - string_view name, // The field name string. - string_view value, // The field value - error_code& ec); // The error returned to the caller, if any + on_field_impl( + field f, // The known-field enumeration constant + string_view name, // The field name string. + string_view value, // The field value + error_code& ec); // The error returned to the caller, if any /// Called after the complete header is received. void - on_header( - error_code& ec); // The error returned to the caller, if any + on_header_impl( + error_code& ec); // The error returned to the caller, if any /// Called just before processing the body, if a body exists. void - on_body(boost::optional< + on_body_init_impl( + boost::optional< std::uint64_t> const& - content_length, // Content length if known, else `boost::none` - error_code& ec); // The error returned to the caller, if any + content_length, // Content length if known, else `boost::none` + error_code& ec); // The error returned to the caller, if any /** Called for each piece of the body, if a body exists. - - If present, the chunked Transfer-Encoding will be removed - before this callback is invoked. The function returns - the number of bytes consumed from the input buffer. - Any input octets not consumed will be will be presented - on subsequent calls. + + This is used when there is no chunked transfer coding. + + The function returns the number of bytes consumed from the + input buffer. 
Any input octets not consumed will be + presented on subsequent calls. */ std::size_t - on_data( - string_view s, // A portion of the body - error_code& ec); // The error returned to the caller, if any + on_body_impl( + string_view s, // A portion of the body + error_code& ec); // The error returned to the caller, if any /// Called for each chunk header. void - on_chunk( - std::uint64_t size, // The size of the upcoming chunk - string_view extension, // The chunk-extension (may be empty) - error_code& ec); // The error returned to the caller, if any + on_chunk_header_impl( + std::uint64_t size, // The size of the upcoming chunk, + // or zero for the last chunk + string_view extension, // The chunk extensions (may be empty) + error_code& ec); // The error returned to the caller, if any + + /** Called to deliver the chunk body. + + This is used when there is a chunked transfer coding. The + implementation will automatically remove the encoding before + calling this function. + + The function returns the number of bytes consumed from the + input buffer. Any input octets not consumed will be + presented on subsequent calls. + */ + std::size_t + on_chunk_body_impl( + std::uint64_t remain, // The number of bytes remaining in the chunk, + // including what is being passed here. + // or zero for the last chunk + string_view body, // The next piece of the chunk body + error_code& ec); // The error returned to the caller, if any /// Called when the complete message is parsed.
void - on_complete(error_code& ec); + on_finish_impl(error_code& ec); public: custom_parser() = default; @@ -948,7 +968,7 @@ public: template void custom_parser:: -on_request(verb method, string_view method_str, +on_request_impl(verb method, string_view method_str, string_view path, int version, error_code& ec) { boost::ignore_unused(method, method_str, path, version); @@ -957,8 +977,11 @@ on_request(verb method, string_view method_str, template void custom_parser:: -on_response(int status, string_view reason, - int version, error_code& ec) +on_response_impl( + int status, + string_view reason, + int version, + error_code& ec) { boost::ignore_unused(status, reason, version); ec = {}; @@ -966,8 +989,11 @@ on_response(int status, string_view reason, template void custom_parser:: -on_field(field f, string_view name, - string_view value, error_code& ec) +on_field_impl( + field f, + string_view name, + string_view value, + error_code& ec) { boost::ignore_unused(f, name, value); ec = {}; @@ -975,14 +1001,15 @@ on_field(field f, string_view name, template void custom_parser:: -on_header(error_code& ec) +on_header_impl(error_code& ec) { ec = {}; } template void custom_parser:: -on_body(boost::optional const& content_length, +on_body_init_impl( + boost::optional const& content_length, error_code& ec) { boost::ignore_unused(content_length); @@ -991,25 +1018,39 @@ on_body(boost::optional const& content_length, template std::size_t custom_parser:: -on_data(string_view s, error_code& ec) +on_body_impl(string_view body, error_code& ec) { - boost::ignore_unused(s); + boost::ignore_unused(body); ec = {}; - return s.size(); + return body.size(); } template void custom_parser:: -on_chunk(std::uint64_t size, - string_view extension, error_code& ec) +on_chunk_header_impl( + std::uint64_t size, + string_view extension, + error_code& ec) { boost::ignore_unused(size, extension); ec = {}; } +template +std::size_t custom_parser:: +on_chunk_body_impl( + std::uint64_t remain, + string_view 
body, + error_code& ec) +{ + boost::ignore_unused(remain); + ec = {}; + return body.size(); +} + template void custom_parser:: -on_complete(error_code& ec) +on_finish_impl(error_code& ec) { ec = {}; } @@ -1056,5 +1097,139 @@ read_and_print_body( //] + +//------------------------------------------------------------------------------ +// +// Example: Chunk Parsing +// +//------------------------------------------------------------------------------ + +//[example_chunk_parsing + +/** Read a message with a chunked body and print the chunks and extensions +*/ +template< + bool isRequest, + class SyncReadStream, + class DynamicBuffer> +void +print_chunked_body( + std::ostream& os, + SyncReadStream& stream, + DynamicBuffer& buffer, + error_code& ec) +{ + // Declare the parser with an empty body since + // we plan on capturing the chunks ourselves. + parser p; + + // First read the complete header + read_header(stream, buffer, p, ec); + if(ec) + return; + + // This container will hold the extensions for each chunk + chunk_extensions ce; + + // This string will hold the body of each chunk + std::string chunk; + + // Declare our chunk header callback. This is invoked + // after each chunk header and also after the last chunk. + auto header_cb = + [&](std::uint64_t size, // Size of the chunk, or zero for the last chunk + string_view extensions, // The raw chunk-extensions string. Already validated. + error_code& ev) // We can set this to indicate an error + { + // Parse the chunk extensions so we can access them easily + ce.parse(extensions, ev); + if(ev) + return; + + // See if the chunk is too big + if(size > (std::numeric_limits::max)()) + { + ev = error::body_limit; + return; + } + + // Make sure we have enough storage, and + // reset the container for the upcoming chunk + chunk.reserve(static_cast(size)); + chunk.clear(); + }; + + // Set the callback.
The function requires a non-const reference so we + // use a local variable, since temporaries can only bind to const refs. + p.on_chunk_header(header_cb); + + // Declare the chunk body callback. This is called one or + // more times for each piece of a chunk body. + auto body_cb = + [&](std::uint64_t remain, // The number of bytes left in this chunk + string_view body, // A buffer holding chunk body data + error_code& ec) // We can set this to indicate an error + { + // If this is the last piece of the chunk body, + // set the error so that the call to `read` returns + // and we can process the chunk. + if(remain == body.size()) + ec = error::end_of_chunk; + + // Append this piece to our container + chunk.append(body.data(), body.size()); + + // The return value informs the parser of how much of the body we + // consumed. We will indicate that we consumed everything passed in. + return body.size(); + }; + p.on_chunk_body(body_cb); + + while(! p.is_done()) + { + // Read as much as we can. When we reach the end of the chunk, the chunk + // body callback will make the read return with the end_of_chunk error. + read(stream, buffer, p, ec); + if(! ec) + continue; + else if(ec != error::end_of_chunk) + return; + else + ec.assign(0, ec.category()); + + // We got a whole chunk, print the extensions: + for(auto const& extension : ce) + { + os << "Extension: " << extension.first; + if(! extension.second.empty()) + os << " = " << extension.second << std::endl; + else + os << std::endl; + } + + // Now print the chunk body + os << "Chunk Body: " << chunk << std::endl; + } + + // Get a reference to the parsed message, this is for convenience + auto const& msg = p.get(); + + // Check each field promised in the "Trailer" header and output it + for(auto const& name : token_list{msg[field::trailer]}) + { + // Find the trailer field + auto it = msg.find(name); + if(it == msg.end()) + { + // Oops! 
They promised the field but failed to deliver it + os << "Missing Trailer: " << name << std::endl; + continue; + } + os << it->name() << ": " << it->value() << std::endl; + } +} + +//] + } // http } // beast diff --git a/extras/beast/test/fuzz.hpp b/extras/beast/test/fuzz.hpp new file mode 100644 index 00000000..8be01658 --- /dev/null +++ b/extras/beast/test/fuzz.hpp @@ -0,0 +1,101 @@ +// +// Copyright (c) 2013-2017 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef BEAST_TEST_FUZZ_HPP +#define BEAST_TEST_FUZZ_HPP + +#include +#include +#include + +namespace beast { +namespace test { + +class fuzz_rand +{ + std::mt19937 rng_; + +public: + std::mt19937& + rng() + { + return rng_; + } + + template + Unsigned + operator()(Unsigned n) + { + return static_cast( + std::uniform_int_distribution< + Unsigned>{0, n-1}(rng_)); + } +}; + +template +static +void +fuzz( + static_string const& input, + std::size_t repeat, + std::size_t depth, + Rand& r, + F const& f) +{ + static_string mod{input}; + for(auto i = repeat; i; --i) + { + switch(r(4)) + { + case 0: // insert + if(mod.size() >= mod.max_size()) + continue; + mod.insert(r(mod.size() + 1), 1, + static_cast(r(256))); + break; + + case 1: // erase + if(mod.size() == 0) + continue; + mod.erase(r(mod.size()), 1); + break; + + case 2: // swap + { + if(mod.size() <= 1) + continue; + auto off = r(mod.size() - 1); + auto const temp = mod[off]; + mod[off] = mod[off + 1]; + mod[off + 1] = temp; + break; + } + case 3: // repeat + { + if(mod.empty()) + continue; + auto n = (std::min)( + std::geometric_distribution< + std::size_t>{}(r.rng()), + mod.max_size() - mod.size()); + if(n == 0) + continue; + auto off = r(mod.size()); + mod.insert(off, n, mod[off + 1]); + break; + } + } + f(string_view{mod.data(), mod.size()}); + if(depth > 0) + fuzz(mod, repeat, depth - 1, 
r, f); + } +} + +} // test +} // beast + +#endif diff --git a/include/beast/core/detail/varint.hpp b/include/beast/core/detail/varint.hpp new file mode 100644 index 00000000..4f5b02f0 --- /dev/null +++ b/include/beast/core/detail/varint.hpp @@ -0,0 +1,75 @@ +// +// Copyright (c) 2017 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef BEAST_DETAIL_VARINT_HPP +#define BEAST_DETAIL_VARINT_HPP + +#include +#include +#include +#include + +namespace beast { +namespace detail { + +// https://developers.google.com/protocol-buffers/docs/encoding#varints + +inline +std::size_t +varint_size(std::size_t value) +{ + std::size_t n = 1; + while(value > 127) + { + ++n; + value /= 128; + } + return n; +} + +template +std::size_t +varint_read(FwdIt& first) +{ + using value_type = typename + std::iterator_traits::value_type; + BOOST_STATIC_ASSERT( + std::is_integral::value && + sizeof(value_type) == 1); + std::size_t value = 0; + std::size_t factor = 1; + while((*first & 0x80) != 0) + { + value += (*first++ & 0x7f) * factor; + factor *= 128; + } + value += *first++ * factor; + return value; +} + +template +void +varint_write(FwdIt& first, std::size_t value) +{ + using value_type = typename + std::iterator_traits::value_type; + BOOST_STATIC_ASSERT( + std::is_integral::value && + sizeof(value_type) == 1); + while(value > 127) + { + *first++ = static_cast( + 0x80 | value); + value /= 128; + } + *first++ = static_cast(value); +} + +} // detail +} // beast + +#endif diff --git a/include/beast/http/basic_parser.hpp b/include/beast/http/basic_parser.hpp index 16de78ef..c7dbd9c2 100644 --- a/include/beast/http/basic_parser.hpp +++ b/include/beast/http/basic_parser.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include namespace beast { @@ -136,16 +137,16 @@ public: using is_request = std::integral_constant; - 
/// Copy constructor (disallowed) + /// Destructor + ~basic_parser(); + + /// Constructor basic_parser(basic_parser const&) = delete; - /// Copy assignment (disallowed) + /// Assignment (disallowed) basic_parser& operator=(basic_parser const&) = delete; - /// Destructor - ~basic_parser() = default; - - /// Default constructor + /// Constructor basic_parser(); /** Move constructor diff --git a/include/beast/http/chunk_encode.hpp b/include/beast/http/chunk_encode.hpp index 5c2e77f1..a42bc2ed 100644 --- a/include/beast/http/chunk_encode.hpp +++ b/include/beast/http/chunk_encode.hpp @@ -558,8 +558,9 @@ public: /** A set of chunk extensions - This container stores a set of chunk extensions suited - for use with @ref chunk_header and @ref chunk_body. + This container stores a set of chunk extensions suited for use with + @ref chunk_header and @ref chunk_body. The container may be iterated + to access the extensions in their structured form. Meets the requirements of ChunkExtensions */ @@ -569,7 +570,27 @@ class basic_chunk_extensions std::basic_string, Allocator> s_; + std::basic_string, Allocator> range_; + + template + FwdIt + do_parse(FwdIt it, FwdIt last, error_code& ec); + + void + do_insert(string_view name, string_view value); + public: + /** The type of value when iterating. + + The first element of the pair is the name, and the second + element is the value which may be empty. The value is + stored in its raw representation, without quotes or escapes.
+ */ + using value_type = std::pair; + + class const_iterator; + /// Constructor basic_chunk_extensions() = default; @@ -600,6 +621,13 @@ public: s_.clear(); } + /** Parse a set of chunk extensions + + Any previous extensions will be cleared + */ + void + parse(string_view s, error_code& ec); + /** Insert an extension name with an empty value @param name The name of the extension @@ -623,6 +651,12 @@ public: { return s_; } + + const_iterator + begin() const; + + const_iterator + end() const; }; //------------------------------------------------------------------------------ diff --git a/include/beast/http/detail/basic_parser.hpp b/include/beast/http/detail/basic_parser.hpp index d61a32dc..14500f6a 100644 --- a/include/beast/http/detail/basic_parser.hpp +++ b/include/beast/http/detail/basic_parser.hpp @@ -158,6 +158,7 @@ protected: //-------------------------------------------------------------------------- + static std::pair find_fast( char const* buf, @@ -205,7 +206,6 @@ protected: } } - // VFALCO Can SIMD help this? static char const* find_eom(char const* p, char const* last) @@ -242,6 +242,7 @@ protected: //-------------------------------------------------------------------------- + static char const* parse_token_to_eol( char const* p, @@ -317,6 +318,7 @@ protected: } template + static bool parse_hex(Iter& it, Unsigned& v) { @@ -361,7 +363,7 @@ protected: ec = error::need_more; return; } - if(! detail::is_tchar(*it)) + if(! detail::is_token_char(*it)) break; } if(it + 1 > last) @@ -709,6 +711,178 @@ protected: return; } } + + void + parse_chunk_extensions( + char const*& it, + char const* last, + error_code& ec) + { + /* + chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] ) + BWS = *( SP / HTAB ) ; "Bad White Space" + chunk-ext-name = token + chunk-ext-val = token / quoted-string + token = 1*tchar + quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE + qdtext = HTAB / SP / "!" 
/ %x23-5B ; '#'-'[' / %x5D-7E ; ']'-'~' / obs-text + quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text ) + obs-text = %x80-FF + + https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4667 + */ + loop: + if(it == last) + { + ec = error::need_more; + return; + } + if(*it != ' ' && *it != '\t' && *it != ';') + return; + // BWS + if(*it == ' ' || *it == '\t') + { + for(;;) + { + ++it; + if(it == last) + { + ec = error::need_more; + return; + } + if(*it != ' ' && *it != '\t') + break; + } + } + // ';' + if(*it != ';') + { + ec = error::bad_chunk_extension; + return; + } + semi: + ++it; // skip ';' + // BWS + for(;;) + { + if(it == last) + { + ec = error::need_more; + return; + } + if(*it != ' ' && *it != '\t') + break; + ++it; + } + // chunk-ext-name + if(! detail::is_token_char(*it)) + { + ec = error::bad_chunk_extension; + return; + } + for(;;) + { + ++it; + if(it == last) + { + ec = error::need_more; + return; + } + if(! detail::is_token_char(*it)) + break; + } + // BWS [ ";" / "=" ] + { + bool bws; + if(*it == ' ' || *it == '\t') + { + for(;;) + { + ++it; + if(it == last) + { + ec = error::need_more; + return; + } + if(*it != ' ' && *it != '\t') + break; + } + bws = true; + } + else + { + bws = false; + } + if(*it == ';') + goto semi; + if(*it != '=') + { + if(bws) + ec = error::bad_chunk_extension; + return; + } + ++it; // skip '=' + } + // BWS + for(;;) + { + if(it == last) + { + ec = error::need_more; + return; + } + if(*it != ' ' && *it != '\t') + break; + ++it; + } + // chunk-ext-val + if(*it != '"') + { + // token + if(! detail::is_token_char(*it)) + { + ec = error::bad_chunk_extension; + return; + } + for(;;) + { + ++it; + if(it == last) + { + ec = error::need_more; + return; + } + if(! 
detail::is_token_char(*it)) + break; + } + } + else + { + // quoted-string + for(;;) + { + ++it; + if(it == last) + { + ec = error::need_more; + return; + } + if(*it == '"') + break; + if(*it == '\\') + { + ++it; + if(it == last) + { + ec = error::need_more; + return; + } + } + } + ++it; + } + goto loop; + } }; } // detail diff --git a/include/beast/http/detail/rfc7230.hpp b/include/beast/http/detail/rfc7230.hpp index cd7093d9..bfdb74c7 100644 --- a/include/beast/http/detail/rfc7230.hpp +++ b/include/beast/http/detail/rfc7230.hpp @@ -78,7 +78,7 @@ is_text(char c) inline char -is_tchar(char c) +is_token_char(char c) { /* tchar = "!" | "#" | "$" | "%" | "&" | @@ -287,7 +287,7 @@ template void skip_token(FwdIt& it, FwdIt const& last) { - while(it != last && is_tchar(*it)) + while(it != last && is_token_char(*it)) ++it; } @@ -362,7 +362,7 @@ increment() if(it == last) return err(); // param - if(! detail::is_tchar(*it)) + if(! detail::is_token_char(*it)) return err(); auto const p0 = it; skip_token(++it, last); @@ -406,7 +406,7 @@ increment() else { // token - if(! detail::is_tchar(*it)) + if(! detail::is_token_char(*it)) return err(); auto const p2 = it; skip_token(++it, last); @@ -436,7 +436,7 @@ struct opt_token_list_policy return true; } auto const c = *it; - if(detail::is_tchar(c)) + if(detail::is_token_char(c)) { if(need_comma) return false; @@ -446,7 +446,7 @@ struct opt_token_list_policy ++it; if(it == s.end()) break; - if(! detail::is_tchar(*it)) + if(! detail::is_token_char(*it)) break; } v = string_view{&*p0, diff --git a/include/beast/http/error.hpp b/include/beast/http/error.hpp index 861620e3..dbe76051 100644 --- a/include/beast/http/error.hpp +++ b/include/beast/http/error.hpp @@ -72,6 +72,10 @@ enum class error */ need_buffer, + /** The end of a chunk was reached + */ + end_of_chunk, + /** Buffer maximum exceeded. This error is returned when reading HTTP content @@ -138,6 +142,9 @@ enum class error /// The chunk syntax is invalid. 
bad_chunk, + /// The chunk extension is invalid. + bad_chunk_extension, + /// An obs-fold exceeded an internal limit. bad_obs_fold }; diff --git a/include/beast/http/impl/basic_parser.ipp b/include/beast/http/impl/basic_parser.ipp index 9ea02bae..e49226c6 100644 --- a/include/beast/http/impl/basic_parser.ipp +++ b/include/beast/http/impl/basic_parser.ipp @@ -22,6 +22,12 @@ namespace beast { namespace http { +template +basic_parser:: +~basic_parser() +{ +} + template basic_parser:: basic_parser() @@ -215,7 +221,7 @@ loop: case state::body0: BOOST_ASSERT(! skip_); - impl().on_body(content_length(), ec); + impl().on_body_init_impl(content_length(), ec); if(ec) goto done; state_ = state::body; @@ -230,7 +236,7 @@ loop: case state::body_to_eof0: BOOST_ASSERT(! skip_); - impl().on_body(content_length(), ec); + impl().on_body_init_impl(content_length(), ec); if(ec) goto done; state_ = state::body_to_eof; @@ -244,7 +250,7 @@ loop: break; case state::chunk_header0: - impl().on_body(content_length(), ec); + impl().on_body_init_impl(content_length(), ec); if(ec) goto done; state_ = state::chunk_header; @@ -297,7 +303,7 @@ put_eof(error_code& ec) ec.assign(0, ec.category()); return; } - impl().on_complete(ec); + impl().on_finish_impl(ec); if(ec) return; state_ = state::complete; @@ -401,7 +407,7 @@ parse_start_line( if(version >= 11) f_ |= flagHTTP11; - impl().on_request(string_to_verb(method), + impl().on_request_impl(string_to_verb(method), method, target, version, ec); if(ec) return; @@ -460,7 +466,7 @@ parse_start_line( if(version >= 11) f_ |= flagHTTP11; - impl().on_response( + impl().on_response_impl( status_, reason, version, ec); if(ec) return; @@ -501,7 +507,7 @@ parse_fields(char const*& in, do_field(f, value, ec); if(ec) return; - impl().on_field(f, name, value, ec); + impl().on_field_impl(f, name, value, ec); if(ec) return; in = p; @@ -544,12 +550,12 @@ finish_header(error_code& ec, std::true_type) state_ = state::complete; } - impl().on_header(ec); + 
impl().on_header_impl(ec); if(ec) return; if(state_ == state::complete) { - impl().on_complete(ec); + impl().on_finish_impl(ec); if(ec) return; } @@ -597,12 +603,12 @@ finish_header(error_code& ec, std::false_type) state_ = state::body_to_eof0; } - impl().on_header(ec); + impl().on_header_impl(ec); if(ec) return; if(state_ == state::complete) { - impl().on_complete(ec); + impl().on_finish_impl(ec); if(ec) return; } @@ -615,7 +621,7 @@ basic_parser:: parse_body(char const*& p, std::size_t n, error_code& ec) { - n = impl().on_data(string_view{p, + n = impl().on_body_impl(string_view{p, beast::detail::clamp(len_, n)}, ec); p += n; len_ -= n; @@ -623,7 +629,7 @@ parse_body(char const*& p, return; if(len_ > 0) return; - impl().on_complete(ec); + impl().on_finish_impl(ec); if(ec) return; state_ = state::complete; @@ -642,7 +648,7 @@ parse_body_to_eof(char const*& p, return; } body_limit_ = body_limit_ - n; - n = impl().on_data(string_view{p, n}, ec); + n = impl().on_body_impl(string_view{p, n}, ec); p += n; if(ec) return; @@ -702,40 +708,34 @@ parse_chunk_header(char const*& p0, skip_ = static_cast< std::size_t>(eol - 2 - p0); - std::uint64_t v; - if(! parse_hex(p, v)) + std::uint64_t size; + if(! 
parse_hex(p, size)) { ec = error::bad_chunk; return; } - if(v != 0) + if(size != 0) { - if(v > body_limit_) + if(size > body_limit_) { ec = error::body_limit; return; } - body_limit_ -= v; - if(*p == ';') + body_limit_ -= size; + auto const start = p; + parse_chunk_extensions(p, pend, ec); + if(ec) + return; + if(p != eol -2 ) { - // VFALCO TODO Validate extension - impl().on_chunk(v, make_string( - p, eol - 2), ec); - if(ec) - return; - } - else if(p == eol - 2) - { - impl().on_chunk(v, {}, ec); - if(ec) - return; - } - else - { - ec = error::bad_chunk; + ec = error::bad_chunk_extension; return; } - len_ = v; + auto const ext = make_string(start, p); + impl().on_chunk_header_impl(size, ext, ec); + if(ec) + return; + len_ = size; skip_ = 2; p0 = eol; f_ |= flagExpectCRLF; @@ -750,8 +750,8 @@ parse_chunk_header(char const*& p0, BOOST_ASSERT(n >= 5); if(f_ & flagExpectCRLF) BOOST_VERIFY(parse_crlf(p)); - std::uint64_t v; - BOOST_VERIFY(parse_hex(p, v)); + std::uint64_t size; + BOOST_VERIFY(parse_hex(p, size)); eol = find_eol(p, pend, ec); BOOST_ASSERT(! 
ec); } @@ -765,14 +765,19 @@ parse_chunk_header(char const*& p0, return; } - if(*p == ';') + auto const start = p; + parse_chunk_extensions(p, pend, ec); + if(ec) + return; + if(p != eol - 2) { - // VFALCO TODO Validate extension - impl().on_chunk(0, make_string( - p, eol - 2), ec); - if(ec) - return; + ec = error::bad_chunk_extension; + return; } + auto const ext = make_string(start, p); + impl().on_chunk_header_impl(0, ext, ec); + if(ec) + return; p = eol; parse_fields(p, eom, ec); if(ec) @@ -780,7 +785,7 @@ parse_chunk_header(char const*& p0, BOOST_ASSERT(p == eom); p0 = eom; - impl().on_complete(ec); + impl().on_finish_impl(ec); if(ec) return; state_ = state::complete; @@ -793,15 +798,13 @@ basic_parser:: parse_chunk_body(char const*& p, std::size_t n, error_code& ec) { - n = impl().on_data(string_view{p, - beast::detail::clamp(len_, n)}, ec); + n = impl().on_chunk_body_impl( + len_, string_view{p, + beast::detail::clamp(len_, n)}, ec); p += n; len_ -= n; - if(ec) - return; - if(len_ > 0) - return; - state_ = state::chunk_header; + if(len_ == 0) + state_ = state::chunk_header; } template diff --git a/include/beast/http/impl/chunk_encode.ipp b/include/beast/http/impl/chunk_encode.ipp index aad42c87..d2b059e6 100644 --- a/include/beast/http/impl/chunk_encode.ipp +++ b/include/beast/http/impl/chunk_encode.ipp @@ -8,7 +8,10 @@ #ifndef BEAST_HTTP_IMPL_CHUNK_ENCODE_IPP #define BEAST_HTTP_IMPL_CHUNK_ENCODE_IPP +#include +#include #include +#include namespace beast { namespace http { @@ -210,23 +213,350 @@ chunk_last( //------------------------------------------------------------------------------ +template +class basic_chunk_extensions::const_iterator +{ + friend class basic_chunk_extensions; + + using iter_type = char const*; + + iter_type it_; + typename basic_chunk_extensions::value_type value_; + + explicit + const_iterator(iter_type it) + : it_(it) + { + } + + void + increment(); + +public: + using value_type = typename + basic_chunk_extensions::value_type; + 
using pointer = value_type const*; + using reference = value_type const&; + using difference_type = std::ptrdiff_t; + using iterator_category = + std::forward_iterator_tag; + + const_iterator() = default; + const_iterator(const_iterator&& other) = default; + const_iterator(const_iterator const& other) = default; + const_iterator& operator=(const_iterator&& other) = default; + const_iterator& operator=(const_iterator const& other) = default; + + bool + operator==(const_iterator const& other) const + { + return it_ == other.it_; + } + + bool + operator!=(const_iterator const& other) const + { + return !(*this == other); + } + + reference + operator*(); + + pointer + operator->() + { + return &(*this); + } + + const_iterator& + operator++() + { + increment(); + return *this; + } + + const_iterator + operator++(int) + { + auto temp = *this; + increment(); + return temp; + } +}; + template void basic_chunk_extensions:: -insert(string_view name) +const_iterator:: +increment() +{ + using beast::detail::varint_read; + auto n = varint_read(it_); + it_ += n; + n = varint_read(it_); + it_ += n; +} + +template +auto +basic_chunk_extensions:: +const_iterator:: +operator*() -> + reference +{ + using beast::detail::varint_read; + auto it = it_; + auto n = varint_read(it); + value_.first = string_view{it, n}; + it += n; + n = varint_read(it); + value_.second = string_view{it, n}; + return value_; +} + +//------------------------------------------------------------------------------ + +template +template +FwdIt +basic_chunk_extensions:: +do_parse(FwdIt it, FwdIt last, error_code& ec) { /* - chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) + chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] ) + BWS = *( SP / HTAB ) ; "Bad White Space" + chunk-ext-name = token + chunk-ext-val = token / quoted-string + token = 1*tchar + quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE + qdtext = HTAB / SP / "!" 
/ %x23-5B ; '#'-'[' / %x5D-7E ; ']'-'~' / obs-text + quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text ) + obs-text = %x80-FF + + https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4667 */ - s_.reserve(1 + name.size()); - s_.push_back(';'); - s_.append(name.data(), name.size()); + using beast::detail::varint_size; + using beast::detail::varint_write; + using CharT = char; + using Traits = std::char_traits; + range_.reserve(static_cast( + std::distance(it, last) * 1.2)); + range_.resize(0); + auto const emit_string = + [this](FwdIt from, FwdIt to) + { + auto const len = + std::distance(from, to); + auto const offset = range_.size(); + range_.resize( + offset + + varint_size(len) + + len); + auto dest = &range_[offset]; + varint_write(dest, len); + Traits::copy(dest, from, len); + }; + auto const emit_string_plus_empty = + [this](FwdIt from, FwdIt to) + { + auto const len = + std::distance(from, to); + auto const offset = range_.size(); + range_.resize( + offset + + varint_size(len) + + len + + varint_size(0)); + auto dest = &range_[offset]; + varint_write(dest, len); + Traits::copy(dest, from, len); + dest += len; + varint_write(dest, 0); + }; + auto const emit_empty_string = + [this] + { + auto const offset = range_.size(); + range_.resize(offset + varint_size(0)); + auto dest = &range_[offset]; + varint_write(dest, 0); + }; +loop: + if(it == last) + { + ec.assign(0, ec.category()); + return it; + } + // BWS + if(*it == ' ' || *it == '\t') + { + for(;;) + { + ++it; + if(it == last) + { + ec = error::bad_chunk_extension; + return it; + } + if(*it != ' ' && *it != '\t') + break; + } + } + // ';' + if(*it != ';') + { + ec = error::bad_chunk_extension; + return it; + } +semi: + ++it; // skip ';' + // BWS + for(;;) + { + if(it == last) + { + ec = error::bad_chunk_extension; + return it; + } + if(*it != ' ' && *it != '\t') + break; + ++it; + } + // chunk-ext-name + { + if(! 
detail::is_token_char(*it)) + { + ec = error::bad_chunk_extension; + return it; + } + auto const first = it; + for(;;) + { + ++it; + if(it == last) + { + emit_string_plus_empty(first, it); + return it; + } + if(! detail::is_token_char(*it)) + break; + } + emit_string(first, it); + } + // BWS [ ";" / "=" ] + for(;;) + { + if(*it != ' ' && *it != '\t') + break; + ++it; + if(it == last) + { + ec = error::bad_chunk_extension; + return it; + } + } + if(*it == ';') + { + emit_empty_string(); + goto semi; + } + if(*it != '=') + { + ec = error::bad_chunk_extension; + return it; + } + ++it; // skip '=' + // BWS + for(;;) + { + if(it == last) + { + ec = error::bad_chunk_extension; + return it; + } + if(*it != ' ' && *it != '\t') + break; + ++it; + } + // chunk-ext-val + if(*it != '"') + { + // token + if(! detail::is_token_char(*it)) + { + ec = error::bad_chunk_extension; + return it; + } + auto const first = it; + for(;;) + { + ++it; + if(it == last) + break; + if(! detail::is_token_char(*it)) + break; + } + emit_string(first, it); + if(it == last) + return it; + } + else + { + // quoted-string + auto const first = ++it; // skip DQUOTE + // first pass, count chars + std::size_t len = 0; + for(;;) + { + if(it == last) + { + ec = error::bad_chunk_extension; + return it; + } + if(*it == '"') + break; + if(*it == '\\') + { + ++it; + if(it == last) + { + ec = error::bad_chunk_extension; + return it; + } + } + ++len; + ++it; + } + // now build the string + auto const offset = range_.size(); + range_.resize( + offset + + varint_size(len) + + len); + auto dest = &range_[offset]; + varint_write(dest, len); + it = first; + for(;;) + { + BOOST_ASSERT(it != last); + if(*it == '"') + break; + if(*it == '\\') + { + ++it; + BOOST_ASSERT(it != last); + } + Traits::assign(*dest++, *it++); + } + ++it; // skip DQUOTE + } + goto loop; } template void basic_chunk_extensions:: -insert(string_view name, string_view value) +do_insert(string_view name, string_view value) { /* chunk-ext = *( ";" 
chunk-ext-name [ "=" chunk-ext-val ] ) @@ -238,10 +568,18 @@ insert(string_view name, string_view value) quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text ) obs-text = %x80-FF */ + if(value.empty()) + { + s_.reserve(1 + name.size()); + s_.push_back(';'); + s_.append(name.data(), name.size()); + return; + } + bool is_token = true; for(auto const c : value) { - if(! detail::is_tchar(c)) + if(! detail::is_token_char(c)) { is_token = false; break; @@ -249,32 +587,114 @@ insert(string_view name, string_view value) } if(is_token) { + // token s_.reserve(1 + name.size() + 1 + value.size()); s_.push_back(';'); s_.append(name.data(), name.size()); s_.push_back('='); s_.append(value.data(), value.size()); - return; } - - // quoted-string - - s_.reserve( - 1 + name.size() + 1 + - 1 + value.size() + 20 + 1); - s_.push_back(';'); - s_.append(name.data(), name.size()); - s_.append("=\"", 2); - for(auto const c : value) + else { - if(c == '\\') - s_.append(R"(\\)", 2); - else if(c == '\"') - s_.append(R"(\")", 2); - else - s_.push_back(c); + // quoted-string + s_.reserve( + 1 + name.size() + 1 + + 1 + value.size() + 20 + 1); + s_.push_back(';'); + s_.append(name.data(), name.size()); + s_.append("=\"", 2); + for(auto const c : value) + { + if(c == '\\') + s_.append(R"(\\)", 2); + else if(c == '\"') + s_.append(R"(\")", 2); + else + s_.push_back(c); + } + s_.push_back('"'); } - s_.push_back('"'); +} + +template +void +basic_chunk_extensions:: +parse(string_view s, error_code& ec) +{ + do_parse(s.data(), s.data() + s.size(), ec); + if(! 
ec) + { + s_.clear(); + for(auto const& v : *this) + do_insert(v.first, v.second); + } +} + +template +void +basic_chunk_extensions:: +insert(string_view name) +{ + do_insert(name, {}); + + using beast::detail::varint_size; + using beast::detail::varint_write; + auto const offset = range_.size(); + range_.resize( + offset + + varint_size(name.size()) + + name.size() + + varint_size(0)); + auto dest = &range_[offset]; + varint_write(dest, name.size()); + std::memcpy(dest, name.data(), name.size()); + dest += name.size(); + varint_write(dest, 0); +} + +template +void +basic_chunk_extensions:: +insert(string_view name, string_view value) +{ + do_insert(name, value); + + using beast::detail::varint_size; + using beast::detail::varint_write; + auto const offset = range_.size(); + range_.resize( + offset + + varint_size(name.size()) + + name.size() + + varint_size(value.size()) + + value.size()); + auto dest = &range_[offset]; + varint_write(dest, name.size()); + std::memcpy(dest, name.data(), name.size()); + dest += name.size(); + varint_write(dest, value.size()); + std::memcpy(dest, value.data(), value.size()); +} + +template +inline +auto +basic_chunk_extensions:: +begin() const -> + const_iterator +{ + return const_iterator{range_.data()}; +} + +template +inline +auto +basic_chunk_extensions:: +end() const -> + const_iterator +{ + return const_iterator{ + range_.data() + range_.size()}; } } // http diff --git a/include/beast/http/impl/error.ipp b/include/beast/http/impl/error.ipp index fc243851..954292f7 100644 --- a/include/beast/http/impl/error.ipp +++ b/include/beast/http/impl/error.ipp @@ -43,6 +43,7 @@ public: case error::need_more: return "need more"; case error::unexpected_body: return "unexpected body"; case error::need_buffer: return "need buffer"; + case error::end_of_chunk: return "end of chunk"; case error::buffer_overflow: return "buffer overflow"; case error::header_limit: return "header limit exceeded"; case error::body_limit: return "body limit 
exceeded"; @@ -58,6 +59,7 @@ public: case error::bad_content_length: return "bad Content-Length"; case error::bad_transfer_encoding: return "bad Transfer-Encoding"; case error::bad_chunk: return "bad chunk"; + case error::bad_chunk_extension: return "bad chunk extension"; case error::bad_obs_fold: return "bad obs-fold"; default: diff --git a/include/beast/http/impl/parser.ipp b/include/beast/http/impl/parser.ipp index 73bacd9a..dab34891 100644 --- a/include/beast/http/impl/parser.ipp +++ b/include/beast/http/impl/parser.ipp @@ -14,6 +14,16 @@ namespace beast { namespace http { +template +parser:: +~parser() +{ + if(cb_h_) + cb_h_->~cb_h_t(); + if(cb_b_) + cb_b_->~cb_b_t(); +} + template parser:: parser() @@ -21,6 +31,22 @@ parser() { } +template +parser:: +parser(parser&& other) + : base_type(std::move(other)) + , m_(other.m_) + , wr_(other.wr_) + , wr_inited_(other.wr_inited_) +{ + if(other.cb_h_) + cb_h_ = other.cb_h_->move( + &cb_h_buf_); + if(other.cb_b_) + cb_b_ = other.cb_h_->move( + &cb_b_buf_); +} + template template parser:: @@ -34,17 +60,53 @@ parser(Arg1&& arg1, ArgN&&... argn) template template parser:: -parser(parser&& p, +parser(parser&& other, Args&&... args) - : base_type(std::move(p)) - , m_(p.release(), std::forward(args)...) + : base_type(std::move(other)) + , m_(other.release(), std::forward(args)...) , wr_(m_) { - if(wr_inited_) + if(other.wr_inited_) BOOST_THROW_EXCEPTION(std::invalid_argument{ "moved-from parser has a body"}); } +template +template +void +parser:: +on_chunk_header(Callback& cb) +{ + // Callback may not be constant, caller is responsible for + // managing the lifetime of the callback. Copies are not made. + BOOST_STATIC_ASSERT(! std::is_const::value); + + // Can't set the callback after receiving any chunk data! + BOOST_ASSERT(! 
wr_inited_); + + if(cb_h_) + cb_h_->~cb_h_t(); + cb_h_ = new(&cb_h_buf_) cb_h_t_impl(cb); +} + +template +template +void +parser:: +on_chunk_body(Callback& cb) +{ + // Callback may not be constant, caller is responsible for + // managing the lifetime of the callback. Copies are not made. + BOOST_STATIC_ASSERT(! std::is_const::value); + + // Can't set the callback after receiving any chunk data! + BOOST_ASSERT(! wr_inited_); + + if(cb_b_) + cb_b_->~cb_b_t(); + cb_b_ = new(&cb_b_buf_) cb_b_t_impl(cb); +} + } // http } // beast diff --git a/include/beast/http/impl/rfc7230.ipp b/include/beast/http/impl/rfc7230.ipp index bce0ec85..12113b58 100644 --- a/include/beast/http/impl/rfc7230.ipp +++ b/include/beast/http/impl/rfc7230.ipp @@ -332,7 +332,7 @@ increment() if(it_ == last_) return err(); auto const c = *it_; - if(detail::is_tchar(c)) + if(detail::is_token_char(c)) { if(need_comma) return err(); @@ -342,7 +342,7 @@ increment() ++it_; if(it_ == last_) break; - if(! detail::is_tchar(*it_)) + if(! detail::is_token_char(*it_)) break; } v_.first = string_view{&*p0, @@ -504,7 +504,7 @@ increment() if(it_ == last_) return err(); auto const c = *it_; - if(detail::is_tchar(c)) + if(detail::is_token_char(c)) { if(need_comma) return err(); @@ -514,7 +514,7 @@ increment() ++it_; if(it_ == last_) break; - if(! detail::is_tchar(*it_)) + if(! 
detail::is_token_char(*it_)) break; } v_ = string_view{&*p0, diff --git a/include/beast/http/parser.hpp b/include/beast/http/parser.hpp index 108223ef..0d164c36 100644 --- a/include/beast/http/parser.hpp +++ b/include/beast/http/parser.hpp @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -56,34 +55,134 @@ class parser template friend class parser; + struct cb_h_exemplar + { + void + operator()(std::uint64_t, string_view, error_code&); + }; + + struct cb_h_t + { + virtual ~cb_h_t() = default; + virtual cb_h_t* move(void* dest) = 0; + virtual void operator()( + std::uint64_t size, + string_view extensions, + error_code& ec) = 0; + }; + + template + struct cb_h_t_impl : cb_h_t + { + Callback& cb_; + + explicit + cb_h_t_impl(Callback& cb) + : cb_(cb) + { + } + + cb_h_t* + move(void* dest) override + { + new(dest) cb_h_t_impl< + Callback>(std::move(*this)); + return this; + } + + void + operator()( + std::uint64_t size, + string_view extensions, + error_code& ec) override + { + cb_(size, extensions, ec); + } + }; + + struct cb_b_exemplar + { + std::size_t + operator()(std::uint64_t, string_view, error_code&); + }; + + struct cb_b_t + { + virtual ~cb_b_t() = default; + virtual cb_b_t* move(void* dest) = 0; + virtual std::size_t operator()( + std::uint64_t remain, + string_view body, + error_code& ec) = 0; + }; + + template + struct cb_b_t_impl : cb_b_t + { + Callback& cb_; + + explicit + cb_b_t_impl(Callback& cb) + : cb_(cb) + { + } + + cb_b_t* + move(void* dest) override + { + new(dest) cb_b_t_impl< + Callback>(std::move(*this)); + return this; + } + + std::size_t + operator()( + std::uint64_t remain, + string_view body, + error_code& ec) override + { + return cb_(remain, body, ec); + } + }; + using base_type = basic_parser>; message> m_; typename Body::writer wr_; - std::function cb_; bool wr_inited_ = false; + cb_h_t* cb_h_ = nullptr; + typename std::aligned_storage< + sizeof(cb_h_t_impl)>::type cb_h_buf_; + + cb_b_t* cb_b_ = 
nullptr; + typename std::aligned_storage< + sizeof(cb_b_t_impl)>::type cb_b_buf_; + public: /// The type of message returned by the parser using value_type = message>; + /// Destructor + ~parser(); + /// Constructor parser(); - /// Copy constructor (disallowed) + /// Constructor parser(parser const&) = delete; - /// Copy assignment (disallowed) + /// Assignment parser& operator=(parser const&) = delete; - /** Move constructor. + /** Constructor After the move, the only valid operation on the moved-from object is destruction. */ - parser(parser&& other) = default; + parser(parser&& other); /** Constructor @@ -189,34 +288,92 @@ public: return std::move(m_); } - /** Set the on_header callback. + /** Set a callback to be invoked on each chunk header. - When the callback is set, it is called after the parser - receives a complete header. The function must be invocable with - this signature: + The callback will be invoked once for every chunk in the message + payload, as well as once for the last chunk. The invocation + happens after the chunk header is available but before any body + octets have been parsed. + + The extensions are provided in raw, validated form, use + @ref chunk_extensions::parse to parse the extensions into a + structured container for easier access. + The implementation type-erases the callback without requiring + a dynamic allocation. For this reason, the callback object is + passed by a non-constant reference. + + @par Example @code - void callback( - parser& p, // `*this` - error_code& ec) // Set to the error, if any + auto callback = + [](std::uint64_t size, string_view extensions, error_code& ec) + { + //... + }; + parser.on_chunk_header(callback); @endcode - The callback will ensure that `!ec` is `true` if there was - no error or set to the appropriate error code if there was one. - The callback may not call @ref put or @ref put_eof, or - else the behavior is undefined. 
+ @param cb The function to set, which must be invocable with + this equivalent signature: + @code + void + on_chunk_header( + std::uint64_t size, // Size of the chunk, zero for the last chunk + string_view extensions, // The chunk-extensions in raw form + error_code& ec); // May be set by the callback to indicate an error + @endcode */ + template void - on_header(std::function cb) - { - cb_ = std::move(cb); - } + on_chunk_header(Callback& cb); + + /** Set a callback to be invoked on chunk body data + + The provided function object will be invoked one or more times + to provide buffers corresponding to the chunk body for the current + chunk. The callback receives the number of octets remaining in this + chunk body including the octets in the buffer provided. + + The callback must return the number of octets actually consumed. + Any octets not consumed will be presented again in a subsequent + invocation of the callback. + The implementation type-erases the callback without requiring + a dynamic allocation. For this reason, the callback object is + passed by a non-constant reference. + + @par Example + @code + auto callback = + [](std::uint64_t remain, string_view body, error_code& ec) + { + //... 
+ }; + parser.on_chunk_body(callback); + @endcode + + @param cb The function to set, which must be invocable with + this equivalent signature: + @code + std::size_t + on_chunk_header( + std::uint64_t remain, // Octets remaining in this chunk, includes `body` + string_view body, // A buffer holding some or all of the remainder of the chunk body + error_code& ec); // May be set by the callback to indicate an error + @endcode + */ + template + void + on_chunk_body(Callback& cb); private: friend class basic_parser; void - on_request(verb method, string_view method_str, - string_view target, int version, error_code& ec) + on_request_impl( + verb method, + string_view method_str, + string_view target, + int version, + error_code& ec) { try { @@ -235,9 +392,11 @@ private: } void - on_response(int code, + on_response_impl( + int code, string_view reason, - int version, error_code& ec) + int version, + error_code& ec) { m_.result(code); m_.version = version; @@ -253,8 +412,11 @@ private: } void - on_field(field name, string_view name_string, - string_view value, error_code& ec) + on_field_impl( + field name, + string_view name_string, + string_view value, + error_code& ec) { try { @@ -268,39 +430,54 @@ private: } void - on_header(error_code& ec) + on_header_impl(error_code& ec) { - if(cb_) - cb_(*this, ec); - else - ec.assign(0, ec.category()); + ec.assign(0, ec.category()); } void - on_body(boost::optional< - std::uint64_t> const& content_length, - error_code& ec) + on_body_init_impl( + boost::optional const& content_length, + error_code& ec) { wr_.init(content_length, ec); wr_inited_ = true; } std::size_t - on_data(string_view s, error_code& ec) + on_body_impl( + string_view body, + error_code& ec) { return wr_.put(boost::asio::buffer( - s.data(), s.size()), ec); + body.data(), body.size()), ec); } void - on_chunk(std::uint64_t, - string_view, error_code& ec) + on_chunk_header_impl( + std::uint64_t size, + string_view extensions, + error_code& ec) { + if(cb_h_) + return 
(*cb_h_)(size, extensions, ec); ec.assign(0, ec.category()); } + std::size_t + on_chunk_body_impl( + std::uint64_t remain, + string_view body, + error_code& ec) + { + if(cb_b_) + return (*cb_b_)(remain, body, ec); + return wr_.put(boost::asio::buffer( + body.data(), body.size()), ec); + } + void - on_complete(error_code& ec) + on_finish_impl(error_code& ec) { wr_.finish(ec); } diff --git a/test/benchmarks/parser.cpp b/test/benchmarks/parser.cpp index b7c67f42..175b88e8 100644 --- a/test/benchmarks/parser.cpp +++ b/test/benchmarks/parser.cpp @@ -166,14 +166,14 @@ public: boost::asio::mutable_buffers_1; void - on_request(verb, string_view, + on_request_impl(verb, string_view, string_view, int, error_code& ec) { ec.assign(0, ec.category()); } void - on_response(int, + on_response_impl(int, string_view, int, error_code& ec) { @@ -181,42 +181,50 @@ public: } void - on_field(field, + on_field_impl(field, string_view, string_view, error_code& ec) { ec.assign(0, ec.category()); } void - on_header(error_code& ec) + on_header_impl(error_code& ec) { ec.assign(0, ec.category()); } void - on_body(boost::optional const&, + on_body_init_impl( + boost::optional const&, error_code& ec) { ec.assign(0, ec.category()); } std::size_t - on_data(string_view s, error_code& ec) + on_body_impl(string_view s, error_code& ec) { ec.assign(0, ec.category()); return s.size(); } void - on_chunk(std::uint64_t, - string_view, - error_code& ec) + on_chunk_header_impl(std::uint64_t, + string_view, error_code& ec) { ec.assign(0, ec.category()); } + std::size_t + on_chunk_body_impl(std::uint64_t, + string_view s, error_code& ec) + { + ec.assign(0, ec.category()); + return s.size(); + } + void - on_complete(error_code& ec) + on_finish_impl(error_code& ec) { ec.assign(0, ec.category()); } diff --git a/test/core/CMakeLists.txt b/test/core/CMakeLists.txt index 39749c57..9fa0467f 100644 --- a/test/core/CMakeLists.txt +++ b/test/core/CMakeLists.txt @@ -44,6 +44,7 @@ add_executable (core-tests base64.cpp 
empty_base_optimization.cpp sha1.cpp + detail/varint.cpp ) target_link_libraries(core-tests diff --git a/test/core/Jamfile b/test/core/Jamfile index 159b7ae5..62f63bee 100644 --- a/test/core/Jamfile +++ b/test/core/Jamfile @@ -38,4 +38,5 @@ unit-test core-tests : base64.cpp empty_base_optimization.cpp sha1.cpp + detail/varint.cpp ; diff --git a/test/core/detail/varint.cpp b/test/core/detail/varint.cpp new file mode 100644 index 00000000..f6da8dce --- /dev/null +++ b/test/core/detail/varint.cpp @@ -0,0 +1,52 @@ +// +// Copyright (c) 2013-2017 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +// Test that header file is self-contained. +#include + +#include + +namespace beast { + +class varint_test : public beast::unit_test::suite +{ +public: + void + testVarint() + { + using beast::detail::varint_read; + using beast::detail::varint_size; + using beast::detail::varint_write; + std::size_t n0 = 0; + std::size_t n1 = 1; + for(;;) + { + char buf[16]; + BOOST_ASSERT(sizeof(buf) >= varint_size(n0)); + auto it = &buf[0]; + varint_write(it, n0); + it = &buf[0]; + auto n = varint_read(it); + BEAST_EXPECT(n == n0); + n = n0 + n1; + if(n < n1) + break; + n0 = n1; + n1 = n; + } + } + + void + run() + { + testVarint(); + } +}; + +BEAST_DEFINE_TESTSUITE(varint,core,beast); + +} // beast diff --git a/test/http/basic_parser.cpp b/test/http/basic_parser.cpp index 6303fa7f..0716044d 100644 --- a/test/http/basic_parser.cpp +++ b/test/http/basic_parser.cpp @@ -8,6 +8,7 @@ // Test that header file is self-contained. 
#include +#include "message_fuzz.hpp" #include "test_parser.hpp" #include @@ -17,6 +18,7 @@ #include #include #include +#include #include namespace beast { @@ -399,7 +401,7 @@ public: BEAST_EXPECT(p.got_on_field == 2); BEAST_EXPECT(p.got_on_header == 1); BEAST_EXPECT(p.got_on_body == 1); - BEAST_EXPECT(p.got_on_chunk == 1); + BEAST_EXPECT(p.got_on_chunk == 2); BEAST_EXPECT(p.got_on_complete == 1); }); parsegrind>( @@ -415,7 +417,7 @@ public: BEAST_EXPECT(p.got_on_field == 2); BEAST_EXPECT(p.got_on_header == 1); BEAST_EXPECT(p.got_on_body == 1); - BEAST_EXPECT(p.got_on_chunk == 1); + BEAST_EXPECT(p.got_on_chunk == 2); BEAST_EXPECT(p.got_on_complete == 1); }); } @@ -1104,7 +1106,64 @@ public: //-------------------------------------------------------------------------- void - testFuzz1() + testFuzz() + { + auto const grind = + [&](string_view s) + { + static_string<100> ss{s}; + test::fuzz_rand r; + test::fuzz(ss, 4, 5, r, + [&](string_view s) + { + error_code ec; + test_parser p; + p.eager(true); + p.put(boost::asio::const_buffers_1{ + s.data(), s.size()}, ec); + }); + }; + auto const good = + [&](string_view s) + { + std::string msg = + "HTTP/1.1 200 OK\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "0" + s.to_string() + "\r\n" + "\r\n"; + error_code ec; + test_parser p; + p.eager(true); + p.put(boost::asio::const_buffers_1{ + msg.data(), msg.size()}, ec); + BEAST_EXPECTS(! 
ec, ec.message()); + grind(msg); + }; + auto const bad = + [&](string_view s) + { + std::string msg = + "HTTP/1.1 200 OK\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "0" + s.to_string() + "\r\n" + "\r\n"; + error_code ec; + test_parser p; + p.eager(true); + p.put(boost::asio::const_buffers_1{ + msg.data(), msg.size()}, ec); + BEAST_EXPECT(ec); + grind(msg); + }; + chunkExtensionsTest(good, bad); + } + + //-------------------------------------------------------------------------- + + void + testRegression1() { // crash_00cda0b02d5166bd1039ddb3b12618cd80da75f3 unsigned char buf[] ={ @@ -1154,7 +1213,8 @@ public: testIssue430(); testIssue452(); testIssue496(); - testFuzz1(); + testFuzz(); + testRegression1(); } }; diff --git a/test/http/chunk_encode.cpp b/test/http/chunk_encode.cpp index 49c7e752..ea8ee094 100644 --- a/test/http/chunk_encode.cpp +++ b/test/http/chunk_encode.cpp @@ -8,9 +8,14 @@ // Test that header file is self-contained. #include +#include "message_fuzz.hpp" + +#include #include +#include #include #include +#include namespace beast { namespace http { @@ -194,17 +199,94 @@ public: void testChunkExtensions() { + auto const str = + [](chunk_extensions const& ce) + { + std::string s; + for(auto const& v : ce) + { + s.append(v.first.to_string()); + s.push_back(','); + if(! 
v.second.empty()) + { + s.append(v.second.to_string()); + s.push_back(','); + } + } + return s; + }; chunk_extensions ce; ce.insert("x"); BEAST_EXPECT(ce.str() == ";x"); + BEAST_EXPECT(str(ce) == "x,"); ce.insert("y", "z"); BEAST_EXPECT(ce.str() == ";x;y=z"); + BEAST_EXPECT(str(ce) == "x,y,z,"); ce.insert("z", R"(")"); BEAST_EXPECT(ce.str() == R"(;x;y=z;z="\"")"); + BEAST_EXPECT(str(ce) == R"(x,y,z,z,",)"); ce.insert("p", R"(\)"); BEAST_EXPECT(ce.str() == R"(;x;y=z;z="\"";p="\\")"); + BEAST_EXPECT(str(ce) == R"(x,y,z,z,",p,\,)"); ce.insert("q", R"(1"2\)"); BEAST_EXPECT(ce.str() == R"(;x;y=z;z="\"";p="\\";q="1\"2\\")"); + BEAST_EXPECT(str(ce) == R"(x,y,z,z,",p,\,q,1"2\,)"); + } + + void + testParseChunkExtensions() + { + auto const grind = + [&](string_view s) + { + error_code ec; + static_string<200> ss{s}; + test::fuzz_rand r; + for(auto i = 3; i--;) + { + test::fuzz(ss, 5, 5, r, + [&](string_view s) + { + chunk_extensions c1; + c1.parse(s, ec); + if(ec) + { + pass(); + return; + } + chunk_extensions c2; + c2.parse(c1.str(), ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + chunk_extensions c3; + for(auto const& v : c2) + if(v.second.empty()) + c3.insert(v.first); + else + c3.insert(v.first, v.second); + BEAST_EXPECTS(c2.str() == c3.str(), c3.str()); + }); + } + }; + auto const good = + [&](string_view s) + { + error_code ec; + chunk_extensions ce; + ce.parse(s, ec); + BEAST_EXPECTS(! 
ec, ec.message()); + grind(s); + }; + auto const bad = + [&](string_view s) + { + error_code ec; + chunk_extensions ce; + ce.parse(s, ec); + BEAST_EXPECT(ec); + grind(s); + }; + chunkExtensionsTest(good, bad); } void @@ -215,6 +297,7 @@ public: testChunkBody(); testChunkFinal(); testChunkExtensions(); + testParseChunkExtensions(); } }; diff --git a/test/http/doc_examples.cpp b/test/http/doc_examples.cpp index 4bdc1aa1..f43fc7e2 100644 --- a/test/http/doc_examples.cpp +++ b/test/http/doc_examples.cpp @@ -22,8 +22,9 @@ #include #include #include -#include +#include #include +#include namespace beast { namespace http { @@ -369,7 +370,7 @@ public: ostream(c.client.buffer) << "HTTP/1.1 200 OK\r\n" "Server: test\r\n" - "Accept: Expires, Content-MD5\r\n" + "Trailer: Expires, Content-MD5\r\n" "Transfer-Encoding: chunked\r\n" "\r\n" "5\r\n" @@ -385,16 +386,23 @@ public: "Content-MD5: f4a5c16584f03d90\r\n" "\r\n"; - flat_buffer b; - response_parser p; - read_header(c.client, b, p); - BOOST_ASSERT(p.is_chunked()); - //while(! p.is_done()) - { - // read the chunk header? - // read the next chunk? - } + error_code ec; + flat_buffer b; + std::stringstream ss; + print_chunked_body(ss, c.client, b, ec); + BEAST_EXPECTS(! 
ec, ec.message()); + BEAST_EXPECT(ss.str() == + "Chunk Body: First\n" + "Extension: quality = 1.0\n" + "Chunk Body: Hello, world!\n" + "Extension: file = abc.txt\n" + "Extension: quality = 0.7\n" + "Chunk Body: The Next Chunk\n" + "Extension: last\n" + "Chunk Body: Last one\n" + "Expires: never\n" + "Content-MD5: f4a5c16584f03d90\n"); } diff --git a/test/http/error.cpp b/test/http/error.cpp index 52d986bb..7489b372 100644 --- a/test/http/error.cpp +++ b/test/http/error.cpp @@ -41,6 +41,7 @@ public: check("beast.http", error::need_more); check("beast.http", error::unexpected_body); check("beast.http", error::need_buffer); + check("beast.http", error::end_of_chunk); check("beast.http", error::buffer_overflow); check("beast.http", error::body_limit); check("beast.http", error::bad_alloc); @@ -56,6 +57,7 @@ public: check("beast.http", error::bad_content_length); check("beast.http", error::bad_transfer_encoding); check("beast.http", error::bad_chunk); + check("beast.http", error::bad_chunk_extension); check("beast.http", error::bad_obs_fold); } }; diff --git a/test/http/message_fuzz.hpp b/test/http/message_fuzz.hpp index 2c1ff25b..f68910de 100644 --- a/test/http/message_fuzz.hpp +++ b/test/http/message_fuzz.hpp @@ -562,6 +562,42 @@ public: using message_fuzz = message_fuzz_t<>; +template +void +chunkExtensionsTest( + Good const& good, Bad const& bad) +{ + good(""); + good(";x"); + good(";x;y"); + good(";x=y"); + good(";x;y=z"); + good(" ;x"); + good("\t;x"); + good(" \t;x"); + good("\t ;x"); + good(" ; x"); + good(" ;\tx"); + good("\t ; \tx"); + good(";x= y"); + good(" ;x= y"); + good(" ; x= y"); + good(R"(;x="\"")"); + good(R"(;x="\\")"); + good(R"(;x;y=z;z="\"";p="\\";q="1\"2\\")"); + + bad(" "); + bad(";"); + bad("="); + bad(" ;"); + bad("; "); + bad(" ; "); + bad(" ; x "); + bad(";x ="); + bad(";x = "); + bad(";x=="); +} + } // http } // beast diff --git a/test/http/parser.cpp b/test/http/parser.cpp index cae9d904..f63e2834 100644 --- a/test/http/parser.cpp +++ 
b/test/http/parser.cpp @@ -328,49 +328,6 @@ public: BEAST_EXPECT(used == 0); } - void - testCallback() - { - { - multi_buffer b; - ostream(b) << - "POST / HTTP/1.1\r\n" - "Content-Length: 2\r\n" - "\r\n" - "**"; - error_code ec; - parser p; - p.eager(true); - p.put(b.data(), ec); - p.on_header( - [this](parser& p, error_code& ec) - { - BEAST_EXPECT(p.is_header_done()); - ec.assign(0, ec.category()); - }); - BEAST_EXPECTS(! ec, ec.message()); - } - { - multi_buffer b; - ostream(b) << - "POST / HTTP/1.1\r\n" - "Content-Length: 2\r\n" - "\r\n" - "**"; - error_code ec; - parser p; - p.eager(true); - p.put(b.data(), ec); - p.on_header( - [this](parser&, error_code& ec) - { - ec.assign(errc::bad_message, - generic_category()); - }); - BEAST_EXPECTS(! ec, ec.message()); - } - } - void run() override { @@ -378,7 +335,6 @@ public: testNeedMore(); testNeedMore(); testGotSome(); - testCallback(); } }; diff --git a/test/http/test_parser.hpp b/test/http/test_parser.hpp index 7a17ee9f..0f922d5c 100644 --- a/test/http/test_parser.hpp +++ b/test/http/test_parser.hpp @@ -51,7 +51,7 @@ public: } void - on_request(verb, string_view method_str_, + on_request_impl(verb, string_view method_str_, string_view path_, int version_, error_code& ec) { method = std::string( @@ -67,7 +67,7 @@ public: } void - on_response(int code, + on_response_impl(int code, string_view reason_, int version_, error_code& ec) { @@ -83,7 +83,7 @@ public: } void - on_field(field, string_view name, + on_field_impl(field, string_view name, string_view value, error_code& ec) { ++got_on_field; @@ -95,7 +95,7 @@ public: } void - on_header(error_code& ec) + on_header_impl(error_code& ec) { ++got_on_header; if(fc_) @@ -105,9 +105,9 @@ public: } void - on_body(boost::optional< - std::uint64_t> const& content_length_, - error_code& ec) + on_body_init_impl( + boost::optional const& content_length_, + error_code& ec) { ++got_on_body; got_content_length = @@ -119,7 +119,7 @@ public: } std::size_t - on_data(string_view s, + 
on_body_impl(string_view s, error_code& ec) { body.append(s.data(), s.size()); @@ -131,8 +131,10 @@ public: } void - on_chunk(std::uint64_t, - string_view, error_code& ec) + on_chunk_header_impl( + std::uint64_t, + string_view, + error_code& ec) { ++got_on_chunk; if(fc_) @@ -141,8 +143,23 @@ public: ec.assign(0, ec.category()); } + std::size_t + on_chunk_body_impl( + std::uint64_t, + string_view s, + error_code& ec) + { + body.append(s.data(), s.size()); + if(fc_) + fc_->fail(ec); + else + ec.assign(0, ec.category()); + return s.size(); + } + + void - on_complete(error_code& ec) + on_finish_impl(error_code& ec) { ++got_on_complete; if(fc_)