diff --git a/CHANGELOG.md b/CHANGELOG.md index b95daf6c..3b56dc4f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ Version 61: * Clean close in Secure WebSocket client * Add server-framework SSL HTTP and WebSocket ports * Fix shadowing warnings +* Tidy up http-crawl example API Changes: diff --git a/example/http-client/http_client.cpp b/example/http-client/http_client.cpp index 9ce9393d..0683735d 100644 --- a/example/http-client/http_client.cpp +++ b/example/http-client/http_client.cpp @@ -26,7 +26,7 @@ int main() return EXIT_FAILURE; }; - boost::system::error_code ec; + beast::error_code ec; // Set up an asio socket boost::asio::io_service ios; diff --git a/example/http-crawl/http_crawl.cpp b/example/http-crawl/http_crawl.cpp index f10c6c79..b089e5d7 100644 --- a/example/http-crawl/http_crawl.cpp +++ b/example/http-crawl/http_crawl.cpp @@ -9,13 +9,12 @@ #include #include +#include #include #include +#include #include -using namespace beast::http; -using namespace boost::asio; - template void err(beast::error_code const& ec, String const& what) @@ -23,40 +22,128 @@ err(beast::error_code const& ec, String const& what) std::cerr << what << ": " << ec.message() << std::endl; } -int main(int, char const*[]) +/* This simple program just visits a list with a few + thousand domain names and tries to retrieve and print + the home page of each site. 
+*/ +int +main(int, char const*[]) { - io_service ios; + // A helper for reporting errors + auto const fail = + [](std::string what, beast::error_code ec) + { + std::cerr << what << ": " << ec.message() << std::endl; + std::cerr.flush(); + return EXIT_FAILURE; + }; + + // Obligatory Asio variable + boost::asio::io_service ios; + + // Loop over all the URLs for(auto const& host : urls_large_data()) { - try + beast::error_code ec; + + // Look up the domain name + boost::asio::ip::tcp::resolver r(ios); + auto lookup = r.resolve(boost::asio::ip::tcp::resolver::query{host, "http"}, ec); + if(ec) { - ip::tcp::resolver r(ios); - auto it = r.resolve( - ip::tcp::resolver::query{host, "http"}); - ip::tcp::socket sock(ios); - connect(sock, it); - auto ep = sock.remote_endpoint(); - request req; - req.method(verb::get); - req.version = 11; - req.target("/"); - req.insert(field::host, host + std::string(":") + - boost::lexical_cast(ep.port())); - req.insert(field::user_agent, "beast/http"); - req.prepare(); - write(sock, req); - response res; - beast::multi_buffer b; - beast::http::read(sock, b, res); - std::cout << res; + fail("resolve", ec); + continue; } - catch(beast::system_error const& ec) + + // Now create a socket and connect + boost::asio::ip::tcp::socket sock(ios); + boost::asio::connect(sock, lookup, ec); + if(ec) { - std::cerr << host << ": " << ec.what(); + fail("connect", ec); + continue; } - catch(...) 
+ + // Grab the remote endpoint + auto ep = sock.remote_endpoint(ec); + if(ec) { - std::cerr << host << ": unknown exception" << std::endl; + fail("remote_endpoint", ec); + continue; } + + // Set up an HTTP GET request + beast::http::request req; + req.version = 11; + req.method(beast::http::verb::get); + req.target("/"); + req.set(beast::http::field::host, host + std::string(":") + + boost::lexical_cast(ep.port())); + req.set(beast::http::field::user_agent, BEAST_VERSION_STRING); + + // Set the Connection: close field, this way the server will close + // the connection. This consumes fewer resources (no TIME_WAIT) because + // of the graceful close. It also makes things go a little faster. + // + req.set(beast::http::field::connection, "close"); + + // Send the GET request + beast::http::write(sock, req, ec); + if(ec == beast::http::error::end_of_stream) + { + // This special error received on a write indicates that the + // semantics of the sent message are such that the connection + // should be closed after the response is done. We do a TCP/IP + // "half-close" here to shut down our end. + // + sock.shutdown(boost::asio::ip::tcp::socket::shutdown_send, ec); + if(ec) + { + // A failed half-close affects only this host, so report + // it and move on instead of ending the whole crawl. + fail("shutdown", ec); + continue; + } + } + if(ec) + { + fail("write", ec); + continue; + } + + // This buffer is needed for reading + beast::multi_buffer b; + + // The response will go into this object + beast::http::response res; + + // Read the response + beast::http::read(sock, b, res, ec); + if(ec == beast::http::error::end_of_stream) + { + // This special error means that the other end closed the socket, + // which is what we want since we asked for Connection: close. + // However, we are going through a rather large number of servers + // and sometimes they misbehave. + ec = {}; + } + else if(ec) + { + fail("read", ec); + continue; + } + + // Now we do the other half of the close, + // which is to shut down the receiver. 
+ sock.shutdown(boost::asio::ip::tcp::socket::shutdown_receive, ec); + // not_connected only means the connection is already gone (for + // example the peer closed or reset it first), so it is ignored. + if(ec && ec != boost::asio::error::not_connected) + { + fail("shutdown", ec); + continue; + } + + std::cout << res << std::endl; } }