Files
boost_beast/example/http-crawl/http_crawl.cpp

138 lines
4.0 KiB
C++
Raw Normal View History

//
// Copyright (c) 2013-2017 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
2017-07-20 08:01:46 -07:00
#include "urls_large_data.hpp"
2017-07-20 08:01:46 -07:00
#include <beast/core/multi_buffer.hpp>
2016-06-20 10:53:31 -04:00
#include <beast/http.hpp>
2017-06-18 18:44:28 -07:00
#include <beast/version.hpp>
2017-07-20 08:01:46 -07:00
#include <boost/asio.hpp>
2017-06-18 18:44:28 -07:00
#include <cstdlib>
2017-07-20 08:01:46 -07:00
#include <iostream>
2017-06-19 14:41:28 -07:00
using tcp = boost::asio::ip::tcp; // from <boost/asio.hpp>
namespace http = beast::http; // from <beast/http.hpp>
2017-07-20 08:01:46 -07:00
/*  Report a failed operation on standard error.

    Writes one line of the form "<what>: <message>", where
    <message> is the text returned by `ec.message()`.

    @param ec   The error code describing the failure.
    @param what A streamable label naming the operation that failed.
*/
template<class String>
void
err(beast::error_code const& ec, String const& what)
{
    auto const reason = ec.message();
    std::cerr << what;
    std::cerr << ": ";
    std::cerr << reason << std::endl;
}
2017-06-18 18:44:28 -07:00
/* This simple program just visits a list with a few
thousand domain names and tries to retrieve and print
the home page of each site.
*/
int
main(int, char const*[])
2017-07-20 08:01:46 -07:00
{
2017-06-18 18:44:28 -07:00
// A helper for reporting errors
auto const fail =
[](std::string what, beast::error_code ec)
{
std::cerr << what << ": " << ec.message() << std::endl;
std::cerr.flush();
return EXIT_FAILURE;
};
// Obligatory Asio variable
boost::asio::io_service ios;
// Loop over all the URLs
2017-07-20 08:01:46 -07:00
for(auto const& host : urls_large_data())
{
2017-06-18 18:44:28 -07:00
beast::error_code ec;
// Look up the domain name
2017-06-19 14:41:28 -07:00
tcp::resolver r(ios);
2017-06-19 07:28:20 -07:00
auto lookup = r.resolve({host, "http"}, ec);
2017-06-18 18:44:28 -07:00
if(ec)
{
fail("resolve", ec);
continue;
}
// Now create a socket and connect
2017-06-19 14:41:28 -07:00
tcp::socket sock(ios);
2017-06-18 18:44:28 -07:00
boost::asio::connect(sock, lookup, ec);
if(ec)
{
fail("connect", ec);
continue;
}
// Grab the remote endpoint
auto ep = sock.remote_endpoint(ec);
if(ec)
{
fail("remote_endpoint", ec);
continue;
2017-07-20 08:01:46 -07:00
}
2017-06-18 18:44:28 -07:00
// Set up an HTTP GET request
http::request<http::string_body> req{http::verb::get, "/", 11};
req.set(http::field::host, host + std::string(":") + std::to_string(ep.port()));
2017-06-19 14:41:28 -07:00
req.set(http::field::user_agent, BEAST_VERSION_STRING);
2017-06-18 18:44:28 -07:00
// Set the Connection: close field, this way the server will close
// the connection. This consumes less resources (no TIME_WAIT) because
// of the graceful close. It also makes things go a little faster.
//
2017-06-19 14:41:28 -07:00
req.set(http::field::connection, "close");
2017-06-18 18:44:28 -07:00
// Send the GET request
2017-06-19 14:41:28 -07:00
http::write(sock, req, ec);
if(ec == http::error::end_of_stream)
2017-06-18 18:44:28 -07:00
{
// This special error received on a write indicates that the
// semantics of the sent message are such that the connection
// should be closed after the response is done. We do a TCP/IP
// "half-close" here to shut down our end.
//
2017-06-19 14:41:28 -07:00
sock.shutdown(tcp::socket::shutdown_send, ec);
2017-06-19 19:42:06 -07:00
if(ec && ec != beast::errc::not_connected)
2017-06-18 18:44:28 -07:00
return fail("shutdown", ec);
}
if(ec)
{
fail("write", ec);
continue;
}
// This buffer is needed for reading
beast::multi_buffer b;
// The response will go into this object
2017-06-19 14:41:28 -07:00
http::response<http::string_body> res;
2017-06-18 18:44:28 -07:00
// Read the response
2017-06-19 14:41:28 -07:00
http::read(sock, b, res, ec);
if(ec == http::error::end_of_stream)
2017-07-20 08:01:46 -07:00
{
2017-06-18 18:44:28 -07:00
// This special error means that the other end closed the socket,
// which is what we want since we asked for Connection: close.
// However, we are going through a rather large number of servers
// and sometimes they misbehave.
ec = {};
2017-07-20 08:01:46 -07:00
}
2017-06-18 18:44:28 -07:00
else if(ec)
2017-07-20 08:01:46 -07:00
{
2017-06-18 18:44:28 -07:00
fail("read", ec);
continue;
2017-07-20 08:01:46 -07:00
}
2017-06-18 18:44:28 -07:00
// Now we do the other half of the close,
// which is to shut down the receiver.
2017-06-19 14:41:28 -07:00
sock.shutdown(tcp::socket::shutdown_receive, ec);
2017-06-19 19:42:06 -07:00
if(ec && ec != beast::errc::not_connected)
2017-06-18 18:44:28 -07:00
return fail("shutdown", ec);
std::cout << res << std::endl;
2017-07-20 08:01:46 -07:00
}
}