diff --git a/CHANGELOG.md b/CHANGELOG.md index 674a8511..35e74d09 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ Version 78: * Fix warning in zlib * Header file tidying * Tidy up FieldsReader doc +* Add Boost.Locale utf8 benchmark comparison HTTP: diff --git a/include/beast/websocket/detail/utf8_checker.hpp b/include/beast/websocket/detail/utf8_checker.hpp index 9feb9c66..cbd9b70c 100644 --- a/include/beast/websocket/detail/utf8_checker.hpp +++ b/include/beast/websocket/detail/utf8_checker.hpp @@ -274,7 +274,14 @@ write(std::uint8_t const* in, std::size_t size) std::size_t>(0x8080808080808080 & ~std::size_t{0}); while(in < last) { +#if 0 + std::size_t temp; + std::memcpy(&temp, in, sizeof(temp)); + if((temp & mask) != 0) +#else + // Technically UB but works on all known platforms if((*reinterpret_cast(in) & mask) != 0) +#endif { size = size - (in - in0); goto slow; diff --git a/test/benchmarks/CMakeLists.txt b/test/benchmarks/CMakeLists.txt index e018b1a6..f6c0afcb 100644 --- a/test/benchmarks/CMakeLists.txt +++ b/test/benchmarks/CMakeLists.txt @@ -15,6 +15,7 @@ add_executable (benchmarks buffers.cpp nodejs_parser.cpp parser.cpp + utf8_checker.cpp ) target_link_libraries(benchmarks diff --git a/test/benchmarks/Jamfile b/test/benchmarks/Jamfile index 7070bf84..a3ff3bd6 100644 --- a/test/benchmarks/Jamfile +++ b/test/benchmarks/Jamfile @@ -10,6 +10,7 @@ unit-test benchmarks : buffers.cpp nodejs_parser.cpp parser.cpp + utf8_checker.cpp : coverage:no ubasan:no diff --git a/test/benchmarks/utf8_checker.cpp b/test/benchmarks/utf8_checker.cpp new file mode 100644 index 00000000..bdb8c5da --- /dev/null +++ b/test/benchmarks/utf8_checker.cpp @@ -0,0 +1,140 @@ +// +// Copyright (c) 2013-2017 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#include +#include +#include +#include +#include + +namespace beast { + +class utf8_checker_test : public beast::unit_test::suite +{ + std::mt19937 rng_; + +public: + using size_type = std::uint64_t; + + class timer + { + using clock_type = + std::chrono::system_clock; + + clock_type::time_point when_; + + public: + using duration = + clock_type::duration; + + timer() + : when_(clock_type::now()) + { + } + + duration + elapsed() const + { + return clock_type::now() - when_; + } + }; + + template + UInt + rand(std::size_t n) + { + return static_cast( + std::uniform_int_distribution< + std::size_t>{0, n-1}(rng_)); + } + + static + inline + size_type + throughput(std::chrono::duration< + double> const& elapsed, size_type items) + { + using namespace std::chrono; + return static_cast( + 1 / (elapsed/items).count()); + } + + std::string + corpus(std::size_t n) + { + std::string s; + s.reserve(n); + while(n--) + s.push_back(static_cast( + ' ' + rand(95))); + return s; + } + + void + checkLocale(std::string const& s) + { + using namespace boost::locale; + auto p = s.begin(); + auto const e = s.end(); + while(p != e) + { + auto cp = utf::utf_traits::decode(p, e); + if(cp == utf::illegal) + break; + } + } + + void + checkBeast(std::string const& s) + { + beast::websocket::detail::check_utf8( + s.data(), s.size()); + } + + template + typename timer::clock_type::duration + test(F const& f) + { + timer t; + f(); + return t.elapsed(); + } + + void + run() override + { + auto const s = corpus(32 * 1024 * 1024); + for(int i = 0; i < 5; ++ i) + { + auto const elapsed = test([&]{ + checkBeast(s); + checkBeast(s); + checkBeast(s); + checkBeast(s); + checkBeast(s); + }); + log << "beast: " << throughput(elapsed, s.size()) << " char/s" << std::endl; + } + for(int i = 0; i < 5; ++ i) + { + auto const elapsed = test([&]{ + checkLocale(s); + checkLocale(s); + checkLocale(s); + checkLocale(s); + checkLocale(s); + }); + log << "locale: " << throughput(elapsed, s.size()) << " char/s" << std::endl; + } + pass(); + } +}; + +BEAST_DEFINE_TESTSUITE(utf8_checker,benchmarks,beast); + +} // beast +