From 312bd6b21da6d40bccc19bd9a866af6e096fd1b7 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 2 Nov 2016 08:21:35 -0400 Subject: [PATCH] Optimize mask operations --- CHANGELOG.md | 3 + include/beast/websocket/detail/mask.hpp | 122 ++++++++++++++++-------- 2 files changed, 87 insertions(+), 38 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 11759d56..5a498eb4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,9 @@ 1.0.0-b19 +WebSocket + * Optimize utf8 validation +* Optimize mask operations -------------------------------------------------------------------------------- diff --git a/include/beast/websocket/detail/mask.hpp b/include/beast/websocket/detail/mask.hpp index 12254ea7..02ca38fd 100644 --- a/include/beast/websocket/detail/mask.hpp +++ b/include/beast/websocket/detail/mask.hpp @@ -105,73 +105,119 @@ ror(T t, unsigned n = 1) static_cast::type>(t) >> n)); } -// 32-bit Unoptimized +// 32-bit optimized // template void -mask_inplace_general( +mask_inplace_fast( boost::asio::mutable_buffer const& b, std::uint32_t& key) { using boost::asio::buffer_cast; using boost::asio::buffer_size; - auto const n = buffer_size(b); + auto n = buffer_size(b); auto p = buffer_cast(b); + if(n >= sizeof(key)) + { + // Bring p to 4-byte alignment + auto const i = reinterpret_cast< + std::uintptr_t>(p) & (sizeof(key)-1); + switch(i) + { + case 1: p[2] ^= static_cast(key >> 16); + case 2: p[1] ^= static_cast(key >> 8); + case 3: p[0] ^= static_cast(key); + { + auto const d = static_cast< + unsigned>(sizeof(key) - i); + key = ror(key, 8*d); + n -= d; + p += d; + } + default: + break; + } + } + + // Mask 4 bytes at a time for(auto i = n / sizeof(key); i; --i) { - *p ^= key ; ++p; - *p ^= (key >> 8); ++p; - *p ^= (key >>16); ++p; - *p ^= (key >>24); ++p; + *reinterpret_cast< + std::uint32_t*>(p) ^= key; + p += sizeof(key); } - auto const m = - static_cast(n % sizeof(key)); - switch(m) + + // Leftovers + n &= sizeof(key)-1; + switch(n) { - case 3: p[2] ^= (key >>16); - case 2: p[1] ^= (key >> 8); - case 1: p[0] ^= key; - key = ror(key, m*8); + case 3: p[2] ^= static_cast(key >> 16); + case 2: p[1] ^= static_cast(key >> 8); + case 1: p[0] ^= static_cast(key); + key = ror(key, static_cast(8*n)); default: break; } } -// 64-bit unoptimized +// 64-bit optimized // template void -mask_inplace_general( +mask_inplace_fast( boost::asio::mutable_buffer const& b, std::uint64_t& key) { using boost::asio::buffer_cast; using boost::asio::buffer_size; - auto const n = buffer_size(b); + auto n = buffer_size(b); auto p = buffer_cast(b); + if(n >= sizeof(key)) + { + // Bring p to 8-byte alignment + auto const i = reinterpret_cast< + std::uintptr_t>(p) & (sizeof(key)-1); + switch(i) + { + case 1: p[6] ^= static_cast(key >> 48); + case 2: p[5] ^= static_cast(key >> 40); + case 3: p[4] ^= static_cast(key >> 32); + case 4: p[3] ^= static_cast(key >> 24); + case 5: p[2] ^= static_cast(key >> 16); + case 6: p[1] ^= static_cast(key >> 8); + case 7: p[0] ^= static_cast(key); + { + auto const d = static_cast< + unsigned>(sizeof(key) - i); + key = ror(key, 8*d); + n -= d; + p += d; + } + default: + break; + } + } + + // Mask 8 bytes at a time for(auto i = n / sizeof(key); i; --i) { - *p ^= key ; ++p; - *p ^= (key >> 8); ++p; - *p ^= (key >>16); ++p; - *p ^= (key >>24); ++p; - *p ^= (key >>32); ++p; - *p ^= (key >>40); ++p; - *p ^= (key >>48); ++p; - *p ^= (key >>56); ++p; + *reinterpret_cast< + std::uint64_t*>(p) ^= key; + p += sizeof(key); } - auto const m = - static_cast(n % sizeof(key)); - switch(m) + + // Leftovers + n &= sizeof(key)-1; + switch(n) { - case 7: p[6] ^= (key >>16); - case 6: p[5] ^= (key >> 8); - case 5: p[4] ^= key; - case 4: p[3] ^= (key >>24); - case 3: p[2] ^= (key >>16); - case 2: p[1] ^= (key >> 8); - case 1: p[0] ^= key; - key = ror(key, m*8); + case 7: p[6] ^= static_cast(key >> 48); + case 6: p[5] ^= static_cast(key >> 40); + case 5: p[4] ^= static_cast(key >> 32); + case 4: p[3] ^= static_cast(key >> 24); + case 3: p[2] ^= static_cast(key >> 16); + case 2: p[1] ^= static_cast(key >> 8); + case 1: p[0] ^= static_cast(key); + key = ror(key, static_cast(8*n)); default: break; } @@ -183,7 +229,7 @@ mask_inplace( boost::asio::mutable_buffer const& b, std::uint32_t& key) { - mask_inplace_general(b, key); + mask_inplace_fast(b, key); } inline @@ -192,7 +238,7 @@ mask_inplace( boost::asio::mutable_buffer const& b, std::uint64_t& key) { - mask_inplace_general(b, key); + mask_inplace_fast(b, key); } // Apply mask in place