Optimize mask operations

This commit is contained in:
Vinnie Falco
2016-11-02 08:21:35 -04:00
parent d91fbd0611
commit 312bd6b21d
2 changed files with 87 additions and 38 deletions

View File

@ -1,6 +1,9 @@
1.0.0-b19 1.0.0-b19
WebSocket
* Optimize utf8 validation * Optimize utf8 validation
* Optimize mask operations
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------

View File

@ -105,73 +105,119 @@ ror(T t, unsigned n = 1)
static_cast<typename std::make_unsigned<T>::type>(t) >> n)); static_cast<typename std::make_unsigned<T>::type>(t) >> n));
} }
// 32-bit Unoptimized // 32-bit optimized
// //
// NOTE(review): this is a two-column diff rendering. Where a line carries two
// columns, the left one is the removed mask_inplace_general and the right one
// is the added mask_inplace_fast; several single-column lines belong to only
// one of the two versions.
//
// Both versions XOR the bytes of buffer `b` in place with the bytes of `key`,
// low-order key byte first, and update `key` (taken by reference) through
// ror() when a partial word was consumed. ror() is defined above this hunk;
// presumably a right rotation -- confirm against its definition.
//
// New version, in order:
//   1. If at least sizeof(key) bytes are available, XOR the 1-3 leading bytes
//      needed to bring `p` to a 4-byte boundary and rotate `key` by as many
//      bytes.
//   2. XOR whole 32-bit words through a std::uint32_t pointer.
//   3. XOR the remaining 0-3 bytes and rotate `key` by that many bytes.
//
// NOTE(review): step 2 applies `key` in host byte order, while steps 1 and 3
// always pair p[0] with the low-order byte of `key`. These agree on a
// little-endian host; confirm that big-endian targets are not expected.
template<class = void> template<class = void>
void void
mask_inplace_general( mask_inplace_fast(
boost::asio::mutable_buffer const& b, boost::asio::mutable_buffer const& b,
std::uint32_t& key) std::uint32_t& key)
{ {
using boost::asio::buffer_cast; using boost::asio::buffer_cast;
using boost::asio::buffer_size; using boost::asio::buffer_size;
auto const n = buffer_size(b); auto n = buffer_size(b);
auto p = buffer_cast<std::uint8_t*>(b); auto p = buffer_cast<std::uint8_t*>(b);
// The size guard keeps d (at most 3) below n, so `n -= d` cannot wrap.
if(n >= sizeof(key))
{
// Bring p to 4-byte alignment
auto const i = reinterpret_cast<
std::uintptr_t>(p) & (sizeof(key)-1);
switch(i)
{
// The cases fall through on purpose: entering at case i XORs exactly
// sizeof(key) - i leading bytes.
case 1: p[2] ^= static_cast<std::uint8_t>(key >> 16);
case 2: p[1] ^= static_cast<std::uint8_t>(key >> 8);
case 3: p[0] ^= static_cast<std::uint8_t>(key);
// This compound statement is reached from every case above by fallthrough.
{
auto const d = static_cast<
unsigned>(sizeof(key) - i);
// Rotate by the d bytes just consumed, then step past them.
key = ror(key, 8*d);
n -= d;
p += d;
}
default:
break;
}
}
// Mask 4 bytes at a time
for(auto i = n / sizeof(key); i; --i) for(auto i = n / sizeof(key); i; --i)
{ {
*p ^= key ; ++p; *reinterpret_cast<
*p ^= (key >> 8); ++p; std::uint32_t*>(p) ^= key;
*p ^= (key >>16); ++p; p += sizeof(key);
*p ^= (key >>24); ++p;
} }
auto const m =
static_cast<std::uint8_t>(n % sizeof(key)); // Leftovers
switch(m) n &= sizeof(key)-1;
switch(n)
{ {
// Fallthrough again: entering at case k XORs the k trailing bytes, then
// `key` is rotated by k bytes; a count of zero leaves `key` untouched.
case 3: p[2] ^= (key >>16); case 3: p[2] ^= static_cast<std::uint8_t>(key >> 16);
case 2: p[1] ^= (key >> 8); case 2: p[1] ^= static_cast<std::uint8_t>(key >> 8);
case 1: p[0] ^= key; case 1: p[0] ^= static_cast<std::uint8_t>(key);
key = ror(key, m*8); key = ror(key, static_cast<unsigned>(8*n));
default: default:
break; break;
} }
} }
// 64-bit unoptimized // 64-bit optimized
// //
template<class = void> template<class = void>
void void
mask_inplace_general( mask_inplace_fast(
boost::asio::mutable_buffer const& b, boost::asio::mutable_buffer const& b,
std::uint64_t& key) std::uint64_t& key)
{ {
using boost::asio::buffer_cast; using boost::asio::buffer_cast;
using boost::asio::buffer_size; using boost::asio::buffer_size;
auto const n = buffer_size(b); auto n = buffer_size(b);
auto p = buffer_cast<std::uint8_t*>(b); auto p = buffer_cast<std::uint8_t*>(b);
if(n >= sizeof(key))
{
// Bring p to 8-byte alignment
auto const i = reinterpret_cast<
std::uintptr_t>(p) & (sizeof(key)-1);
switch(i)
{
case 1: p[6] ^= static_cast<std::uint8_t>(key >> 48);
case 2: p[5] ^= static_cast<std::uint8_t>(key >> 40);
case 3: p[4] ^= static_cast<std::uint8_t>(key >> 32);
case 4: p[3] ^= static_cast<std::uint8_t>(key >> 24);
case 5: p[2] ^= static_cast<std::uint8_t>(key >> 16);
case 6: p[1] ^= static_cast<std::uint8_t>(key >> 8);
case 7: p[0] ^= static_cast<std::uint8_t>(key);
{
auto const d = static_cast<
unsigned>(sizeof(key) - i);
key = ror(key, 8*d);
n -= d;
p += d;
}
default:
break;
}
}
// Mask 8 bytes at a time
for(auto i = n / sizeof(key); i; --i) for(auto i = n / sizeof(key); i; --i)
{ {
*p ^= key ; ++p; *reinterpret_cast<
*p ^= (key >> 8); ++p; std::uint64_t*>(p) ^= key;
*p ^= (key >>16); ++p; p += sizeof(key);
*p ^= (key >>24); ++p;
*p ^= (key >>32); ++p;
*p ^= (key >>40); ++p;
*p ^= (key >>48); ++p;
*p ^= (key >>56); ++p;
} }
auto const m =
static_cast<std::uint8_t>(n % sizeof(key)); // Leftovers
switch(m) n &= sizeof(key)-1;
switch(n)
{ {
case 7: p[6] ^= (key >>16); case 7: p[6] ^= static_cast<std::uint8_t>(key >> 48);
case 6: p[5] ^= (key >> 8); case 6: p[5] ^= static_cast<std::uint8_t>(key >> 40);
case 5: p[4] ^= key; case 5: p[4] ^= static_cast<std::uint8_t>(key >> 32);
case 4: p[3] ^= (key >>24); case 4: p[3] ^= static_cast<std::uint8_t>(key >> 24);
case 3: p[2] ^= (key >>16); case 3: p[2] ^= static_cast<std::uint8_t>(key >> 16);
case 2: p[1] ^= (key >> 8); case 2: p[1] ^= static_cast<std::uint8_t>(key >> 8);
case 1: p[0] ^= key; case 1: p[0] ^= static_cast<std::uint8_t>(key);
key = ror(key, m*8); key = ror(key, static_cast<unsigned>(8*n));
default: default:
break; break;
} }
@ -183,7 +229,7 @@ mask_inplace(
boost::asio::mutable_buffer const& b, boost::asio::mutable_buffer const& b,
std::uint32_t& key) std::uint32_t& key)
{ {
mask_inplace_general(b, key); mask_inplace_fast(b, key);
} }
inline inline
@ -192,7 +238,7 @@ mask_inplace(
boost::asio::mutable_buffer const& b, boost::asio::mutable_buffer const& b,
std::uint64_t& key) std::uint64_t& key)
{ {
mask_inplace_general(b, key); mask_inplace_fast(b, key);
} }
// Apply mask in place // Apply mask in place