optimized SSE2-based group15

This commit is contained in:
joaquintides
2022-12-04 12:04:04 +01:00
parent 9716731864
commit f8d882b6eb

View File

@ -135,6 +135,7 @@ static const std::size_t default_bucket_count = 0;
#if defined(BOOST_UNORDERED_SSE2)
#if 0
struct group15
{
static constexpr int N=15;
@ -299,6 +300,157 @@ private:
alignas(16) __m128i m;
};
#else
/* group15: metadata for N=15 slots plus one overflow byte, packed in a single
 * 16-byte SSE2 word (m).
 *
 *   - Bytes 0..14 hold one value per slot: 0 (available), 1 (sentinel) or a
 *     reduced hash value, which is always in [2,255] (see
 *     reduced_hash_table()).
 *   - Byte 15 is an 8-bit overflow mask; a hash maps to bit hash%8.
 *
 * The value stored for a slot depends on its position: set(pos,hash) stores
 * table[(hash+pos)&0xff]. This lets match(hash) obtain the 16 values to
 * compare against with one unaligned load of 16 consecutive table bytes
 * starting at index hash&0xff.
 */
struct group15
{
  static constexpr int N=15;

  /* Plain byte buffer with the size and alignment of a group whose only
   * non-zero byte is the sentinel value at position N-1.
   */
  struct dummy_group_type
  {
    alignas(16) unsigned char storage[N+1]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0};
  };

  /* Marks every slot as available and clears the overflow byte. */
  inline void initialize(){m=_mm_setzero_si128();}

  /* Records the position-dependent reduced hash of hash in slot pos. */
  inline void set(std::size_t pos,std::size_t hash)
  {
    BOOST_ASSERT(pos<N);
    at(pos)=reduced_hash(hash+pos);
  }

  /* Writes the sentinel value into the last slot (N-1). */
  inline void set_sentinel()
  {
    at(N-1)=sentinel_;
  }

  /* Returns true iff slot pos holds the sentinel value. */
  inline bool is_sentinel(std::size_t pos)const
  {
    BOOST_ASSERT(pos<N);
    return at(pos)==sentinel_;
  }

  /* Marks slot pos as available. */
  inline void reset(std::size_t pos)
  {
    BOOST_ASSERT(pos<N);
    at(pos)=available_;
  }

  /* Marks as available the slot whose metadata byte is pointed to by pc. */
  static inline void reset(unsigned char* pc)
  {
    *pc=available_;
  }

  /* Returns a 15-bit mask whose bit i is set iff slot i holds the reduced
   * hash that set(i,hash) would have stored. Bit 15 (overflow byte) is
   * always cleared.
   */
  inline int match(std::size_t hash)const
  {
    return _mm_movemask_epi8(_mm_cmpeq_epi8(m,match_word(hash)))&0x7FFF;
  }

  /* Returns true iff the overflow bit associated to hash (bit hash%8) is
   * not set.
   */
  inline bool is_not_overflowed(std::size_t hash)const
  {
    static constexpr unsigned char shift[]={1,2,4,8,16,32,64,128};

    return !(overflow()&shift[hash%8]);
  }

  /* Sets the overflow bit associated to hash (bit hash%8). */
  inline void mark_overflow(std::size_t hash)
  {
#if BOOST_WORKAROUND(BOOST_GCC, >= 50000 && BOOST_GCC < 60000)
    overflow() = static_cast<unsigned char>( overflow() | static_cast<unsigned char>(1<<(hash%8)) );
#else
    overflow()|=static_cast<unsigned char>(1<<(hash%8));
#endif
  }

  /* Given a pointer pc to a slot metadata byte, returns true iff the
   * overflow bit matching the value stored there is set in the enclosing
   * group. The group address is recovered from pc, which assumes groups are
   * aligned to sizeof(group15).
   *
   * Every table entry is congruent modulo 8 with its index, so (*pc-pos)%8
   * equals hash%8 for the hash passed to set(pos,hash). The bit is shared by
   * all hashes with the same value modulo 8, hence "maybe".
   */
  static inline bool maybe_caused_overflow(unsigned char* pc)
  {
    std::size_t pos=reinterpret_cast<uintptr_t>(pc)%sizeof(group15);
    group15    *pg=reinterpret_cast<group15*>(pc-pos);
    return !pg->is_not_overflowed(*pc-pos);
  }

  /* Returns a 15-bit mask with bit i set iff slot i is available (zero). */
  inline int match_available()const
  {
    return _mm_movemask_epi8(
      _mm_cmpeq_epi8(m,_mm_setzero_si128()))&0x7FFF;
  }

  /* Returns a 15-bit mask with bit i set iff slot i is not available; a
   * slot holding the sentinel counts as occupied here.
   */
  inline int match_occupied()const
  {
    return (~match_available())&0x7FFF;
  }

  /* Same as match_occupied(), but bit N-1 is cleared when the last slot
   * holds the sentinel.
   */
  inline int match_really_occupied()const /* excluding sentinel */
  {
    return at(N-1)==sentinel_?match_occupied()&0x3FFF:match_occupied();
  }

private:
  static constexpr unsigned char available_=0,
                                 sentinel_=1;

  /* Returns the reduced hash lookup table: 256 entries mapping an index to
   * itself except for 0 and 1 (reserved for available_ and sentinel_), which
   * map to 8 and 9, followed by a copy of the first 15 entries so that a
   * 16-byte load starting at any index in [0,255] stays within the array.
   *
   * The table is a function-local static rather than a class-scope static
   * constexpr member: the latter is odr-used here and would require a
   * namespace-scope definition before C++17.
   */
  static inline const unsigned char* reduced_hash_table()
  {
    static constexpr unsigned char table[]={
      8,9,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
      16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
      32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
      48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
      64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
      80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
      96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
      112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
      128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
      144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
      160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
      176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
      192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
      208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
      224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
      240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
      8,9,2,3,4,5,6,7,8,9,10,11,12,13,14,
    };

    return table;
  }

  /* Reduces hash to its lowest 8 bits for use as a table index. */
  static inline std::size_t table_index(std::size_t hash)
  {
#if defined(__MSVC_RUNTIME_CHECKS)
    /* explicit mask instead of a narrowing cast; presumably avoids MSVC's
     * runtime check on lossy conversions -- confirm
     */
    return hash&0xffu;
#else
    return static_cast<unsigned char>(hash);
#endif
  }

  /* Loads the 16 table bytes starting at the index for hash; byte i is the
   * value set(i,hash) stores in slot i.
   */
  inline static __m128i match_word(std::size_t hash)
  {
    return _mm_loadu_si128(reinterpret_cast<const __m128i*>(
      reduced_hash_table()+table_index(hash)));
  }

  /* Returns the table entry for the lowest 8 bits of hash. */
  inline static unsigned char reduced_hash(std::size_t hash)
  {
    return reduced_hash_table()[table_index(hash)];
  }

  /* Byte-wise access to the metadata word. */
  inline unsigned char& at(std::size_t pos)
  {
    return reinterpret_cast<unsigned char*>(&m)[pos];
  }

  inline unsigned char at(std::size_t pos)const
  {
    return reinterpret_cast<const unsigned char*>(&m)[pos];
  }

  /* The overflow mask lives in the byte right after the N slot bytes. */
  inline unsigned char& overflow()
  {
    return at(N);
  }

  inline unsigned char overflow()const
  {
    return at(N);
  }

  alignas(16) __m128i m;
};
#endif
#elif defined(BOOST_UNORDERED_LITTLE_ENDIAN_NEON)