added Neon support

This commit is contained in:
joaquintides
2022-09-28 11:21:44 +02:00
parent 5293b328b7
commit 5a4a2f3ffd

View File

@ -34,8 +34,8 @@
defined(_M_X64)||(defined(_M_IX86_FP)&&_M_IX86_FP>=2)
#define BOOST_UNORDERED_SSE2
#include <emmintrin.h>
#elif defined(__ARM_NEON)
#define BOOST_UNORDERED_NEON
#elif defined(__ARM_NEON)&&!defined(__ARM_BIG_ENDIAN)
#define BOOST_UNORDERED_LITTLE_ENDIAN_NEON
#include <arm_neon.h>
#endif
@ -213,6 +213,146 @@ private:
__m128i m;
};
#elif defined(BOOST_UNORDERED_LITTLE_ENDIAN_NEON) /* macro has an empty definition, so it must be tested with defined() */
struct group15
{
static constexpr int N=15;
inline void set(std::size_t pos,std::size_t hash)
{
BOOST_ASSERT(pos<N);
at(pos)=adjust_hash(hash);
}
inline void set_sentinel()
{
at(N-1)=sentinel_;
}
inline bool is_sentinel(std::size_t pos)const
{
BOOST_ASSERT(pos<N);
return at(pos)==sentinel_;
}
inline void reset(std::size_t pos)
{
BOOST_ASSERT(pos<N);
at(pos)=available_;
}
static inline void reset(unsigned char* pc)
{
*pc=available_;
}
inline int match(std::size_t hash)const
{
return simde_mm_movemask_epi8(
vceqq_s8(m,vdupq_n_s8(adjust_hash(hash))))&0x7FFF;
}
inline bool is_not_overflowed(std::size_t hash)const
{
static constexpr unsigned char shift[]={1,2,4,8,16,32,64,128};
return !(overflow()&shift[hash%8]);
}
inline void mark_overflow(std::size_t hash)
{
overflow()|=static_cast<unsigned char>(1<<(hash%8));
}
inline int match_available()const
{
return simde_mm_movemask_epi8(vceqq_s8(m,vdupq_n_s8(0)))&0x7FFF;
}
inline int match_occupied()const
{
return simde_mm_movemask_epi8(
vcgtq_u8(vreinterpretq_u8_s8(m),vdupq_n_u8(0)))&0x7FFF;
}
inline int match_really_occupied()const /* excluding sentinel */
{
return at(N-1)==sentinel_?match_occupied()&0x3FFF:match_occupied();
}
static constexpr struct{alignas(16) unsigned char storage[N+1];} dummy_group=
{{0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0}};
private:
static constexpr unsigned char available_=0,
sentinel_=1;
inline static unsigned char adjust_hash(unsigned char hash)
{
static constexpr unsigned char table[]={
2,3,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
};
return table[hash];
}
/* copied from https://github.com/simd-everywhere/simde/blob/master/simde/x86/sse2.h#L3763 */
static inline int simde_mm_movemask_epi8(uint8x16_t a)
{
static const uint8_t md[16]={
1 << 0, 1 << 1, 1 << 2, 1 << 3,
1 << 4, 1 << 5, 1 << 6, 1 << 7,
1 << 0, 1 << 1, 1 << 2, 1 << 3,
1 << 4, 1 << 5, 1 << 6, 1 << 7,
};
uint8x16_t masked=vandq_u8(vld1q_u8(md),a);
uint8x8x2_t tmp=vzip_u8(vget_low_u8(masked),vget_high_u8(masked));
uint16x8_t x=vreinterpretq_u16_u8(vcombine_u8(tmp.val[0],tmp.val[1]));
return vaddvq_u16(x);
}
inline unsigned char& at(std::size_t pos)
{
return reinterpret_cast<unsigned char*>(&m)[pos];
}
inline unsigned char at(std::size_t pos)const
{
return reinterpret_cast<const unsigned char*>(&m)[pos];
}
inline unsigned char& overflow()
{
return at(N);
}
inline unsigned char overflow()const
{
return at(N);
}
int8x16_t m;
};
#endif
inline unsigned int unchecked_countr_zero(int x)
@ -981,8 +1121,8 @@ private:
#undef BOOST_UNORDERED_ASSUME
#undef BOOST_UNORDERED_HAS_BUILTIN
#ifdef BOOST_UNORDERED_NEON
#undef BOOST_UNORDERED_NEON
#ifdef BOOST_UNORDERED_LITTLE_ENDIAN_NEON
#undef BOOST_UNORDERED_LITTLE_ENDIAN_NEON
#endif
#ifdef BOOST_UNORDERED_SSE2
#undef BOOST_UNORDERED_SSE2