mirror of
https://github.com/boostorg/unordered.git
synced 2025-07-29 19:07:15 +02:00
added Neon support
This commit is contained in:
@ -34,8 +34,8 @@
|
||||
defined(_M_X64)||(defined(_M_IX86_FP)&&_M_IX86_FP>=2)
|
||||
#define BOOST_UNORDERED_SSE2
|
||||
#include <emmintrin.h>
|
||||
#elif defined(__ARM_NEON)
|
||||
#define BOOST_UNORDERED_NEON
|
||||
#elif defined(__ARM_NEON)&&!defined(__ARM_BIG_ENDIAN)
|
||||
#define BOOST_UNORDERED_LITTLE_ENDIAN_NEON
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
@ -213,6 +213,146 @@ private:
|
||||
|
||||
__m128i m;
|
||||
};
|
||||
|
||||
#elif BOOST_UNORDERED_LITTLE_ENDIAN_NEON
|
||||
|
||||
struct group15
|
||||
{
|
||||
static constexpr int N=15;
|
||||
|
||||
inline void set(std::size_t pos,std::size_t hash)
|
||||
{
|
||||
BOOST_ASSERT(pos<N);
|
||||
at(pos)=adjust_hash(hash);
|
||||
}
|
||||
|
||||
inline void set_sentinel()
|
||||
{
|
||||
at(N-1)=sentinel_;
|
||||
}
|
||||
|
||||
inline bool is_sentinel(std::size_t pos)const
|
||||
{
|
||||
BOOST_ASSERT(pos<N);
|
||||
return at(pos)==sentinel_;
|
||||
}
|
||||
|
||||
inline void reset(std::size_t pos)
|
||||
{
|
||||
BOOST_ASSERT(pos<N);
|
||||
at(pos)=available_;
|
||||
}
|
||||
|
||||
static inline void reset(unsigned char* pc)
|
||||
{
|
||||
*pc=available_;
|
||||
}
|
||||
|
||||
inline int match(std::size_t hash)const
|
||||
{
|
||||
return simde_mm_movemask_epi8(
|
||||
vceqq_s8(m,vdupq_n_s8(adjust_hash(hash))))&0x7FFF;
|
||||
}
|
||||
|
||||
inline bool is_not_overflowed(std::size_t hash)const
|
||||
{
|
||||
static constexpr unsigned char shift[]={1,2,4,8,16,32,64,128};
|
||||
|
||||
return !(overflow()&shift[hash%8]);
|
||||
}
|
||||
|
||||
inline void mark_overflow(std::size_t hash)
|
||||
{
|
||||
overflow()|=static_cast<unsigned char>(1<<(hash%8));
|
||||
}
|
||||
|
||||
inline int match_available()const
|
||||
{
|
||||
return simde_mm_movemask_epi8(vceqq_s8(m,vdupq_n_s8(0)))&0x7FFF;
|
||||
}
|
||||
|
||||
inline int match_occupied()const
|
||||
{
|
||||
return simde_mm_movemask_epi8(
|
||||
vcgtq_u8(vreinterpretq_u8_s8(m),vdupq_n_u8(0)))&0x7FFF;
|
||||
}
|
||||
|
||||
inline int match_really_occupied()const /* excluding sentinel */
|
||||
{
|
||||
return at(N-1)==sentinel_?match_occupied()&0x3FFF:match_occupied();
|
||||
}
|
||||
|
||||
static constexpr struct{alignas(16) unsigned char storage[N+1];} dummy_group=
|
||||
{{0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0}};
|
||||
|
||||
private:
|
||||
static constexpr unsigned char available_=0,
|
||||
sentinel_=1;
|
||||
|
||||
inline static unsigned char adjust_hash(unsigned char hash)
|
||||
{
|
||||
static constexpr unsigned char table[]={
|
||||
2,3,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
|
||||
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
|
||||
32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
|
||||
48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
|
||||
64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
|
||||
80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
|
||||
96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
|
||||
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
|
||||
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
|
||||
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
|
||||
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
|
||||
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
|
||||
208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
|
||||
224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
|
||||
240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
|
||||
};
|
||||
|
||||
return table[hash];
|
||||
}
|
||||
|
||||
/* copied from https://github.com/simd-everywhere/simde/blob/master/simde/x86/sse2.h#L3763 */
|
||||
|
||||
static inline int simde_mm_movemask_epi8(uint8x16_t a)
|
||||
{
|
||||
static const uint8_t md[16]={
|
||||
1 << 0, 1 << 1, 1 << 2, 1 << 3,
|
||||
1 << 4, 1 << 5, 1 << 6, 1 << 7,
|
||||
1 << 0, 1 << 1, 1 << 2, 1 << 3,
|
||||
1 << 4, 1 << 5, 1 << 6, 1 << 7,
|
||||
};
|
||||
|
||||
uint8x16_t masked=vandq_u8(vld1q_u8(md),a);
|
||||
uint8x8x2_t tmp=vzip_u8(vget_low_u8(masked),vget_high_u8(masked));
|
||||
uint16x8_t x=vreinterpretq_u16_u8(vcombine_u8(tmp.val[0],tmp.val[1]));
|
||||
return vaddvq_u16(x);
|
||||
}
|
||||
|
||||
inline unsigned char& at(std::size_t pos)
|
||||
{
|
||||
return reinterpret_cast<unsigned char*>(&m)[pos];
|
||||
}
|
||||
|
||||
inline unsigned char at(std::size_t pos)const
|
||||
{
|
||||
return reinterpret_cast<const unsigned char*>(&m)[pos];
|
||||
}
|
||||
|
||||
inline unsigned char& overflow()
|
||||
{
|
||||
return at(N);
|
||||
}
|
||||
|
||||
inline unsigned char overflow()const
|
||||
{
|
||||
return at(N);
|
||||
}
|
||||
|
||||
int8x16_t m;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
inline unsigned int unchecked_countr_zero(int x)
|
||||
@ -981,8 +1121,8 @@ private:
|
||||
|
||||
#undef BOOST_UNORDERED_ASSUME
|
||||
#undef BOOST_UNORDERED_HAS_BUILTIN
|
||||
#ifdef BOOST_UNORDERED_NEON
|
||||
#undef BOOST_UNORDERED_NEON
|
||||
#ifdef BOOST_UNORDERED_LITTLE_ENDIAN_NEON
|
||||
#undef BOOST_UNORDERED_LITTLE_ENDIAN_NEON
|
||||
#endif
|
||||
#ifdef BOOST_UNORDERED_SSE2
|
||||
#undef BOOST_UNORDERED_SSE2
|
||||
|
Reference in New Issue
Block a user