diff --git a/include/boost/unordered/detail/foa.hpp b/include/boost/unordered/detail/foa.hpp index 282e9dc2..878a2073 100644 --- a/include/boost/unordered/detail/foa.hpp +++ b/include/boost/unordered/detail/foa.hpp @@ -34,8 +34,8 @@ defined(_M_X64)||(defined(_M_IX86_FP)&&_M_IX86_FP>=2) #define BOOST_UNORDERED_SSE2 #include -#elif defined(__ARM_NEON) -#define BOOST_UNORDERED_NEON +#elif defined(__ARM_NEON)&&!defined(__ARM_BIG_ENDIAN) /* the NEON path is selected only when __ARM_BIG_ENDIAN is not defined; NOTE(review): presumably because the byte indexing in at() and the lane-to-bit mapping of the movemask emulation below rely on little-endian lane order - confirm */ +#define BOOST_UNORDERED_LITTLE_ENDIAN_NEON #include #endif @@ -213,6 +213,146 @@ private: __m128i m; }; + +#elif BOOST_UNORDERED_LITTLE_ENDIAN_NEON /* NOTE(review): the macro is defined above with no replacement text, so when it is defined this #elif is left with an empty controlling expression; "#elif defined(BOOST_UNORDERED_LITTLE_ENDIAN_NEON)" looks intended - confirm */ + +struct group15 +{ /* NEON variant of group15: metadata bytes for N=15 slots are kept in the 16-lane byte vector m; byte N of the same vector is exposed through overflow() */ + static constexpr int N=15; + + inline void set(std::size_t pos,std::size_t hash) + { + BOOST_ASSERT(pos(1<<(hash%8)); /* NOTE(review): this statement does not parse as written; text appears to be missing from this copy of the patch (here, after the bare #include directives, and in the reinterpret_cast expressions inside at()) - restore it from the upstream commit before applying */ + } + + inline int match_available()const + { /* bit i (i in 0..14) of the result is set when byte i of m equals 0, the value of available_ */ + return simde_mm_movemask_epi8(vceqq_s8(m,vdupq_n_s8(0)))&0x7FFF; + } + + inline int match_occupied()const + { /* bit i (i in 0..14) of the result is set when byte i of m is nonzero (unsigned compare against 0); a slot holding sentinel_ is therefore included */ + return simde_mm_movemask_epi8( + vcgtq_u8(vreinterpretq_u8_s8(m),vdupq_n_u8(0)))&0x7FFF; + } + + inline int match_really_occupied()const /* excluding sentinel */ + { /* when slot N-1 holds sentinel_, bit N-1 is dropped from the occupied mask (0x3FFF keeps bits 0..13 only) */ + return at(N-1)==sentinel_?match_occupied()&0x3FFF:match_occupied(); + } + /* 16-byte, 16-aligned constant image: every byte is 0 except byte N-1, which is 1 (the value of sentinel_) */ + static constexpr struct{alignas(16) unsigned char storage[N+1];} dummy_group= + {{0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0}}; + +private: + static constexpr unsigned char available_=0, + sentinel_=1; + + inline static unsigned char adjust_hash(unsigned char hash) + { /* 256-entry lookup: 0 maps to 2 and 1 maps to 3, every other value maps to itself, so the returned byte never equals available_ (0) or sentinel_ (1) */ + static constexpr unsigned char table[]={ + 2,3,2,3,4,5,6,7,8,9,10,11,12,13,14,15, + 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, + 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, + 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, + 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, + 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95, + 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, + 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, + 
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, + 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, + 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255, + }; + + return table[hash]; + } + + /* copied from https://github.com/simd-everywhere/simde/blob/master/simde/x86/sse2.h#L3763 */ + + static inline int simde_mm_movemask_epi8(uint8x16_t a) + { /* NEON stand-in for the SSE2 byte movemask: each lane of a is ANDed with a single-bit weight (bit i mod 8 for lane i), the low and high halves are interleaved into 16-bit pairs and the eight pairs are summed; for a comparison result (lanes all-zeros or all-ones) bit i of the return value then reflects lane i. NOTE(review): vaddvq_u16 is documented by Arm as an A64 intrinsic, while the enabling condition above only tests __ARM_NEON - confirm 32-bit NEON targets are not expected to take this path */ + static const uint8_t md[16]={ + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + }; + + uint8x16_t masked=vandq_u8(vld1q_u8(md),a); + uint8x8x2_t tmp=vzip_u8(vget_low_u8(masked),vget_high_u8(masked)); + uint16x8_t x=vreinterpretq_u16_u8(vcombine_u8(tmp.val[0],tmp.val[1])); + return vaddvq_u16(x); + } + + inline unsigned char& at(std::size_t pos) + { /* byte-wise view of m indexed by pos; pos==N addresses the byte used by overflow() */ + return reinterpret_cast(&m)[pos]; + } + + inline unsigned char at(std::size_t pos)const + { + return reinterpret_cast(&m)[pos]; + } + + inline unsigned char& overflow() + { + return at(N); + } + + inline unsigned char overflow()const + { + return at(N); + } + + int8x16_t m; +}; + #endif inline unsigned int unchecked_countr_zero(int x) @@ -981,8 +1121,8 @@ private: #undef BOOST_UNORDERED_ASSUME #undef BOOST_UNORDERED_HAS_BUILTIN -#ifdef BOOST_UNORDERED_NEON -#undef BOOST_UNORDERED_NEON +#ifdef BOOST_UNORDERED_LITTLE_ENDIAN_NEON +#undef BOOST_UNORDERED_LITTLE_ENDIAN_NEON #endif #ifdef BOOST_UNORDERED_SSE2 #undef BOOST_UNORDERED_SSE2