Made fast_modulo universally available in 64-bit builds and never used in 32-bit builds

This commit is contained in:
joaquintides
2022-06-26 19:13:54 +02:00
parent 2670bb149d
commit fb733483c6
2 changed files with 26 additions and 78 deletions

View File

@@ -148,10 +148,6 @@ namespace boost {
#if ((((UINTPTR_MAX >> 16) >> 16) >> 16) >> 15) != 0 #if ((((UINTPTR_MAX >> 16) >> 16) >> 16) >> 15) != 0
#define BOOST_UNORDERED_FCA_HAS_64B_SIZE_T #define BOOST_UNORDERED_FCA_HAS_64B_SIZE_T
#endif #endif
#endif
#if !defined(BOOST_NO_INT64_T)
#define BOOST_UNORDERED_FCA_FASTMOD_SUPPORT
#endif #endif
template <class = void> struct prime_fmod_size template <class = void> struct prime_fmod_size
@@ -166,10 +162,10 @@ namespace boost {
static std::size_t const sizes_len; static std::size_t const sizes_len;
static std::size_t (*positions[])(std::size_t); static std::size_t (*positions[])(std::size_t);
#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) #if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
static uint64_t inv_sizes32[]; static uint64_t inv_sizes32[];
static std::size_t const inv_sizes32_len; static std::size_t const inv_sizes32_len;
#endif /* defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) */ #endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */
static inline std::size_t size_index(std::size_t n) static inline std::size_t size_index(std::size_t n)
{ {
@@ -192,7 +188,7 @@ namespace boost {
return hash % Size; return hash % Size;
} }
#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) #if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
// We emulate the techniques taken from: // We emulate the techniques taken from:
// Faster Remainder by Direct Computation: Applications to Compilers and // Faster Remainder by Direct Computation: Applications to Compilers and
// Software Libraries // Software Libraries
@@ -204,14 +200,15 @@ namespace boost {
static inline uint64_t get_remainder(uint64_t fractional, uint32_t d) static inline uint64_t get_remainder(uint64_t fractional, uint32_t d)
{ {
#if defined(_MSC_VER) && defined(_WIN64) #if defined(_MSC_VER)
// use MSVC instrinsic when available to avoid promotion to 128 bits // use MSVC intrinsics when available to avoid promotion to 128 bits
return __umulh(fractional, d); return __umulh(fractional, d);
#elif defined(BOOST_HAS_INT128) #elif defined(BOOST_HAS_INT128)
return static_cast<uint64_t>(((boost::uint128_type)fractional * d) >> 64); return static_cast<uint64_t>(((boost::uint128_type)fractional * d) >> 64);
#else #else
// portable implementation in the absence of boost::uint128_type // portable implementation in the absence of boost::uint128_type on 64 bits,
// which happens at least in GCC 4.5 and prior
uint64_t r1 = (fractional & UINT32_MAX) * d; uint64_t r1 = (fractional & UINT32_MAX) * d;
uint64_t r2 = (fractional >> 32 ) * d; uint64_t r2 = (fractional >> 32 ) * d;
@@ -225,12 +222,11 @@ namespace boost {
uint64_t fractional = M * a; uint64_t fractional = M * a;
return (uint32_t)(get_remainder(fractional, d)); return (uint32_t)(get_remainder(fractional, d));
} }
#endif /* defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) */ #endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */
static inline std::size_t position( static inline std::size_t position(
std::size_t hash, std::size_t size_index) std::size_t hash, std::size_t size_index)
{ {
#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT)
#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) #if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
std::size_t sizes_under_32bit = inv_sizes32_len; std::size_t sizes_under_32bit = inv_sizes32_len;
if (BOOST_LIKELY(size_index < sizes_under_32bit)) { if (BOOST_LIKELY(size_index < sizes_under_32bit)) {
@@ -239,13 +235,9 @@ namespace boost {
} else { } else {
return positions[size_index - sizes_under_32bit](hash); return positions[size_index - sizes_under_32bit](hash);
} }
#else
return fast_modulo(
hash, inv_sizes32[size_index], uint32_t(sizes[size_index]));
#endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */
#else #else
return positions[size_index](hash); return positions[size_index](hash);
#endif /* defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) */ #endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */
} }
}; // prime_fmod_size }; // prime_fmod_size
@@ -313,7 +305,7 @@ namespace boost {
// Similarly here, we have to re-express the integer initialization using // Similarly here, we have to re-express the integer initialization using
// arithmetic such that each literal can fit in a 32-bit value. // arithmetic such that each literal can fit in a 32-bit value.
// //
#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) #if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
// clang-format off // clang-format off
template <class T> template <class T>
uint64_t prime_fmod_size<T>::inv_sizes32[] = { uint64_t prime_fmod_size<T>::inv_sizes32[] = {
@@ -346,32 +338,25 @@ namespace boost {
(boost::ulong_long_type(5ul) << 32) + boost::ulong_long_type(1431653234ul) /* = 22906489714 */, (boost::ulong_long_type(5ul) << 32) + boost::ulong_long_type(1431653234ul) /* = 22906489714 */,
(boost::ulong_long_type(2ul) << 32) + boost::ulong_long_type(2863311496ul) /* = 11453246088 */, (boost::ulong_long_type(2ul) << 32) + boost::ulong_long_type(2863311496ul) /* = 11453246088 */,
(boost::ulong_long_type(1ul) << 32) + boost::ulong_long_type(1431655764ul) /* = 5726623060 */, (boost::ulong_long_type(1ul) << 32) + boost::ulong_long_type(1431655764ul) /* = 5726623060 */,
#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
};
#else
(boost::ulong_long_type(1ul) << 32) + boost::ulong_long_type(6ul) /* 4294967302 */
}; };
// clang-format on // clang-format on
#endif /* !defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */
template <class T> template <class T>
std::size_t const std::size_t const
prime_fmod_size<T>::inv_sizes32_len = sizeof(inv_sizes32) / prime_fmod_size<T>::inv_sizes32_len = sizeof(inv_sizes32) /
sizeof(inv_sizes32[0]); sizeof(inv_sizes32[0]);
#endif /* defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) */ #endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */
#define BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT(z, _, n) \ #define BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT(z, _, n) \
prime_fmod_size<T>::template modulo<n>, prime_fmod_size<T>::template modulo<n>,
template <class T> template <class T>
std::size_t (*prime_fmod_size<T>::positions[])(std::size_t) = { std::size_t (*prime_fmod_size<T>::positions[])(std::size_t) = {
#if !defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) #if !defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
BOOST_PP_SEQ_FOR_EACH(BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT, ~, BOOST_PP_SEQ_FOR_EACH(BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT, ~,
BOOST_UNORDERED_PRIME_FMOD_SIZES_32BIT) BOOST_UNORDERED_PRIME_FMOD_SIZES_32BIT)
#endif #else
#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
BOOST_PP_SEQ_FOR_EACH(BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT, ~, BOOST_PP_SEQ_FOR_EACH(BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT, ~,
BOOST_UNORDERED_PRIME_FMOD_SIZES_64BIT) BOOST_UNORDERED_PRIME_FMOD_SIZES_64BIT)
#endif #endif

View File

@@ -28,15 +28,6 @@ void macros_test()
"BOOST_UNORDERED_FCA_HAS_64B_SIZE_T is defined"); "BOOST_UNORDERED_FCA_HAS_64B_SIZE_T is defined");
#endif #endif
} }
#if ((defined(__GNUC__) || defined(__clang__)) && \
defined(__SIZEOF_INT128__)) || \
(defined(_MSC_VER) && defined(_M_X64))
#if !defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT)
BOOST_ERROR("fast_modulo support should be enabled for this toolchain but "
"it's currently not");
#endif
#endif
} }
// Pretty inefficient, but the test is fast enough. // Pretty inefficient, but the test is fast enough.
@@ -93,7 +84,7 @@ void prime_sizes_test()
BOOST_TEST_GT(sizes[i], sizes[i - 1]); BOOST_TEST_GT(sizes[i], sizes[i - 1]);
} }
#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) #if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
// now we wish to prove that if we do have the reciprocals stored, we have the // now we wish to prove that if we do have the reciprocals stored, we have the
// correct amount of them, i.e. one for every entry in sizes[] that fits in 32 // correct amount of them, i.e. one for every entry in sizes[] that fits in 32
// bits // bits
@@ -140,13 +131,14 @@ void prime_sizes_test()
void get_remainder_test() void get_remainder_test()
{ {
#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) #if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
struct struct
{ {
// internally, our get_remainder() function will rely on either msvc // boost::unordered::detail::prime_fmod_size<>::get_remainder
// intrinsics or 128 bit integer support which isn't always available. This // uses several internal implementations depending on the availability of
// is a slower, portable version we can use for verification of the // certain intrinsics or 128 bit integer support, defaulting to a slow,
// routines. // portable routine. The following is a transcription of the portable
// routine used here for verification purposes.
// //
boost::uint64_t operator()(boost::uint64_t f, boost::uint32_t d) boost::uint64_t operator()(boost::uint64_t f, boost::uint32_t d)
{ {
@@ -178,7 +170,7 @@ void get_remainder_test()
#endif #endif
} }
void modulo32_test() void modulo_test()
{ {
std::size_t* sizes = boost::unordered::detail::prime_fmod_size<>::sizes; std::size_t* sizes = boost::unordered::detail::prime_fmod_size<>::sizes;
@@ -188,39 +180,12 @@ void modulo32_test()
boost::detail::splitmix64 rng; boost::detail::splitmix64 rng;
for (std::size_t i = 0; i < 1000000u; ++i) { for (std::size_t i = 0; i < 1000000u; ++i) {
std::size_t hash = static_cast<boost::uint32_t>(rng()); std::size_t hash = static_cast<std::size_t>(rng());
for (std::size_t j = 0; j < sizes_len; ++j) {
std::size_t p1 =
boost::unordered::detail::prime_fmod_size<>::position(hash, j);
std::size_t p2 = hash % sizes[j];
if (!BOOST_TEST_EQ(p1, p2)) {
std::cerr << "hash: " << hash << ", j: " << j << ", sizes[" << j
<< "]: " << sizes[j] << std::endl;
return;
}
}
}
}
void modulo64_test()
{
#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
std::size_t* sizes = boost::unordered::detail::prime_fmod_size<>::sizes;
std::size_t const sizes_len =
boost::unordered::detail::prime_fmod_size<>::sizes_len;
boost::detail::splitmix64 rng;
for (std::size_t i = 0; i < 1000000u; ++i) {
std::size_t hash = rng();
for (std::size_t j = 0; j < sizes_len; ++j) { for (std::size_t j = 0; j < sizes_len; ++j) {
std::size_t h = hash; std::size_t h = hash;
#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) #if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
if (sizes[j] <= UINT_MAX) { if (sizes[j] <= UINT_MAX) {
h = boost::uint32_t(h) + boost::uint32_t(h >> 32); h = boost::uint32_t(h) + boost::uint32_t(h >> 32);
} }
@@ -237,16 +202,14 @@ void modulo64_test()
} }
} }
} }
#endif
} }
int main() int main()
{ {
macros_test(); macros_test();
prime_sizes_test(); prime_sizes_test();
modulo32_test();
get_remainder_test(); get_remainder_test();
modulo64_test(); modulo_test();
return boost::report_errors(); return boost::report_errors();
} }