From fb733483c6d2c3624839974ff2531bb0b304c4e5 Mon Sep 17 00:00:00 2001 From: joaquintides Date: Sun, 26 Jun 2022 19:13:54 +0200 Subject: [PATCH] made fast_modulo universally available in 64 bits and never used in 32 bits --- include/boost/unordered/detail/fca.hpp | 45 +++++++------------- test/unordered/prime_fmod_tests.cpp | 59 +++++--------------------- 2 files changed, 26 insertions(+), 78 deletions(-) diff --git a/include/boost/unordered/detail/fca.hpp b/include/boost/unordered/detail/fca.hpp index e04562ff..7f13d0c7 100644 --- a/include/boost/unordered/detail/fca.hpp +++ b/include/boost/unordered/detail/fca.hpp @@ -148,10 +148,6 @@ namespace boost { #if ((((UINTPTR_MAX >> 16) >> 16) >> 16) >> 15) != 0 #define BOOST_UNORDERED_FCA_HAS_64B_SIZE_T #endif -#endif - -#if !defined(BOOST_NO_INT64_T) -#define BOOST_UNORDERED_FCA_FASTMOD_SUPPORT #endif template struct prime_fmod_size @@ -166,10 +162,10 @@ namespace boost { static std::size_t const sizes_len; static std::size_t (*positions[])(std::size_t); -#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) +#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) static uint64_t inv_sizes32[]; static std::size_t const inv_sizes32_len; -#endif /* defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) */ +#endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */ static inline std::size_t size_index(std::size_t n) { @@ -192,7 +188,7 @@ namespace boost { return hash % Size; } -#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) +#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) // We emulate the techniques taken from: // Faster Remainder by Direct Computation: Applications to Compilers and // Software Libraries @@ -204,14 +200,15 @@ namespace boost { static inline uint64_t get_remainder(uint64_t fractional, uint32_t d) { -#if defined(_MSC_VER) && defined(_WIN64) - // use MSVC instrinsic when available to avoid promotion to 128 bits +#if defined(_MSC_VER) + // use MSVC intrinsics when available to avoid promotion to 128 bits return __umulh(fractional, d); #elif defined(BOOST_HAS_INT128) return static_cast(((boost::uint128_type)fractional * d) >> 64); #else - // portable implementation in the absence of boost::uint128_type + // portable implementation in the absence of boost::uint128_type on 64 bits, + // which happens at least in GCC 4.5 and prior uint64_t r1 = (fractional & UINT32_MAX) * d; uint64_t r2 = (fractional >> 32 ) * d; @@ -225,12 +222,11 @@ namespace boost { uint64_t fractional = M * a; return (uint32_t)(get_remainder(fractional, d)); } -#endif /* defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) */ +#endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */ static inline std::size_t position( std::size_t hash, std::size_t size_index) { -#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) #if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) std::size_t sizes_under_32bit = inv_sizes32_len; if (BOOST_LIKELY(size_index < sizes_under_32bit)) { @@ -239,13 +235,9 @@ namespace boost { } else { return positions[size_index - sizes_under_32bit](hash); } -#else - return fast_modulo( - hash, inv_sizes32[size_index], uint32_t(sizes[size_index])); -#endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */ #else return positions[size_index](hash); -#endif /* defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) */ +#endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */ } }; // prime_fmod_size @@ -313,7 +305,7 @@ namespace boost { // Similarly here, we have to re-express the integer initialization using // arithmetic such that each literal can fit in a 32-bit value. // -#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) +#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) // clang-format off template uint64_t prime_fmod_size::inv_sizes32[] = { @@ -346,34 +338,27 @@ namespace boost { (boost::ulong_long_type(5ul) << 32) + boost::ulong_long_type(1431653234ul) /* = 22906489714 */, (boost::ulong_long_type(2ul) << 32) + boost::ulong_long_type(2863311496ul) /* = 11453246088 */, (boost::ulong_long_type(1ul) << 32) + boost::ulong_long_type(1431655764ul) /* = 5726623060 */, -#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) - }; -#else - (boost::ulong_long_type(1ul) << 32) + boost::ulong_long_type(6ul) /* 4294967302 */ }; // clang-format on -#endif /* !defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */ template std::size_t const prime_fmod_size::inv_sizes32_len = sizeof(inv_sizes32) / sizeof(inv_sizes32[0]); -#endif /* defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) */ +#endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */ #define BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT(z, _, n) \ prime_fmod_size::template modulo, template std::size_t (*prime_fmod_size::positions[])(std::size_t) = { -#if !defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) +#if !defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) BOOST_PP_SEQ_FOR_EACH(BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT, ~, BOOST_UNORDERED_PRIME_FMOD_SIZES_32BIT) -#endif - -#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) - BOOST_PP_SEQ_FOR_EACH(BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT, ~, - BOOST_UNORDERED_PRIME_FMOD_SIZES_64BIT) +#else + BOOST_PP_SEQ_FOR_EACH(BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT, ~, + BOOST_UNORDERED_PRIME_FMOD_SIZES_64BIT) #endif }; diff --git a/test/unordered/prime_fmod_tests.cpp b/test/unordered/prime_fmod_tests.cpp index 231fd913..c9e22cc6 100644 --- a/test/unordered/prime_fmod_tests.cpp +++ b/test/unordered/prime_fmod_tests.cpp @@ -28,15 +28,6 @@ void macros_test() "BOOST_UNORDERED_FCA_HAS_64B_SIZE_T is defined"); #endif } - -#if ((defined(__GNUC__) || defined(__clang__)) && \ - defined(__SIZEOF_INT128__)) || \ - (defined(_MSC_VER) && defined(_M_X64)) -#if !defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) - BOOST_ERROR("fast_modulo support should be enabled for this toolchain but " - "it's currently not"); -#endif -#endif } // Pretty inefficient, but the test is fast enough. @@ -93,7 +84,7 @@ void prime_sizes_test() BOOST_TEST_GT(sizes[i], sizes[i - 1]); } -#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) +#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) // now we wish to prove that if we do have the reciprocals stored, we have the // correct amount of them, i.e. one for every entry in sizes[] that fits in 32 // bits @@ -140,13 +131,14 @@ void prime_sizes_test() void get_remainder_test() { -#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) +#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) struct { - // internally, our get_remainder() function will rely on either msvc - // intrinsics or 128 bit integer support which isn't always available. This - // is a slower, portable version we can use for verification of the - // routines. + // boost::unordered::detail::prime_fmod_size<>::get_remainder + // uses several internal implementations depending on the availability of + // certain intrinsics or 128 bit integer support, defaulting to a slow, + // portable routine. The following is a transcription of the portable + // routine used here for verification purposes. // boost::uint64_t operator()(boost::uint64_t f, boost::uint32_t d) { @@ -178,7 +170,7 @@ void get_remainder_test() #endif } -void modulo32_test() +void modulo_test() { std::size_t* sizes = boost::unordered::detail::prime_fmod_size<>::sizes; @@ -188,39 +180,12 @@ void modulo32_test() boost::detail::splitmix64 rng; for (std::size_t i = 0; i < 1000000u; ++i) { - std::size_t hash = static_cast(rng()); - - for (std::size_t j = 0; j < sizes_len; ++j) { - std::size_t p1 = - boost::unordered::detail::prime_fmod_size<>::position(hash, j); - - std::size_t p2 = hash % sizes[j]; - if (!BOOST_TEST_EQ(p1, p2)) { - std::cerr << "hash: " << hash << ", j: " << j << ", sizes[" << j - << "]: " << sizes[j] << std::endl; - return; - } - } - } -} - -void modulo64_test() -{ -#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) - std::size_t* sizes = boost::unordered::detail::prime_fmod_size<>::sizes; - - std::size_t const sizes_len = - boost::unordered::detail::prime_fmod_size<>::sizes_len; - - boost::detail::splitmix64 rng; - - for (std::size_t i = 0; i < 1000000u; ++i) { - std::size_t hash = rng(); + std::size_t hash = static_cast(rng()); for (std::size_t j = 0; j < sizes_len; ++j) { std::size_t h = hash; -#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) +#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) if (sizes[j] <= UINT_MAX) { h = boost::uint32_t(h) + boost::uint32_t(h >> 32); } @@ -237,16 +202,14 @@ void modulo64_test() } } } -#endif } int main() { macros_test(); prime_sizes_test(); - modulo32_test(); get_remainder_test(); - modulo64_test(); + modulo_test(); return boost::report_errors(); }