Made fast_modulo universally available on 64-bit targets and never used on 32-bit targets

This commit is contained in:
joaquintides
2022-06-26 19:13:54 +02:00
parent 2670bb149d
commit fb733483c6
2 changed files with 26 additions and 78 deletions

View File

@ -148,10 +148,6 @@ namespace boost {
#if ((((UINTPTR_MAX >> 16) >> 16) >> 16) >> 15) != 0
#define BOOST_UNORDERED_FCA_HAS_64B_SIZE_T
#endif
#endif
#if !defined(BOOST_NO_INT64_T)
#define BOOST_UNORDERED_FCA_FASTMOD_SUPPORT
#endif
template <class = void> struct prime_fmod_size
@ -166,10 +162,10 @@ namespace boost {
static std::size_t const sizes_len;
static std::size_t (*positions[])(std::size_t);
#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT)
#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
static uint64_t inv_sizes32[];
static std::size_t const inv_sizes32_len;
#endif /* defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) */
#endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */
static inline std::size_t size_index(std::size_t n)
{
@ -192,7 +188,7 @@ namespace boost {
return hash % Size;
}
#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT)
#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
// We emulate the techniques taken from:
// Faster Remainder by Direct Computation: Applications to Compilers and
// Software Libraries
@ -204,14 +200,15 @@ namespace boost {
static inline uint64_t get_remainder(uint64_t fractional, uint32_t d)
{
#if defined(_MSC_VER) && defined(_WIN64)
// use MSVC intrinsic when available to avoid promotion to 128 bits
#if defined(_MSC_VER)
// use MSVC intrinsics when available to avoid promotion to 128 bits
return __umulh(fractional, d);
#elif defined(BOOST_HAS_INT128)
return static_cast<uint64_t>(((boost::uint128_type)fractional * d) >> 64);
#else
// portable implementation in the absence of boost::uint128_type
// portable implementation in the absence of boost::uint128_type on 64 bits,
// which happens at least in GCC 4.5 and prior
uint64_t r1 = (fractional & UINT32_MAX) * d;
uint64_t r2 = (fractional >> 32 ) * d;
@ -225,12 +222,11 @@ namespace boost {
uint64_t fractional = M * a;
return (uint32_t)(get_remainder(fractional, d));
}
#endif /* defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) */
#endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */
static inline std::size_t position(
std::size_t hash, std::size_t size_index)
{
#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT)
#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
std::size_t sizes_under_32bit = inv_sizes32_len;
if (BOOST_LIKELY(size_index < sizes_under_32bit)) {
@ -239,13 +235,9 @@ namespace boost {
} else {
return positions[size_index - sizes_under_32bit](hash);
}
#else
return fast_modulo(
hash, inv_sizes32[size_index], uint32_t(sizes[size_index]));
#endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */
#else
return positions[size_index](hash);
#endif /* defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) */
#endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */
}
}; // prime_fmod_size
@ -313,7 +305,7 @@ namespace boost {
// Similarly here, we have to re-express the integer initialization using
// arithmetic such that each literal can fit in a 32-bit value.
//
#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT)
#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
// clang-format off
template <class T>
uint64_t prime_fmod_size<T>::inv_sizes32[] = {
@ -346,34 +338,27 @@ namespace boost {
(boost::ulong_long_type(5ul) << 32) + boost::ulong_long_type(1431653234ul) /* = 22906489714 */,
(boost::ulong_long_type(2ul) << 32) + boost::ulong_long_type(2863311496ul) /* = 11453246088 */,
(boost::ulong_long_type(1ul) << 32) + boost::ulong_long_type(1431655764ul) /* = 5726623060 */,
#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
};
#else
(boost::ulong_long_type(1ul) << 32) + boost::ulong_long_type(6ul) /* 4294967302 */
};
// clang-format on
#endif /* !defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */
template <class T>
std::size_t const
prime_fmod_size<T>::inv_sizes32_len = sizeof(inv_sizes32) /
sizeof(inv_sizes32[0]);
#endif /* defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) */
#endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */
#define BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT(z, _, n) \
prime_fmod_size<T>::template modulo<n>,
template <class T>
std::size_t (*prime_fmod_size<T>::positions[])(std::size_t) = {
#if !defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT)
#if !defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
BOOST_PP_SEQ_FOR_EACH(BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT, ~,
BOOST_UNORDERED_PRIME_FMOD_SIZES_32BIT)
#endif
#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
BOOST_PP_SEQ_FOR_EACH(BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT, ~,
BOOST_UNORDERED_PRIME_FMOD_SIZES_64BIT)
#else
BOOST_PP_SEQ_FOR_EACH(BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT, ~,
BOOST_UNORDERED_PRIME_FMOD_SIZES_64BIT)
#endif
};

View File

@ -28,15 +28,6 @@ void macros_test()
"BOOST_UNORDERED_FCA_HAS_64B_SIZE_T is defined");
#endif
}
#if ((defined(__GNUC__) || defined(__clang__)) && \
defined(__SIZEOF_INT128__)) || \
(defined(_MSC_VER) && defined(_M_X64))
#if !defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT)
BOOST_ERROR("fast_modulo support should be enabled for this toolchain but "
"it's currently not");
#endif
#endif
}
// Pretty inefficient, but the test is fast enough.
@ -93,7 +84,7 @@ void prime_sizes_test()
BOOST_TEST_GT(sizes[i], sizes[i - 1]);
}
#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT)
#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
// Now we wish to prove that, if we do have the reciprocals stored, we have the
// correct number of them, i.e. one for every entry in sizes[] that fits in 32
// bits.
@ -140,13 +131,14 @@ void prime_sizes_test()
void get_remainder_test()
{
#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT)
#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
struct
{
// internally, our get_remainder() function will rely on either msvc
// intrinsics or 128 bit integer support which isn't always available. This
// is a slower, portable version we can use for verification of the
// routines.
// boost::unordered::detail::prime_fmod_size<>::get_remainder
// uses several internal implementations depending on the availability of
// certain intrinsics or 128 bit integer support, defaulting to a slow,
// portable routine. The following is a transcription of the portable
// routine used here for verification purposes.
//
boost::uint64_t operator()(boost::uint64_t f, boost::uint32_t d)
{
@ -178,7 +170,7 @@ void get_remainder_test()
#endif
}
void modulo32_test()
void modulo_test()
{
std::size_t* sizes = boost::unordered::detail::prime_fmod_size<>::sizes;
@ -188,39 +180,12 @@ void modulo32_test()
boost::detail::splitmix64 rng;
for (std::size_t i = 0; i < 1000000u; ++i) {
std::size_t hash = static_cast<boost::uint32_t>(rng());
for (std::size_t j = 0; j < sizes_len; ++j) {
std::size_t p1 =
boost::unordered::detail::prime_fmod_size<>::position(hash, j);
std::size_t p2 = hash % sizes[j];
if (!BOOST_TEST_EQ(p1, p2)) {
std::cerr << "hash: " << hash << ", j: " << j << ", sizes[" << j
<< "]: " << sizes[j] << std::endl;
return;
}
}
}
}
void modulo64_test()
{
#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
std::size_t* sizes = boost::unordered::detail::prime_fmod_size<>::sizes;
std::size_t const sizes_len =
boost::unordered::detail::prime_fmod_size<>::sizes_len;
boost::detail::splitmix64 rng;
for (std::size_t i = 0; i < 1000000u; ++i) {
std::size_t hash = rng();
std::size_t hash = static_cast<std::size_t>(rng());
for (std::size_t j = 0; j < sizes_len; ++j) {
std::size_t h = hash;
#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT)
#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
if (sizes[j] <= UINT_MAX) {
h = boost::uint32_t(h) + boost::uint32_t(h >> 32);
}
@ -237,16 +202,14 @@ void modulo64_test()
}
}
}
#endif
}
int main()
{
macros_test();
prime_sizes_test();
modulo32_test();
get_remainder_test();
modulo64_test();
modulo_test();
return boost::report_errors();
}