Made fast_modulo universally available on 64-bit targets and never used on 32-bit targets

2025-11-15 14:59:32 +01:00 · 2022-06-26 19:13:54 +02:00
parent 2670bb149d
commit fb733483c6
2 changed files with 26 additions and 78 deletions
--- a/include/boost/unordered/detail/fca.hpp
+++ b/include/boost/unordered/detail/fca.hpp
@@ -148,10 +148,6 @@ namespace boost {
 #if ((((UINTPTR_MAX >> 16) >> 16) >> 16) >> 15) != 0
 #define BOOST_UNORDERED_FCA_HAS_64B_SIZE_T
 #endif
-#endif
-
-#if !defined(BOOST_NO_INT64_T)
-#define BOOST_UNORDERED_FCA_FASTMOD_SUPPORT
 #endif

      template <class = void> struct prime_fmod_size
@@ -166,10 +162,10 @@ namespace boost {
        static std::size_t const sizes_len;
        static std::size_t (*positions[])(std::size_t);

-#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT)
+#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
        static uint64_t inv_sizes32[];
        static std::size_t const inv_sizes32_len;
-#endif /* defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) */
+#endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */

        static inline std::size_t size_index(std::size_t n)
        {
@@ -192,7 +188,7 @@ namespace boost {
          return hash % Size;
        }

-#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT)
+#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
        // We emulate the techniques taken from:
        // Faster Remainder by Direct Computation: Applications to Compilers and
        // Software Libraries
@@ -204,14 +200,15 @@ namespace boost {

        static inline uint64_t get_remainder(uint64_t fractional, uint32_t d)
        {
-#if defined(_MSC_VER) && defined(_WIN64)
-          // use MSVC instrinsic when available to avoid promotion to 128 bits
+#if defined(_MSC_VER)
+          // use MSVC intrinsics when available to avoid promotion to 128 bits

          return __umulh(fractional, d);
 #elif defined(BOOST_HAS_INT128)
          return static_cast<uint64_t>(((boost::uint128_type)fractional * d) >> 64);
 #else
-          // portable implementation in the absence of boost::uint128_type
+          // portable implementation for 64-bit targets lacking boost::uint128_type,
+          // which happens at least with GCC 4.5 and earlier

          uint64_t r1 = (fractional & UINT32_MAX) * d;
          uint64_t r2 = (fractional >> 32 ) * d;
@@ -225,12 +222,11 @@ namespace boost {
          uint64_t fractional = M * a;
          return (uint32_t)(get_remainder(fractional, d));
        }
-#endif /* defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) */
+#endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */

        static inline std::size_t position(
          std::size_t hash, std::size_t size_index)
        {
-#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT)
 #if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
          std::size_t sizes_under_32bit = inv_sizes32_len;
          if (BOOST_LIKELY(size_index < sizes_under_32bit)) {
@@ -239,13 +235,9 @@ namespace boost {
          } else {
            return positions[size_index - sizes_under_32bit](hash);
          }
-#else
-          return fast_modulo(
-            hash, inv_sizes32[size_index], uint32_t(sizes[size_index]));
-#endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */
 #else
          return positions[size_index](hash);
-#endif /* defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) */
+#endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */
        }
      }; // prime_fmod_size

@@ -313,7 +305,7 @@ namespace boost {
 // Similarly here, we have to re-express the integer initialization using
 // arithmetic such that each literal can fit in a 32-bit value.
 //
-#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT)
+#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
      // clang-format off
        template <class T>
        uint64_t prime_fmod_size<T>::inv_sizes32[] = {
@@ -346,34 +338,27 @@ namespace boost {
          (boost::ulong_long_type(5ul) << 32)         + boost::ulong_long_type(1431653234ul) /* = 22906489714 */,
          (boost::ulong_long_type(2ul) << 32)         + boost::ulong_long_type(2863311496ul) /* = 11453246088 */,
          (boost::ulong_long_type(1ul) << 32)         + boost::ulong_long_type(1431655764ul) /* = 5726623060 */,
-#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
-        };
-#else
-          (boost::ulong_long_type(1ul) << 32)         + boost::ulong_long_type(6ul)          /* 4294967302 */
        };
      // clang-format on
-#endif /* !defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */

      template <class T>
      std::size_t const
        prime_fmod_size<T>::inv_sizes32_len = sizeof(inv_sizes32) /
                                              sizeof(inv_sizes32[0]);

-#endif /* defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT) */
+#endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */

 #define BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT(z, _, n)                  \
  prime_fmod_size<T>::template modulo<n>,

      template <class T>
      std::size_t (*prime_fmod_size<T>::positions[])(std::size_t) = {
-#if !defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT)
+#if !defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
        BOOST_PP_SEQ_FOR_EACH(BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT, ~,
          BOOST_UNORDERED_PRIME_FMOD_SIZES_32BIT)
-#endif
-
-#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
-          BOOST_PP_SEQ_FOR_EACH(BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT, ~,
-            BOOST_UNORDERED_PRIME_FMOD_SIZES_64BIT)
+#else
+        BOOST_PP_SEQ_FOR_EACH(BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT, ~,
+          BOOST_UNORDERED_PRIME_FMOD_SIZES_64BIT)
 #endif
      };

--- a/test/unordered/prime_fmod_tests.cpp
+++ b/test/unordered/prime_fmod_tests.cpp
@@ -28,15 +28,6 @@ void macros_test()
                "BOOST_UNORDERED_FCA_HAS_64B_SIZE_T is defined");
 #endif
  }
-
-#if ((defined(__GNUC__) || defined(__clang__)) &&                              \
-     defined(__SIZEOF_INT128__)) ||                                            \
-  (defined(_MSC_VER) && defined(_M_X64))
-#if !defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT)
-  BOOST_ERROR("fast_modulo support should be enabled for this toolchain but "
-              "it's currently not");
-#endif
-#endif
 }

 // Pretty inefficient, but the test is fast enough.
@@ -93,7 +84,7 @@ void prime_sizes_test()
    BOOST_TEST_GT(sizes[i], sizes[i - 1]);
  }

-#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT)
+#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
  // now we wish to prove that if we do have the reciprocals stored, we have the
  // correct amount of them, i.e. one for every entry in sizes[] that fits in 32
  // bits
@@ -140,13 +131,14 @@ void prime_sizes_test()

 void get_remainder_test()
 {
-#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT)
+#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
  struct
  {
-    // internally, our get_remainder() function will rely on either msvc
-    // intrinsics or 128 bit integer support which isn't always available. This
-    // is a slower, portable version we can use for verification of the
-    // routines.
+    // boost::unordered::detail::prime_fmod_size<>::get_remainder
+    // uses several internal implementations depending on the availability of
+    // certain intrinsics or 128-bit integer support, defaulting to a slow,
+    // portable routine. The following is a transcription of the portable
+    // routine used here for verification purposes.
    //
    boost::uint64_t operator()(boost::uint64_t f, boost::uint32_t d)
    {
@@ -178,7 +170,7 @@ void get_remainder_test()
 #endif
 }

-void modulo32_test()
+void modulo_test()
 {
  std::size_t* sizes = boost::unordered::detail::prime_fmod_size<>::sizes;

@@ -188,39 +180,12 @@ void modulo32_test()
  boost::detail::splitmix64 rng;

  for (std::size_t i = 0; i < 1000000u; ++i) {
-    std::size_t hash = static_cast<boost::uint32_t>(rng());
-
-    for (std::size_t j = 0; j < sizes_len; ++j) {
-      std::size_t p1 =
-        boost::unordered::detail::prime_fmod_size<>::position(hash, j);
-
-      std::size_t p2 = hash % sizes[j];
-      if (!BOOST_TEST_EQ(p1, p2)) {
-        std::cerr << "hash: " << hash << ", j: " << j << ", sizes[" << j
-                  << "]: " << sizes[j] << std::endl;
-        return;
-      }
-    }
-  }
-}
-
-void modulo64_test()
-{
-#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
-  std::size_t* sizes = boost::unordered::detail::prime_fmod_size<>::sizes;
-
-  std::size_t const sizes_len =
-    boost::unordered::detail::prime_fmod_size<>::sizes_len;
-
-  boost::detail::splitmix64 rng;
-
-  for (std::size_t i = 0; i < 1000000u; ++i) {
-    std::size_t hash = rng();
+    std::size_t hash = static_cast<std::size_t>(rng());

    for (std::size_t j = 0; j < sizes_len; ++j) {
      std::size_t h = hash;

-#if defined(BOOST_UNORDERED_FCA_FASTMOD_SUPPORT)
+#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
      if (sizes[j] <= UINT_MAX) {
        h = boost::uint32_t(h) + boost::uint32_t(h >> 32);
      }
@@ -237,16 +202,14 @@ void modulo64_test()
      }
    }
  }
-#endif
 }

 int main()
 {
  macros_test();
  prime_sizes_test();
-  modulo32_test();
  get_remainder_test();
-  modulo64_test();
+  modulo_test();

  return boost::report_errors();
 }