From bc9b0e61770f72a9028b747d50df1115d425c89c Mon Sep 17 00:00:00 2001
From: Andrey Semashev <andrey.semashev@gmail.com>
Date: Sun, 16 Jan 2022 15:20:37 +0300
Subject: [PATCH] Implemented integer_log2 in terms of countl_zero from
 Boost.Core.

This allows the use of compiler intrinsics and specialized hardware
instructions to compute log2, which results in better performance.

Also, added tests for the generic implementation using Boost.Multiprecision
integers.

Closes https://github.com/boostorg/integer/issues/31.
---
 include/boost/integer/integer_log2.hpp | 95 +++++++++++++++++---------
 test/integer_log2_test.cpp             | 26 ++++++-
 2 files changed, 88 insertions(+), 33 deletions(-)
diff --git a/include/boost/integer/integer_log2.hpp b/include/boost/integer/integer_log2.hpp
index 9f79f76..8ca236f 100644
--- a/include/boost/integer/integer_log2.hpp
+++ b/include/boost/integer/integer_log2.hpp
@@ -4,7 +4,8 @@
 //   Gives the integer part of the logarithm, in base 2, of a
 // given number. Behavior is undefined if the argument is <= 0.
 //
-//         Copyright (c) 2003-2004, 2008 Gennaro Prota
+//        Copyright (c) 2003-2004, 2008 Gennaro Prota
+//            Copyright (c) 2022 Andrey Semashev
 //
 // Distributed under the Boost Software License, Version 1.0.
 //    (See accompanying file LICENSE_1_0.txt or copy at
@@ -16,24 +17,52 @@
 #define BOOST_INTEGER_INTEGER_LOG2_HPP
 
 #include <climits>
+#include <limits>
 #include <boost/config.hpp>
 #include <boost/assert.hpp>
+#include <boost/cstdint.hpp>
+#include <boost/core/bit.hpp>
+#include <boost/core/enable_if.hpp>
+#include <boost/type_traits/is_integral.hpp>
+#include <boost/type_traits/make_unsigned.hpp>
 
 namespace boost {
 namespace detail {
 
-template <typename T>
-inline int integer_log2_impl(T x, unsigned int n)
+// helper to find the maximum power of two
+// less than p
+template< unsigned int p, unsigned int n, bool = ((2u * n) < p) >
+struct max_pow2_less :
+    public max_pow2_less< p, 2u * n >
 {
-    int result = 0;
+};
 
+template< unsigned int p, unsigned int n >
+struct max_pow2_less< p, n, false >
+{
+    BOOST_STATIC_CONSTANT(unsigned int, value = n);
+};
+
+template< typename T >
+inline typename boost::disable_if< boost::is_integral< T >, int >::type integer_log2_impl(T x)
+{
+    unsigned int n = detail::max_pow2_less<
+        std::numeric_limits< T >::digits,
+        CHAR_BIT / 2u
+    >::value;
+
+    int result = 0;
     while (x != 1)
     {
-        const T t = static_cast<T>(x >> n);
+        T t(x >> n);
         if (t)
         {
-            result += static_cast<int>(n);
+            result += static_cast< int >(n);
+#if !defined(BOOST_NO_CXX11_RVALUE_REFERENCES)
+            x = static_cast< T&& >(t);
+#else
             x = t;
+#endif
         }
         n >>= 1u;
     }
@@ -41,19 +70,34 @@ inline int integer_log2_impl(T x, unsigned int n)
     return result;
 }
 
-// helper to find the maximum power of two
-// less than p
-template <unsigned int p, unsigned int n, bool = (2u*n < p)>
-struct max_pow2_less :
-    public max_pow2_less< p, 2u*n >
+template< typename T >
+inline typename boost::enable_if< boost::is_integral< T >, int >::type integer_log2_impl(T x)
 {
-};
+    // We could simply rely on numeric_limits but sometimes
+    // Borland tries to use numeric_limits<const T>, because
+    // of its usual const-related problems in argument deduction
+    // - gps
+    return static_cast< int >((sizeof(T) * CHAR_BIT - 1u) -
+        boost::core::countl_zero(static_cast< typename boost::make_unsigned< T >::type >(x)));
+}
 
-template <unsigned int p, unsigned int n>
-struct max_pow2_less<p, n, false>
+#if defined(BOOST_HAS_INT128)
+// We need to provide explicit overloads for __int128 because (a) boost/core/bit.hpp currently does not support it and
+// (b) std::numeric_limits is not specialized for __int128 in some standard libraries.
+inline int integer_log2_impl(boost::uint128_type x)
 {
-    BOOST_STATIC_CONSTANT(unsigned int, value = n);
-};
+    const boost::uint64_t x_hi = static_cast< boost::uint64_t >(x >> 64u);
+    if (x_hi != 0u)
+        return 127 - boost::core::countl_zero(x_hi);
+    else
+        return 63 - boost::core::countl_zero(static_cast< boost::uint64_t >(x));
+}
+
+inline int integer_log2_impl(boost::int128_type x)
+{
+    return detail::integer_log2_impl(static_cast< boost::uint128_type >(x));
+}
+#endif // defined(BOOST_HAS_INT128)
 
 } // namespace detail
 
@@ -61,26 +105,13 @@ struct max_pow2_less<p, n, false>
 // ------------
 // integer_log2
 // ------------
-template <typename T>
+template< typename T >
 inline int integer_log2(T x)
 {
     BOOST_ASSERT(x > 0);
-
-    return detail::integer_log2_impl
-    (
-        x,
-        detail::max_pow2_less<
-            // We could simply rely on numeric_limits but sometimes
-            // Borland tries to use numeric_limits<const T>, because
-            // of its usual const-related problems in argument deduction
-            // - gps
-            // Also, numeric_limits is not specialized for __int128 in libstdc++.
-            sizeof(T) * CHAR_BIT,
-            CHAR_BIT / 2u
-        >::value
-    );
+    return detail::integer_log2_impl(x);
 }
 
-}
+} // namespace boost
 
 #endif // BOOST_INTEGER_INTEGER_LOG2_HPP
diff --git a/test/integer_log2_test.cpp b/test/integer_log2_test.cpp
index 064b0e4..923487c 100644
--- a/test/integer_log2_test.cpp
+++ b/test/integer_log2_test.cpp
@@ -15,6 +15,11 @@
 
 #include <iostream>
 
+#include "multiprecision_config.hpp"
+
+#if !defined(DISABLE_MP_TESTS)
+#include <boost/multiprecision/cpp_int.hpp>
+#endif
 
 // Macros to compact code
 #define PRIVATE_LB_TEST( v, e )  BOOST_TEST( ::boost::integer_log2((v)) == e )
@@ -29,7 +34,6 @@
 #define PRIVATE_LB_0_TEST  PRIVATE_PRINT_LB( 0 )
 #endif
 
-
 // Main testing function
 int main()
 {
@@ -159,5 +163,25 @@ int main()
     PRIVATE_LB_TEST( ~boost::uint128_type(0u), 127 );
 #endif
 
+#if !defined(DISABLE_MP_TESTS)
+    PRIVATE_LB_TEST( boost::multiprecision::cpp_int(1), 0 );
+    PRIVATE_LB_TEST( boost::multiprecision::cpp_int(2), 1 );
+    PRIVATE_LB_TEST( boost::multiprecision::cpp_int(3), 1 );
+    PRIVATE_LB_TEST( boost::multiprecision::cpp_int(65535), 15 );
+    PRIVATE_LB_TEST( boost::multiprecision::cpp_int(65536), 16 );
+
+    PRIVATE_LB_TEST( boost::multiprecision::int1024_t(1), 0 );
+    PRIVATE_LB_TEST( boost::multiprecision::int1024_t(2), 1 );
+    PRIVATE_LB_TEST( boost::multiprecision::int1024_t(3), 1 );
+    PRIVATE_LB_TEST( boost::multiprecision::int1024_t(65535), 15 );
+    PRIVATE_LB_TEST( boost::multiprecision::int1024_t(65536), 16 );
+
+    PRIVATE_LB_TEST( boost::multiprecision::uint1024_t(1), 0 );
+    PRIVATE_LB_TEST( boost::multiprecision::uint1024_t(2), 1 );
+    PRIVATE_LB_TEST( boost::multiprecision::uint1024_t(3), 1 );
+    PRIVATE_LB_TEST( boost::multiprecision::uint1024_t(65535), 15 );
+    PRIVATE_LB_TEST( boost::multiprecision::uint1024_t(65536), 16 );
+#endif
+
     return boost::report_errors();
 }