From caee07a64356b9c42275fc243ed81d9fa9fc4fd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ion=20Gazta=C3=B1aga?= Date: Sun, 19 Jan 2014 14:27:06 +0100 Subject: [PATCH] Improved floor_log2 function with intrinsics when available. Used De Brujin multiplication method otherwise. --- include/boost/intrusive/detail/utilities.hpp | 162 +++++++++++++++++-- 1 file changed, 145 insertions(+), 17 deletions(-) diff --git a/include/boost/intrusive/detail/utilities.hpp b/include/boost/intrusive/detail/utilities.hpp index ab7b02c..c52227c 100644 --- a/include/boost/intrusive/detail/utilities.hpp +++ b/include/boost/intrusive/detail/utilities.hpp @@ -364,25 +364,153 @@ template void destructor_impl(Hook &, detail::link_dispatch) {} -//This function uses binary search to discover the -//highest set bit of the integer -inline std::size_t floor_log2 (std::size_t x) -{ - const std::size_t Bits = sizeof(std::size_t)*CHAR_BIT; - const bool Size_t_Bits_Power_2= !(Bits & (Bits-1)); - BOOST_STATIC_ASSERT(Size_t_Bits_Power_2); +/////////////////////////// +// floor_log2 Dispatcher +//////////////////////////// - std::size_t n = x; - std::size_t log2 = 0; +#if defined(_MSC_VER) && (_MSC_VER >= 1400) - for(std::size_t shift = Bits >> 1; shift; shift >>= 1){ - std::size_t tmp = n >> shift; - if (tmp) - log2 += shift, n = tmp; + }}} //namespace boost::intrusive::detail + + #include + + namespace boost { + namespace intrusive { + namespace detail { + + #if defined(_M_X64) || defined(_M_AMD64) || defined(_M_IA64) //64 bit target + #define BOOST_INTRUSIVE_BSR_INTRINSIC _BitScanReverse64 + #else //32 bit target + #define BOOST_INTRUSIVE_BSR_INTRINSIC _BitScanReverse + #endif + + inline std::size_t floor_log2 (std::size_t x) + { + unsigned long log2; + BOOST_INTRUSIVE_BSR_INTRINSIC( &log2, (unsigned long)x ); + return log2; } - return log2; -} + #undef BOOST_INTRUSIVE_BSR_INTRINSIC + +#elif defined(_MSC_VER) //visual 2003 + + inline std::size_t floor_log2 (std::size_t x) + { + unsigned long log2; + __asm + { + bsr eax, x + mov log2, eax + } + return static_cast(log2); + } + +#elif defined(__GNUC__) && ((__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) //GCC >=3.4 + + #if SIZE_MAX > UINT_MAX + #define BOOST_INTRUSIVE_CLZ_INTRINSIC __builtin_clzll + #elif SIZE_MAX > UINT_MAX + #define BOOST_INTRUSIVE_CLZ_INTRINSIC __builtin_clzl + #else + #define BOOST_INTRUSIVE_CLZ_INTRINSIC __builtin_clz + #endif + + inline std::size_t floor_log2(std::size_t n) + { + return sizeof(std::size_t)*CHAR_BIT - 1 - BOOST_INTRUSIVE_CLZ_INTRINSIC(n); + } + + #undef BOOST_INTRUSIVE_CLZ_INTRINSIC + +#else //Portable methods + +//////////////////////////// +// Generic method +//////////////////////////// + + inline std::size_t floor_log2_get_shift(std::size_t n, true_ )//power of two size_t + { return n >> 1; } + + inline std::size_t floor_log2_get_shift(std::size_t n, false_ )//non-power of two size_t + { return (n >> 1) + ((n & 1u) & (n != 1)); } + + template + inline std::size_t floor_log2 (std::size_t x, integer) + { + const std::size_t Bits = N; + const bool Size_t_Bits_Power_2= !(Bits & (Bits-1)); + + std::size_t n = x; + std::size_t log2 = 0; + + std::size_t remaining_bits = Bits; + std::size_t shift = floor_log2_get_shift(remaining_bits, bool_()); + while(shift){ + std::size_t tmp = n >> shift; + if (tmp){ + log2 += shift, n = tmp; + } + shift = floor_log2_get_shift(shift, bool_()); + } + + return log2; + } + + //////////////////////////// + // DeBruijn method + //////////////////////////// + + //Taken from: + //http://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers + //Thanks to Desmond Hume + + inline std::size_t floor_log2 (std::size_t v, integer) + { + static const int MultiplyDeBruijnBitPosition[32] = + { + 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 + }; + + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + + return MultiplyDeBruijnBitPosition[(std::size_t)(v * 0x07C4ACDDU) >> 27]; + } + + inline std::size_t floor_log2 (std::size_t v, integer) + { + static const std::size_t MultiplyDeBruijnBitPosition[64] = { + 63, 0, 58, 1, 59, 47, 53, 2, + 60, 39, 48, 27, 54, 33, 42, 3, + 61, 51, 37, 40, 49, 18, 28, 20, + 55, 30, 34, 11, 43, 14, 22, 4, + 62, 57, 46, 52, 38, 26, 32, 41, + 50, 36, 17, 19, 29, 10, 13, 21, + 56, 45, 25, 31, 35, 16, 9, 12, + 44, 24, 15, 8, 23, 7, 6, 5}; + + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v |= v >> 32; + return MultiplyDeBruijnBitPosition[((std::size_t)((v - (v >> 1))*0x07EDD5E59A4E28C2ULL)) >> 58]; + } + + + inline std::size_t floor_log2 (std::size_t x) + { + const std::size_t Bits = sizeof(std::size_t)*CHAR_BIT; + return floor_log2(x, integer()); + } + +#endif //Thanks to Laurent de Soras in //http://www.flipcode.com/archives/Fast_log_Function.shtml @@ -404,13 +532,13 @@ inline float fast_log2 (float val) //1+log2(m), m ranging from 1 to 2 //3rd degree polynomial keeping first derivate continuity. //For less precision the line can be commented out - val = ((-1.0f/3.f) * val + 2.f) * val - (2.0f/3.f); + val = ((-1.f/3.f) * val + 2.f) * val - (2.f/3.f); return (val + log_2); } inline std::size_t ceil_log2 (std::size_t x) { - return ((x & (x-1))!= 0) + floor_log2(x); + return static_cast((x & (x-1)) != 0) + floor_log2(x); } template