From f0d0a1ebd76716246a384a55d10323f00f88b409 Mon Sep 17 00:00:00 2001 From: Victor Zverovich Date: Sat, 25 Aug 2018 16:08:32 -0700 Subject: [PATCH] Implement Grisu2 digit generation --- CMakeLists.txt | 6 +-- include/fmt/format-inl.h | 79 +++++++++++++++++++++++++++++++++++++++- include/fmt/format.h | 43 +++++++--------------- 3 files changed, 95 insertions(+), 33 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 65d5f2e6..c25be5f1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,7 +73,7 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "GNU") -Wcast-qual -Wformat=2 -Wmissing-include-dirs -Wcast-align -Wnon-virtual-dtor -Wctor-dtor-privacy -Wdisabled-optimization - -Winvalid-pch -Wmissing-declarations -Woverloaded-virtual + -Winvalid-pch -Woverloaded-virtual -Wno-ctor-dtor-privacy -Wno-dangling-else -Wno-float-equal -Wno-format-nonliteral -Wno-sign-conversion -Wno-shadow) if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.6) @@ -101,8 +101,8 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") -Wno-unused-member-function -Wno-format-nonliteral -Wno-missing-noreturn -Wno-undefined-func-template -Wno-shadow -Wno-sign-conversion -Wno-used-but-marked-unused - -Wno-covered-switch-default -Wno-missing-variable-declarations - -Wno-double-promotion) + -Wno-covered-switch-default -Wno-missing-prototypes + -Wno-missing-variable-declarations -Wno-double-promotion) set(WERROR_FLAG -Werror) diff --git a/include/fmt/format-inl.h b/include/fmt/format-inl.h index 49779a0f..98aaee26 100644 --- a/include/fmt/format-inl.h +++ b/include/fmt/format-inl.h @@ -275,11 +275,16 @@ const char basic_data::DIGITS[] = template const uint32_t basic_data::POWERS_OF_10_32[] = { + 1, FMT_POWERS_OF_10(1) +}; + +template +const uint32_t basic_data::ZERO_OR_POWERS_OF_10_32[] = { 0, FMT_POWERS_OF_10(1) }; template -const uint64_t basic_data::POWERS_OF_10_64[] = { +const uint64_t basic_data::ZERO_OR_POWERS_OF_10_64[] = { 0, FMT_POWERS_OF_10(1), FMT_POWERS_OF_10(1000000000ull), @@ -361,6 +366,78 @@ FMT_FUNC fp get_cached_power(int min_exponent, int &pow10_exponent) { pow10_exponent = first_dec_exp + index * dec_exp_step; return fp(data::POW10_SIGNIFICANDS[index], data::POW10_EXPONENTS[index]); } + +// Generates output using Grisu2 digit-gen algorithm. +FMT_FUNC void grisu2_gen_digits( + const fp &scaled_value, const fp &scaled_upper, uint64_t delta, + char *buffer, size_t &size, int &dec_exp) { + internal::fp one(1ull << -scaled_upper.e, scaled_upper.e); + uint32_t hi = static_cast(scaled_upper.f >> -one.e); // p1 in Grisu + uint64_t lo = scaled_upper.f & (one.f - 1); // p2 in Grisu + size = 0; + auto kappa = count_digits(hi); // TODO: more descriptive name + while (kappa > 0) { + uint32_t digit = 0; + // This optimization by miloyip reduces the number of integer divisions by + // one per iteration. + switch (kappa) { + case 10: digit = hi / 1000000000; hi %= 1000000000; break; + case 9: digit = hi / 100000000; hi %= 100000000; break; + case 8: digit = hi / 10000000; hi %= 10000000; break; + case 7: digit = hi / 1000000; hi %= 1000000; break; + case 6: digit = hi / 100000; hi %= 100000; break; + case 5: digit = hi / 10000; hi %= 10000; break; + case 4: digit = hi / 1000; hi %= 1000; break; + case 3: digit = hi / 100; hi %= 100; break; + case 2: digit = hi / 10; hi %= 10; break; + case 1: digit = hi; hi = 0; break; + default: + FMT_ASSERT(false, "invalid number of digits"); + } + if (digit != 0 || size != 0) + buffer[size++] = '0' + static_cast(digit); + --kappa; + uint64_t remainder = (static_cast(hi) << -one.e) + lo; + if (remainder <= delta) { + dec_exp += kappa; + // TODO: use scaled_value + (void)scaled_value; + return; + } + } + for (;;) { + lo *= 10; + delta *= 10; + char digit = static_cast(lo >> -one.e); + if (digit != 0 || size != 0) + buffer[size++] = '0' + digit; + lo &= one.f - 1; + --kappa; + if (lo < delta) { + dec_exp += kappa; + return; + } + } +} + +FMT_FUNC void grisu2_format(double value, char *buffer, size_t &size) { + fp fp_value(value); + fp lower, upper; + fp_value.compute_boundaries(lower, upper); + // Find a cached power of 10 close to 1 / upper. + int dec_exp = 0; // K in Grisu paper. + const int min_exp = -60; + auto dec_pow = get_cached_power( + min_exp - (upper.e + fp::significand_size), dec_exp); + fp_value.normalize(); + fp scaled_value = fp_value * dec_pow; + fp scaled_lower = lower * dec_pow; + fp scaled_upper = upper * dec_pow; + ++scaled_lower.f; // +1 ulp + --scaled_upper.f; // -1 ulp + uint64_t delta = scaled_upper.f - scaled_lower.f; + grisu2_gen_digits(scaled_value, scaled_upper, delta, buffer, size, dec_exp); +} } // namespace internal #if FMT_USE_WINDOWS_H diff --git a/include/fmt/format.h b/include/fmt/format.h index bd559a52..f4c0f6ee 100644 --- a/include/fmt/format.h +++ b/include/fmt/format.h @@ -365,6 +365,10 @@ FMT_API fp operator*(fp x, fp y); // (binary) exponent satisfies min_exponent <= c_k.e <= min_exponent + 3. FMT_API fp get_cached_power(int min_exponent, int &pow10_exponent); +// Formats value using Grisu2 algorithm: +// https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf +FMT_API void grisu2_format(double value, char *buffer, size_t &size); + template typename Allocator::value_type *allocate(Allocator& alloc, std::size_t n) { #if __cplusplus >= 201103L || FMT_MSC_VER >= 1700 @@ -952,7 +956,8 @@ struct int_traits { template struct FMT_API basic_data { static const uint32_t POWERS_OF_10_32[]; - static const uint64_t POWERS_OF_10_64[]; + static const uint32_t ZERO_OR_POWERS_OF_10_32[]; + static const uint64_t ZERO_OR_POWERS_OF_10_64[]; static const uint64_t POW10_SIGNIFICANDS[]; static const int16_t POW10_EXPONENTS[]; static const char DIGITS[]; @@ -973,7 +978,7 @@ inline unsigned count_digits(uint64_t n) { // Based on http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 // and the benchmark https://github.com/localvoid/cxx-benchmark-count-digits. int t = (64 - FMT_BUILTIN_CLZLL(n | 1)) * 1233 >> 12; - return to_unsigned(t) - (n < data::POWERS_OF_10_64[t]) + 1; + return to_unsigned(t) - (n < data::ZERO_OR_POWERS_OF_10_64[t]) + 1; } #else // Fallback version of count_digits used when __builtin_clz is not available. @@ -1043,7 +1048,8 @@ class decimal_formatter { // https://github.com/jeaiii/itoa unsigned n = N - 1; unsigned a = n / 5 * n * 53 / 16; - uint64_t t = ((1ULL << (32 + a)) / data::POWERS_OF_10_32[n] + 1 - n / 9); + uint64_t t = ((1ULL << (32 + a)) / + data::ZERO_OR_POWERS_OF_10_32[n] + 1 - n / 9); t = ((t * u) >> a) + n / 5 * 4; write_pair(0, t >> 32); for (unsigned i = 2; i < N; i += 2) { @@ -1075,7 +1081,7 @@ class decimal_formatter_null : public decimal_formatter { // Optional version of count_digits for better performance on 32-bit platforms. inline unsigned count_digits(uint32_t n) { int t = (32 - FMT_BUILTIN_CLZ(n | 1)) * 1233 >> 12; - return to_unsigned(t) - (n < data::POWERS_OF_10_32[t]) + 1; + return to_unsigned(t) - (n < data::ZERO_OR_POWERS_OF_10_32[t]) + 1; } #endif @@ -2943,31 +2949,10 @@ void basic_writer::write_double(T value, const format_specs &spec) { basic_memory_buffer buffer; if (internal::const_check(FMT_USE_GRISU && sizeof(T) <= sizeof(double) && std::numeric_limits::is_iec559)) { - internal::fp fp_value(static_cast(value)); - fp_value.normalize(); - // Find a cached power of 10 close to 1 / fp_value. - int dec_exp = 0; - const int min_exp = -60; - auto dec_pow = internal::get_cached_power( - min_exp - (fp_value.e + internal::fp::significand_size), dec_exp); - internal::fp product = fp_value * dec_pow; - // Generate output using Grisu digit-gen-mix algorithm. - internal::fp one(1ull << -product.e, product.e); - uint64_t hi = product.f >> -one.e; - uint64_t f = product.f & (one.f - 1); - typedef back_insert_range> range; - basic_writer w{range(buffer)}; - w.write(hi); - size_t digits = buffer.size(); - w.write('.'); - const unsigned max_digits = 18; - while (digits++ < max_digits) { - f *= 10; - w.write(static_cast('0' + (f >> -one.e))); - f &= one.f - 1; - } - w.write('e'); - w.write(-dec_exp); + char buf[100]; // TODO: max size + size_t size = 0; + internal::grisu2_format(static_cast(value), buf, size); + buffer.append(buf, buf + size); // TODO: avoid extra copy } else { format_specs normalized_spec(spec); normalized_spec.type_ = handler.type;