diff --git a/include/fmt/format-inl.h b/include/fmt/format-inl.h index fcc1d742..1e72a3b5 100644 --- a/include/fmt/format-inl.h +++ b/include/fmt/format-inl.h @@ -128,567 +128,16 @@ FMT_FUNC std::system_error vsystem_error(int error_code, string_view format_str, namespace detail { -template struct basic_impl_data { - // Normalized 64-bit significands of pow(10, k), for k = -348, -340, ..., 340. - // These are generated by support/compute-powers.py. - static constexpr uint64_t pow10_significands[87] = { - 0xfa8fd5a0081c0288, 0xbaaee17fa23ebf76, 0x8b16fb203055ac76, - 0xcf42894a5dce35ea, 0x9a6bb0aa55653b2d, 0xe61acf033d1a45df, - 0xab70fe17c79ac6ca, 0xff77b1fcbebcdc4f, 0xbe5691ef416bd60c, - 0x8dd01fad907ffc3c, 0xd3515c2831559a83, 0x9d71ac8fada6c9b5, - 0xea9c227723ee8bcb, 0xaecc49914078536d, 0x823c12795db6ce57, - 0xc21094364dfb5637, 0x9096ea6f3848984f, 0xd77485cb25823ac7, - 0xa086cfcd97bf97f4, 0xef340a98172aace5, 0xb23867fb2a35b28e, - 0x84c8d4dfd2c63f3b, 0xc5dd44271ad3cdba, 0x936b9fcebb25c996, - 0xdbac6c247d62a584, 0xa3ab66580d5fdaf6, 0xf3e2f893dec3f126, - 0xb5b5ada8aaff80b8, 0x87625f056c7c4a8b, 0xc9bcff6034c13053, - 0x964e858c91ba2655, 0xdff9772470297ebd, 0xa6dfbd9fb8e5b88f, - 0xf8a95fcf88747d94, 0xb94470938fa89bcf, 0x8a08f0f8bf0f156b, - 0xcdb02555653131b6, 0x993fe2c6d07b7fac, 0xe45c10c42a2b3b06, - 0xaa242499697392d3, 0xfd87b5f28300ca0e, 0xbce5086492111aeb, - 0x8cbccc096f5088cc, 0xd1b71758e219652c, 0x9c40000000000000, - 0xe8d4a51000000000, 0xad78ebc5ac620000, 0x813f3978f8940984, - 0xc097ce7bc90715b3, 0x8f7e32ce7bea5c70, 0xd5d238a4abe98068, - 0x9f4f2726179a2245, 0xed63a231d4c4fb27, 0xb0de65388cc8ada8, - 0x83c7088e1aab65db, 0xc45d1df942711d9a, 0x924d692ca61be758, - 0xda01ee641a708dea, 0xa26da3999aef774a, 0xf209787bb47d6b85, - 0xb454e4a179dd1877, 0x865b86925b9bc5c2, 0xc83553c5c8965d3d, - 0x952ab45cfa97a0b3, 0xde469fbd99a05fe3, 0xa59bc234db398c25, - 0xf6c69a72a3989f5c, 0xb7dcbf5354e9bece, 0x88fcf317f22241e2, - 0xcc20ce9bd35c78a5, 0x98165af37b2153df, 0xe2a0b5dc971f303a, - 0xa8d9d1535ce3b396, 0xfb9b7cd9a4a7443c, 0xbb764c4ca7a44410, - 0x8bab8eefb6409c1a, 0xd01fef10a657842c, 0x9b10a4e5e9913129, - 0xe7109bfba19c0c9d, 0xac2820d9623bf429, 0x80444b5e7aa7cf85, - 0xbf21e44003acdd2d, 0x8e679c2f5e44ff8f, 0xd433179d9c8cb841, - 0x9e19db92b4e31ba9, 0xeb96bf6ebadf77d9, 0xaf87023b9bf0ee6b, - }; - -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wnarrowing" -#endif - // Binary exponents of pow(10, k), for k = -348, -340, ..., 340, corresponding - // to significands above. - static constexpr int16_t pow10_exponents[87] = { - -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, -954, - -927, -901, -874, -847, -821, -794, -768, -741, -715, -688, -661, - -635, -608, -582, -555, -529, -502, -475, -449, -422, -396, -369, - -343, -316, -289, -263, -236, -210, -183, -157, -130, -103, -77, - -50, -24, 3, 30, 56, 83, 109, 136, 162, 189, 216, - 242, 269, 295, 322, 348, 375, 402, 428, 455, 481, 508, - 534, 561, 588, 614, 641, 667, 694, 720, 747, 774, 800, - 827, 853, 880, 907, 933, 960, 986, 1013, 1039, 1066}; -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 -# pragma GCC diagnostic pop -#endif - - static constexpr uint64_t power_of_10_64[20] = { - 1, FMT_POWERS_OF_10(1ULL), FMT_POWERS_OF_10(1000000000ULL), - 10000000000000000000ULL}; -}; - -// This is a struct rather than an alias to avoid shadowing warnings in gcc. -struct impl_data : basic_impl_data<> {}; - #if __cplusplus < 201703L -template -constexpr uint64_t basic_impl_data::pow10_significands[]; -template constexpr int16_t basic_impl_data::pow10_exponents[]; -template constexpr uint64_t basic_impl_data::power_of_10_64[]; +template constexpr uint64_t basic_data::pow10_significands[]; +template constexpr int16_t basic_data::pow10_exponents[]; +template constexpr uint64_t basic_data::power_of_10_64[]; #endif -// Normalizes the value converted from double and multiplied by (1 << SHIFT). -template -FMT_CONSTEXPR basic_fp normalize(basic_fp value) { - // Handle subnormals. - const auto implicit_bit = F(1) << num_significand_bits(); - const auto shifted_implicit_bit = implicit_bit << SHIFT; - while ((value.f & shifted_implicit_bit) == 0) { - value.f <<= 1; - --value.e; - } - // Subtract 1 to account for hidden bit. - const auto offset = basic_fp::num_significand_bits - - num_significand_bits() - SHIFT - 1; - value.f <<= offset; - value.e -= offset; - return value; -} - template inline bool operator==(basic_fp x, basic_fp y) { return x.f == y.f && x.e == y.e; } -// Computes lhs * rhs / pow(2, 64) rounded to nearest with half-up tie breaking. -FMT_CONSTEXPR inline uint64_t multiply(uint64_t lhs, uint64_t rhs) { -#if FMT_USE_INT128 - auto product = static_cast<__uint128_t>(lhs) * rhs; - auto f = static_cast(product >> 64); - return (static_cast(product) & (1ULL << 63)) != 0 ? f + 1 : f; -#else - // Multiply 32-bit parts of significands. - uint64_t mask = (1ULL << 32) - 1; - uint64_t a = lhs >> 32, b = lhs & mask; - uint64_t c = rhs >> 32, d = rhs & mask; - uint64_t ac = a * c, bc = b * c, ad = a * d, bd = b * d; - // Compute mid 64-bit of result and round. - uint64_t mid = (bd >> 32) + (ad & mask) + (bc & mask) + (1U << 31); - return ac + (ad >> 32) + (bc >> 32) + (mid >> 32); -#endif -} - -using fp = basic_fp; - -FMT_CONSTEXPR inline fp operator*(fp x, fp y) { - return {multiply(x.f, y.f), x.e + y.e + 64}; -} - -// Returns a cached power of 10 `c_k = c_k.f * pow(2, c_k.e)` such that its -// (binary) exponent satisfies `min_exponent <= c_k.e <= min_exponent + 28`. -FMT_CONSTEXPR inline fp get_cached_power(int min_exponent, - int& pow10_exponent) { - const int shift = 32; - // log10(2) = 0x0.4d104d427de7fbcc... - const int64_t significand = 0x4d104d427de7fbcc; - int index = static_cast( - ((min_exponent + fp::num_significand_bits - 1) * (significand >> shift) + - ((int64_t(1) << shift) - 1)) // ceil - >> 32 // arithmetic shift - ); - // Decimal exponent of the first (smallest) cached power of 10. - const int first_dec_exp = -348; - // Difference between 2 consecutive decimal exponents in cached powers of 10. - const int dec_exp_step = 8; - index = (index - first_dec_exp - 1) / dec_exp_step + 1; - pow10_exponent = first_dec_exp + index * dec_exp_step; - return {impl_data::pow10_significands[index], - impl_data::pow10_exponents[index]}; -} - -class bigint { - private: - // A bigint is stored as an array of bigits (big digits), with bigit at index - // 0 being the least significant one. - using bigit = uint32_t; - using double_bigit = uint64_t; - enum { bigits_capacity = 32 }; - basic_memory_buffer bigits_; - int exp_; - - FMT_CONSTEXPR20 bigit operator[](int index) const { - return bigits_[to_unsigned(index)]; - } - FMT_CONSTEXPR20 bigit& operator[](int index) { - return bigits_[to_unsigned(index)]; - } - - static FMT_CONSTEXPR_DECL const int bigit_bits = num_bits(); - - friend struct formatter; - - FMT_CONSTEXPR20 void subtract_bigits(int index, bigit other, bigit& borrow) { - auto result = static_cast((*this)[index]) - other - borrow; - (*this)[index] = static_cast(result); - borrow = static_cast(result >> (bigit_bits * 2 - 1)); - } - - FMT_CONSTEXPR20 void remove_leading_zeros() { - int num_bigits = static_cast(bigits_.size()) - 1; - while (num_bigits > 0 && (*this)[num_bigits] == 0) --num_bigits; - bigits_.resize(to_unsigned(num_bigits + 1)); - } - - // Computes *this -= other assuming aligned bigints and *this >= other. - FMT_CONSTEXPR20 void subtract_aligned(const bigint& other) { - FMT_ASSERT(other.exp_ >= exp_, "unaligned bigints"); - FMT_ASSERT(compare(*this, other) >= 0, ""); - bigit borrow = 0; - int i = other.exp_ - exp_; - for (size_t j = 0, n = other.bigits_.size(); j != n; ++i, ++j) - subtract_bigits(i, other.bigits_[j], borrow); - while (borrow > 0) subtract_bigits(i, 0, borrow); - remove_leading_zeros(); - } - - FMT_CONSTEXPR20 void multiply(uint32_t value) { - const double_bigit wide_value = value; - bigit carry = 0; - for (size_t i = 0, n = bigits_.size(); i < n; ++i) { - double_bigit result = bigits_[i] * wide_value + carry; - bigits_[i] = static_cast(result); - carry = static_cast(result >> bigit_bits); - } - if (carry != 0) bigits_.push_back(carry); - } - - template ::value || - std::is_same::value)> - FMT_CONSTEXPR20 void multiply(UInt value) { - using half_uint = - conditional_t::value, uint64_t, uint32_t>; - const int shift = num_bits() - bigit_bits; - const UInt lower = static_cast(value); - const UInt upper = value >> num_bits(); - UInt carry = 0; - for (size_t i = 0, n = bigits_.size(); i < n; ++i) { - UInt result = lower * bigits_[i] + static_cast(carry); - carry = (upper * bigits_[i] << shift) + (result >> bigit_bits) + - (carry >> bigit_bits); - bigits_[i] = static_cast(result); - } - while (carry != 0) { - bigits_.push_back(static_cast(carry)); - carry >>= bigit_bits; - } - } - - template ::value || - std::is_same::value)> - FMT_CONSTEXPR20 void assign(UInt n) { - size_t num_bigits = 0; - do { - bigits_[num_bigits++] = static_cast(n); - n >>= bigit_bits; - } while (n != 0); - bigits_.resize(num_bigits); - exp_ = 0; - } - - public: - FMT_CONSTEXPR20 bigint() : exp_(0) {} - explicit bigint(uint64_t n) { assign(n); } - - bigint(const bigint&) = delete; - void operator=(const bigint&) = delete; - - FMT_CONSTEXPR20 void assign(const bigint& other) { - auto size = other.bigits_.size(); - bigits_.resize(size); - auto data = other.bigits_.data(); - std::copy(data, data + size, make_checked(bigits_.data(), size)); - exp_ = other.exp_; - } - - template FMT_CONSTEXPR20 void operator=(Int n) { - FMT_ASSERT(n > 0, ""); - assign(uint64_or_128_t(n)); - } - - FMT_CONSTEXPR20 int num_bigits() const { - return static_cast(bigits_.size()) + exp_; - } - - FMT_NOINLINE FMT_CONSTEXPR20 bigint& operator<<=(int shift) { - FMT_ASSERT(shift >= 0, ""); - exp_ += shift / bigit_bits; - shift %= bigit_bits; - if (shift == 0) return *this; - bigit carry = 0; - for (size_t i = 0, n = bigits_.size(); i < n; ++i) { - bigit c = bigits_[i] >> (bigit_bits - shift); - bigits_[i] = (bigits_[i] << shift) + carry; - carry = c; - } - if (carry != 0) bigits_.push_back(carry); - return *this; - } - - template FMT_CONSTEXPR20 bigint& operator*=(Int value) { - FMT_ASSERT(value > 0, ""); - multiply(uint32_or_64_or_128_t(value)); - return *this; - } - - friend FMT_CONSTEXPR20 int compare(const bigint& lhs, const bigint& rhs) { - int num_lhs_bigits = lhs.num_bigits(), num_rhs_bigits = rhs.num_bigits(); - if (num_lhs_bigits != num_rhs_bigits) - return num_lhs_bigits > num_rhs_bigits ? 1 : -1; - int i = static_cast(lhs.bigits_.size()) - 1; - int j = static_cast(rhs.bigits_.size()) - 1; - int end = i - j; - if (end < 0) end = 0; - for (; i >= end; --i, --j) { - bigit lhs_bigit = lhs[i], rhs_bigit = rhs[j]; - if (lhs_bigit != rhs_bigit) return lhs_bigit > rhs_bigit ? 1 : -1; - } - if (i != j) return i > j ? 1 : -1; - return 0; - } - - // Returns compare(lhs1 + lhs2, rhs). - friend FMT_CONSTEXPR20 int add_compare(const bigint& lhs1, const bigint& lhs2, - const bigint& rhs) { - int max_lhs_bigits = (std::max)(lhs1.num_bigits(), lhs2.num_bigits()); - int num_rhs_bigits = rhs.num_bigits(); - if (max_lhs_bigits + 1 < num_rhs_bigits) return -1; - if (max_lhs_bigits > num_rhs_bigits) return 1; - auto get_bigit = [](const bigint& n, int i) -> bigit { - return i >= n.exp_ && i < n.num_bigits() ? n[i - n.exp_] : 0; - }; - double_bigit borrow = 0; - int min_exp = (std::min)((std::min)(lhs1.exp_, lhs2.exp_), rhs.exp_); - for (int i = num_rhs_bigits - 1; i >= min_exp; --i) { - double_bigit sum = - static_cast(get_bigit(lhs1, i)) + get_bigit(lhs2, i); - bigit rhs_bigit = get_bigit(rhs, i); - if (sum > rhs_bigit + borrow) return 1; - borrow = rhs_bigit + borrow - sum; - if (borrow > 1) return -1; - borrow <<= bigit_bits; - } - return borrow != 0 ? -1 : 0; - } - - // Assigns pow(10, exp) to this bigint. - FMT_CONSTEXPR20 void assign_pow10(int exp) { - FMT_ASSERT(exp >= 0, ""); - if (exp == 0) return *this = 1; - // Find the top bit. - int bitmask = 1; - while (exp >= bitmask) bitmask <<= 1; - bitmask >>= 1; - // pow(10, exp) = pow(5, exp) * pow(2, exp). First compute pow(5, exp) by - // repeated squaring and multiplication. - *this = 5; - bitmask >>= 1; - while (bitmask != 0) { - square(); - if ((exp & bitmask) != 0) *this *= 5; - bitmask >>= 1; - } - *this <<= exp; // Multiply by pow(2, exp) by shifting. - } - - FMT_CONSTEXPR20 void square() { - int num_bigits = static_cast(bigits_.size()); - int num_result_bigits = 2 * num_bigits; - basic_memory_buffer n(std::move(bigits_)); - bigits_.resize(to_unsigned(num_result_bigits)); - auto sum = uint128_t(); - for (int bigit_index = 0; bigit_index < num_bigits; ++bigit_index) { - // Compute bigit at position bigit_index of the result by adding - // cross-product terms n[i] * n[j] such that i + j == bigit_index. - for (int i = 0, j = bigit_index; j >= 0; ++i, --j) { - // Most terms are multiplied twice which can be optimized in the future. - sum += static_cast(n[i]) * n[j]; - } - (*this)[bigit_index] = static_cast(sum); - sum >>= num_bits(); // Compute the carry. - } - // Do the same for the top half. - for (int bigit_index = num_bigits; bigit_index < num_result_bigits; - ++bigit_index) { - for (int j = num_bigits - 1, i = bigit_index - j; i < num_bigits;) - sum += static_cast(n[i++]) * n[j--]; - (*this)[bigit_index] = static_cast(sum); - sum >>= num_bits(); - } - remove_leading_zeros(); - exp_ *= 2; - } - - // If this bigint has a bigger exponent than other, adds trailing zero to make - // exponents equal. This simplifies some operations such as subtraction. - FMT_CONSTEXPR20 void align(const bigint& other) { - int exp_difference = exp_ - other.exp_; - if (exp_difference <= 0) return; - int num_bigits = static_cast(bigits_.size()); - bigits_.resize(to_unsigned(num_bigits + exp_difference)); - for (int i = num_bigits - 1, j = i + exp_difference; i >= 0; --i, --j) - bigits_[j] = bigits_[i]; - std::uninitialized_fill_n(bigits_.data(), exp_difference, 0); - exp_ -= exp_difference; - } - - // Divides this bignum by divisor, assigning the remainder to this and - // returning the quotient. - FMT_CONSTEXPR20 int divmod_assign(const bigint& divisor) { - FMT_ASSERT(this != &divisor, ""); - if (compare(*this, divisor) < 0) return 0; - FMT_ASSERT(divisor.bigits_[divisor.bigits_.size() - 1u] != 0, ""); - align(divisor); - int quotient = 0; - do { - subtract_aligned(divisor); - ++quotient; - } while (compare(*this, divisor) >= 0); - return quotient; - } -}; - -enum class round_direction { unknown, up, down }; - -// Given the divisor (normally a power of 10), the remainder = v % divisor for -// some number v and the error, returns whether v should be rounded up, down, or -// whether the rounding direction can't be determined due to error. -// error should be less than divisor / 2. -FMT_CONSTEXPR inline round_direction get_round_direction(uint64_t divisor, - uint64_t remainder, - uint64_t error) { - FMT_ASSERT(remainder < divisor, ""); // divisor - remainder won't overflow. - FMT_ASSERT(error < divisor, ""); // divisor - error won't overflow. - FMT_ASSERT(error < divisor - error, ""); // error * 2 won't overflow. - // Round down if (remainder + error) * 2 <= divisor. - if (remainder <= divisor - remainder && error * 2 <= divisor - remainder * 2) - return round_direction::down; - // Round up if (remainder - error) * 2 >= divisor. - if (remainder >= error && - remainder - error >= divisor - (remainder - error)) { - return round_direction::up; - } - return round_direction::unknown; -} - -namespace digits { -enum result { - more, // Generate more digits. - done, // Done generating digits. - error // Digit generation cancelled due to an error. -}; -} - -struct gen_digits_handler { - char* buf; - int size; - int precision; - int exp10; - bool fixed; - - FMT_CONSTEXPR digits::result on_digit(char digit, uint64_t divisor, - uint64_t remainder, uint64_t error, - bool integral) { - FMT_ASSERT(remainder < divisor, ""); - buf[size++] = digit; - if (!integral && error >= remainder) return digits::error; - if (size < precision) return digits::more; - if (!integral) { - // Check if error * 2 < divisor with overflow prevention. - // The check is not needed for the integral part because error = 1 - // and divisor > (1 << 32) there. - if (error >= divisor || error >= divisor - error) return digits::error; - } else { - FMT_ASSERT(error == 1 && divisor > 2, ""); - } - auto dir = get_round_direction(divisor, remainder, error); - if (dir != round_direction::up) - return dir == round_direction::down ? digits::done : digits::error; - ++buf[size - 1]; - for (int i = size - 1; i > 0 && buf[i] > '9'; --i) { - buf[i] = '0'; - ++buf[i - 1]; - } - if (buf[0] > '9') { - buf[0] = '1'; - if (fixed) - buf[size++] = '0'; - else - ++exp10; - } - return digits::done; - } -}; - -inline FMT_CONSTEXPR20 void adjust_precision(int& precision, int exp10) { - // Adjust fixed precision by exponent because it is relative to decimal - // point. - if (exp10 > 0 && precision > max_value() - exp10) - FMT_THROW(format_error("number is too big")); - precision += exp10; -} - -// Generates output using the Grisu digit-gen algorithm. -// error: the size of the region (lower, upper) outside of which numbers -// definitely do not round to value (Delta in Grisu3). -FMT_INLINE FMT_CONSTEXPR20 digits::result grisu_gen_digits( - fp value, uint64_t error, int& exp, gen_digits_handler& handler) { - const fp one(1ULL << -value.e, value.e); - // The integral part of scaled value (p1 in Grisu) = value / one. It cannot be - // zero because it contains a product of two 64-bit numbers with MSB set (due - // to normalization) - 1, shifted right by at most 60 bits. - auto integral = static_cast(value.f >> -one.e); - FMT_ASSERT(integral != 0, ""); - FMT_ASSERT(integral == value.f >> -one.e, ""); - // The fractional part of scaled value (p2 in Grisu) c = value % one. - uint64_t fractional = value.f & (one.f - 1); - exp = count_digits(integral); // kappa in Grisu. - // Non-fixed formats require at least one digit and no precision adjustment. - if (handler.fixed) { - adjust_precision(handler.precision, exp + handler.exp10); - // Check if precision is satisfied just by leading zeros, e.g. - // format("{:.2f}", 0.001) gives "0.00" without generating any digits. - if (handler.precision <= 0) { - if (handler.precision < 0) return digits::done; - // Divide by 10 to prevent overflow. - uint64_t divisor = impl_data::power_of_10_64[exp - 1] << -one.e; - auto dir = get_round_direction(divisor, value.f / 10, error * 10); - if (dir == round_direction::unknown) return digits::error; - handler.buf[handler.size++] = dir == round_direction::up ? '1' : '0'; - return digits::done; - } - } - // Generate digits for the integral part. This can produce up to 10 digits. - do { - uint32_t digit = 0; - auto divmod_integral = [&](uint32_t divisor) { - digit = integral / divisor; - integral %= divisor; - }; - // This optimization by Milo Yip reduces the number of integer divisions by - // one per iteration. - switch (exp) { - case 10: - divmod_integral(1000000000); - break; - case 9: - divmod_integral(100000000); - break; - case 8: - divmod_integral(10000000); - break; - case 7: - divmod_integral(1000000); - break; - case 6: - divmod_integral(100000); - break; - case 5: - divmod_integral(10000); - break; - case 4: - divmod_integral(1000); - break; - case 3: - divmod_integral(100); - break; - case 2: - divmod_integral(10); - break; - case 1: - digit = integral; - integral = 0; - break; - default: - FMT_ASSERT(false, "invalid number of digits"); - } - --exp; - auto remainder = (static_cast(integral) << -one.e) + fractional; - auto result = handler.on_digit(static_cast('0' + digit), - impl_data::power_of_10_64[exp] << -one.e, - remainder, error, true); - if (result != digits::more) return result; - } while (exp > 0); - // Generate digits for the fractional part. - for (;;) { - fractional *= 10; - error *= 10; - char digit = static_cast('0' + (fractional >> -one.e)); - fractional &= one.f - 1; - --exp; - auto result = handler.on_digit(digit, one.f, fractional, error, false); - if (result != digits::more) return result; - } -} - inline FMT_CONSTEXPR20 uint128_fallback& uint128_fallback::operator+=( uint64_t n) noexcept { if (is_constant_evaluated()) { @@ -1992,143 +1441,6 @@ small_divisor_case_label: } } // namespace dragonbox -// format_dragon flags. -enum dragon { - predecessor_closer = 1, - fixup = 2, // Run fixup to correct exp10 which can be off by one. - fixed = 4, -}; - -// Formats a floating-point number using a variation of the Fixed-Precision -// Positive Floating-Point Printout ((FPP)^2) algorithm by Steele & White: -// https://fmt.dev/papers/p372-steele.pdf. -FMT_CONSTEXPR20 inline void format_dragon(basic_fp value, - unsigned flags, int num_digits, - buffer& buf, int& exp10) { - bigint numerator; // 2 * R in (FPP)^2. - bigint denominator; // 2 * S in (FPP)^2. - // lower and upper are differences between value and corresponding boundaries. - bigint lower; // (M^- in (FPP)^2). - bigint upper_store; // upper's value if different from lower. - bigint* upper = nullptr; // (M^+ in (FPP)^2). - // Shift numerator and denominator by an extra bit or two (if lower boundary - // is closer) to make lower and upper integers. This eliminates multiplication - // by 2 during later computations. - bool is_predecessor_closer = (flags & dragon::predecessor_closer) != 0; - int shift = is_predecessor_closer ? 2 : 1; - if (value.e >= 0) { - numerator = value.f; - numerator <<= value.e + shift; - lower = 1; - lower <<= value.e; - if (is_predecessor_closer) { - upper_store = 1; - upper_store <<= value.e + 1; - upper = &upper_store; - } - denominator.assign_pow10(exp10); - denominator <<= shift; - } else if (exp10 < 0) { - numerator.assign_pow10(-exp10); - lower.assign(numerator); - if (is_predecessor_closer) { - upper_store.assign(numerator); - upper_store <<= 1; - upper = &upper_store; - } - numerator *= value.f; - numerator <<= shift; - denominator = 1; - denominator <<= shift - value.e; - } else { - numerator = value.f; - numerator <<= shift; - denominator.assign_pow10(exp10); - denominator <<= shift - value.e; - lower = 1; - if (is_predecessor_closer) { - upper_store = 1ULL << 1; - upper = &upper_store; - } - } - bool even = (value.f & 1) == 0; - if (!upper) upper = &lower; - if ((flags & dragon::fixup) != 0) { - if (add_compare(numerator, *upper, denominator) + even <= 0) { - --exp10; - numerator *= 10; - if (num_digits < 0) { - lower *= 10; - if (upper != &lower) *upper *= 10; - } - } - if ((flags & dragon::fixed) != 0) adjust_precision(num_digits, exp10 + 1); - } - // Invariant: value == (numerator / denominator) * pow(10, exp10). - if (num_digits < 0) { - // Generate the shortest representation. - num_digits = 0; - char* data = buf.data(); - for (;;) { - int digit = numerator.divmod_assign(denominator); - bool low = compare(numerator, lower) - even < 0; // numerator <[=] lower. - // numerator + upper >[=] pow10: - bool high = add_compare(numerator, *upper, denominator) + even > 0; - data[num_digits++] = static_cast('0' + digit); - if (low || high) { - if (!low) { - ++data[num_digits - 1]; - } else if (high) { - int result = add_compare(numerator, numerator, denominator); - // Round half to even. - if (result > 0 || (result == 0 && (digit % 2) != 0)) - ++data[num_digits - 1]; - } - buf.try_resize(to_unsigned(num_digits)); - exp10 -= num_digits - 1; - return; - } - numerator *= 10; - lower *= 10; - if (upper != &lower) *upper *= 10; - } - } - // Generate the given number of digits. - exp10 -= num_digits - 1; - if (num_digits == 0) { - denominator *= 10; - auto digit = add_compare(numerator, numerator, denominator) > 0 ? '1' : '0'; - buf.push_back(digit); - return; - } - buf.try_resize(to_unsigned(num_digits)); - for (int i = 0; i < num_digits - 1; ++i) { - int digit = numerator.divmod_assign(denominator); - buf[i] = static_cast('0' + digit); - numerator *= 10; - } - int digit = numerator.divmod_assign(denominator); - auto result = add_compare(numerator, numerator, denominator); - if (result > 0 || (result == 0 && (digit % 2) != 0)) { - if (digit == 9) { - const auto overflow = '0' + 10; - buf[num_digits - 1] = overflow; - // Propagate the carry. - for (int i = num_digits - 1; i > 0 && buf[i] == overflow; --i) { - buf[i] = '0'; - ++buf[i - 1]; - } - if (buf[0] == overflow) { - buf[0] = '1'; - ++exp10; - } - return; - } - ++digit; - } - buf[num_digits - 1] = static_cast('0' + digit); -} - #ifdef _MSC_VER FMT_FUNC auto fmt_snprintf(char* buf, size_t size, const char* fmt, ...) -> int { @@ -2139,95 +1451,6 @@ FMT_FUNC auto fmt_snprintf(char* buf, size_t size, const char* fmt, ...) return result; } #endif - -template -FMT_HEADER_ONLY_CONSTEXPR20 int format_float(Float value, int precision, - float_specs specs, - buffer& buf) { - // float is passed as double to reduce the number of instantiations. - static_assert(!std::is_same::value, ""); - FMT_ASSERT(value >= 0, "value is negative"); - auto converted_value = convert_float(value); - - const bool fixed = specs.format == float_format::fixed; - if (value <= 0) { // <= instead of == to silence a warning. - if (precision <= 0 || !fixed) { - buf.push_back('0'); - return 0; - } - buf.try_resize(to_unsigned(precision)); - fill_n(buf.data(), precision, '0'); - return -precision; - } - - int exp = 0; - bool use_dragon = true; - unsigned dragon_flags = 0; - if (!is_fast_float()) { - const auto inv_log2_10 = 0.3010299956639812; // 1 / log2(10) - using info = dragonbox::float_info; - const auto f = basic_fp(converted_value); - // Compute exp, an approximate power of 10, such that - // 10^(exp - 1) <= value < 10^exp or 10^exp <= value < 10^(exp + 1). - // This is based on log10(value) == log2(value) / log2(10) and approximation - // of log2(value) by e + num_fraction_bits idea from double-conversion. - exp = static_cast( - std::ceil((f.e + count_digits<1>(f.f) - 1) * inv_log2_10 - 1e-10)); - dragon_flags = dragon::fixup; - } else if (!is_constant_evaluated() && precision < 0) { - // Use Dragonbox for the shortest format. - if (specs.binary32) { - auto dec = dragonbox::to_decimal(static_cast(value)); - write(buffer_appender(buf), dec.significand); - return dec.exponent; - } - auto dec = dragonbox::to_decimal(static_cast(value)); - write(buffer_appender(buf), dec.significand); - return dec.exponent; - } else { - // Use Grisu + Dragon4 for the given precision: - // https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf. - const int min_exp = -60; // alpha in Grisu. - int cached_exp10 = 0; // K in Grisu. - fp normalized = normalize(fp(converted_value)); - const auto cached_pow = get_cached_power( - min_exp - (normalized.e + fp::num_significand_bits), cached_exp10); - normalized = normalized * cached_pow; - gen_digits_handler handler{buf.data(), 0, precision, -cached_exp10, fixed}; - if (grisu_gen_digits(normalized, 1, exp, handler) != digits::error && - !is_constant_evaluated()) { - exp += handler.exp10; - buf.try_resize(to_unsigned(handler.size)); - use_dragon = false; - } else { - exp += handler.size - cached_exp10 - 1; - precision = handler.precision; - } - } - if (use_dragon) { - auto f = basic_fp(); - bool is_predecessor_closer = specs.binary32 - ? f.assign(static_cast(value)) - : f.assign(converted_value); - if (is_predecessor_closer) dragon_flags |= dragon::predecessor_closer; - if (fixed) dragon_flags |= dragon::fixed; - // Limit precision to the maximum possible number of significant digits in - // an IEEE754 double because we don't need to generate zeros. - const int max_double_digits = 767; - if (precision > max_double_digits) precision = max_double_digits; - format_dragon(f, dragon_flags, precision, buf, exp); - } - if (!fixed && !specs.showpoint) { - // Remove trailing zeros. - auto num_digits = buf.size(); - while (num_digits > 0 && buf[num_digits - 1] == '0') { - --num_digits; - ++exp; - } - buf.try_resize(num_digits); - } - return exp; -} } // namespace detail template <> struct formatter { diff --git a/include/fmt/format.h b/include/fmt/format.h index 610a99c5..71f59c9c 100644 --- a/include/fmt/format.h +++ b/include/fmt/format.h @@ -1410,10 +1410,130 @@ template struct basic_fp { } }; -template -FMT_HEADER_ONLY_CONSTEXPR20 auto format_float(T value, int precision, - float_specs specs, - buffer& buf) -> int; +using fp = basic_fp; + +// Normalizes the value converted from double and multiplied by (1 << SHIFT). +template +FMT_CONSTEXPR basic_fp normalize(basic_fp value) { + // Handle subnormals. + const auto implicit_bit = F(1) << num_significand_bits(); + const auto shifted_implicit_bit = implicit_bit << SHIFT; + while ((value.f & shifted_implicit_bit) == 0) { + value.f <<= 1; + --value.e; + } + // Subtract 1 to account for hidden bit. + const auto offset = basic_fp::num_significand_bits - + num_significand_bits() - SHIFT - 1; + value.f <<= offset; + value.e -= offset; + return value; +} + +// Computes lhs * rhs / pow(2, 64) rounded to nearest with half-up tie breaking. +FMT_CONSTEXPR inline uint64_t multiply(uint64_t lhs, uint64_t rhs) { +#if FMT_USE_INT128 + auto product = static_cast<__uint128_t>(lhs) * rhs; + auto f = static_cast(product >> 64); + return (static_cast(product) & (1ULL << 63)) != 0 ? f + 1 : f; +#else + // Multiply 32-bit parts of significands. + uint64_t mask = (1ULL << 32) - 1; + uint64_t a = lhs >> 32, b = lhs & mask; + uint64_t c = rhs >> 32, d = rhs & mask; + uint64_t ac = a * c, bc = b * c, ad = a * d, bd = b * d; + // Compute mid 64-bit of result and round. + uint64_t mid = (bd >> 32) + (ad & mask) + (bc & mask) + (1U << 31); + return ac + (ad >> 32) + (bc >> 32) + (mid >> 32); +#endif +} + +FMT_CONSTEXPR inline fp operator*(fp x, fp y) { + return {multiply(x.f, y.f), x.e + y.e + 64}; +} + +template struct basic_data { + // Normalized 64-bit significands of pow(10, k), for k = -348, -340, ..., 340. + // These are generated by support/compute-powers.py. + static constexpr uint64_t pow10_significands[87] = { + 0xfa8fd5a0081c0288, 0xbaaee17fa23ebf76, 0x8b16fb203055ac76, + 0xcf42894a5dce35ea, 0x9a6bb0aa55653b2d, 0xe61acf033d1a45df, + 0xab70fe17c79ac6ca, 0xff77b1fcbebcdc4f, 0xbe5691ef416bd60c, + 0x8dd01fad907ffc3c, 0xd3515c2831559a83, 0x9d71ac8fada6c9b5, + 0xea9c227723ee8bcb, 0xaecc49914078536d, 0x823c12795db6ce57, + 0xc21094364dfb5637, 0x9096ea6f3848984f, 0xd77485cb25823ac7, + 0xa086cfcd97bf97f4, 0xef340a98172aace5, 0xb23867fb2a35b28e, + 0x84c8d4dfd2c63f3b, 0xc5dd44271ad3cdba, 0x936b9fcebb25c996, + 0xdbac6c247d62a584, 0xa3ab66580d5fdaf6, 0xf3e2f893dec3f126, + 0xb5b5ada8aaff80b8, 0x87625f056c7c4a8b, 0xc9bcff6034c13053, + 0x964e858c91ba2655, 0xdff9772470297ebd, 0xa6dfbd9fb8e5b88f, + 0xf8a95fcf88747d94, 0xb94470938fa89bcf, 0x8a08f0f8bf0f156b, + 0xcdb02555653131b6, 0x993fe2c6d07b7fac, 0xe45c10c42a2b3b06, + 0xaa242499697392d3, 0xfd87b5f28300ca0e, 0xbce5086492111aeb, + 0x8cbccc096f5088cc, 0xd1b71758e219652c, 0x9c40000000000000, + 0xe8d4a51000000000, 0xad78ebc5ac620000, 0x813f3978f8940984, + 0xc097ce7bc90715b3, 0x8f7e32ce7bea5c70, 0xd5d238a4abe98068, + 0x9f4f2726179a2245, 0xed63a231d4c4fb27, 0xb0de65388cc8ada8, + 0x83c7088e1aab65db, 0xc45d1df942711d9a, 0x924d692ca61be758, + 0xda01ee641a708dea, 0xa26da3999aef774a, 0xf209787bb47d6b85, + 0xb454e4a179dd1877, 0x865b86925b9bc5c2, 0xc83553c5c8965d3d, + 0x952ab45cfa97a0b3, 0xde469fbd99a05fe3, 0xa59bc234db398c25, + 0xf6c69a72a3989f5c, 0xb7dcbf5354e9bece, 0x88fcf317f22241e2, + 0xcc20ce9bd35c78a5, 0x98165af37b2153df, 0xe2a0b5dc971f303a, + 0xa8d9d1535ce3b396, 0xfb9b7cd9a4a7443c, 0xbb764c4ca7a44410, + 0x8bab8eefb6409c1a, 0xd01fef10a657842c, 0x9b10a4e5e9913129, + 0xe7109bfba19c0c9d, 0xac2820d9623bf429, 0x80444b5e7aa7cf85, + 0xbf21e44003acdd2d, 0x8e679c2f5e44ff8f, 0xd433179d9c8cb841, + 0x9e19db92b4e31ba9, 0xeb96bf6ebadf77d9, 0xaf87023b9bf0ee6b, + }; + +#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wnarrowing" +#endif + // Binary exponents of pow(10, k), for k = -348, -340, ..., 340, corresponding + // to significands above. + static constexpr int16_t pow10_exponents[87] = { + -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, -954, + -927, -901, -874, -847, -821, -794, -768, -741, -715, -688, -661, + -635, -608, -582, -555, -529, -502, -475, -449, -422, -396, -369, + -343, -316, -289, -263, -236, -210, -183, -157, -130, -103, -77, + -50, -24, 3, 30, 56, 83, 109, 136, 162, 189, 216, + 242, 269, 295, 322, 348, 375, 402, 428, 455, 481, 508, + 534, 561, 588, 614, 641, 667, 694, 720, 747, 774, 800, + 827, 853, 880, 907, 933, 960, 986, 1013, 1039, 1066}; +#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 +# pragma GCC diagnostic pop +#endif + + static constexpr uint64_t power_of_10_64[20] = { + 1, FMT_POWERS_OF_10(1ULL), FMT_POWERS_OF_10(1000000000ULL), + 10000000000000000000ULL}; +}; + +// This is a struct rather than an alias to avoid shadowing warnings in gcc. +struct data : basic_data<> {}; + +// Returns a cached power of 10 `c_k = c_k.f * pow(2, c_k.e)` such that its +// (binary) exponent satisfies `min_exponent <= c_k.e <= min_exponent + 28`. +FMT_CONSTEXPR inline fp get_cached_power(int min_exponent, + int& pow10_exponent) { + const int shift = 32; + // log10(2) = 0x0.4d104d427de7fbcc... + const int64_t significand = 0x4d104d427de7fbcc; + int index = static_cast( + ((min_exponent + fp::num_significand_bits - 1) * (significand >> shift) + + ((int64_t(1) << shift) - 1)) // ceil + >> 32 // arithmetic shift + ); + // Decimal exponent of the first (smallest) cached power of 10. + const int first_dec_exp = -348; + // Difference between 2 consecutive decimal exponents in cached powers of 10. + const int dec_exp_step = 8; + index = (index - first_dec_exp - 1) / dec_exp_step + 1; + pow10_exponent = first_dec_exp + index * dec_exp_step; + return {data::pow10_significands[index], data::pow10_exponents[index]}; +} #ifndef _MSC_VER # define FMT_SNPRINTF snprintf @@ -2055,6 +2175,28 @@ FMT_CONSTEXPR auto write(OutputIt out, const Char* s, : write_ptr(out, bit_cast(s), &specs); } +template ::value && + !std::is_same::value && + !std::is_same::value)> +FMT_CONSTEXPR auto write(OutputIt out, T value) -> OutputIt { + auto abs_value = static_cast>(value); + bool negative = is_negative(value); + // Don't do -abs_value since it trips unsigned-integer-overflow sanitizer. + if (negative) abs_value = ~abs_value + 1; + int num_digits = count_digits(abs_value); + auto size = (negative ? 1 : 0) + static_cast(num_digits); + auto it = reserve(out, size); + if (auto ptr = to_pointer(it, size)) { + if (negative) *ptr++ = static_cast('-'); + format_decimal(ptr, abs_value, num_digits); + return out; + } + if (negative) *it++ = static_cast('-'); + it = format_decimal(it, abs_value, num_digits).end; + return base_iterator(out, it); +} + template FMT_CONSTEXPR20 auto write_nonfinite(OutputIt out, bool isnan, basic_format_specs specs, @@ -2081,12 +2223,12 @@ struct big_decimal_fp { int exponent; }; -constexpr auto get_significand_size(const big_decimal_fp& fp) -> int { - return fp.significand_size; +constexpr auto get_significand_size(const big_decimal_fp& f) -> int { + return f.significand_size; } template -inline auto get_significand_size(const dragonbox::decimal_fp& fp) -> int { - return count_digits(fp.significand); +inline auto get_significand_size(const dragonbox::decimal_fp& f) -> int { + return count_digits(f.significand); } template @@ -2180,12 +2322,12 @@ FMT_CONSTEXPR20 auto write_significand(OutputIt out, T significand, template > -FMT_CONSTEXPR20 auto do_write_float(OutputIt out, const DecimalFP& fp, +FMT_CONSTEXPR20 auto do_write_float(OutputIt out, const DecimalFP& f, const basic_format_specs& specs, float_specs fspecs, locale_ref loc) -> OutputIt { - auto significand = fp.significand; - int significand_size = get_significand_size(fp); + auto significand = f.significand; + int significand_size = get_significand_size(f); const Char zero = static_cast('0'); auto sign = fspecs.sign; size_t size = to_unsigned(significand_size) + (sign ? 1 : 0); @@ -2194,7 +2336,7 @@ FMT_CONSTEXPR20 auto do_write_float(OutputIt out, const DecimalFP& fp, Char decimal_point = fspecs.locale ? detail::decimal_point(loc) : static_cast('.'); - int output_exp = fp.exponent + significand_size - 1; + int output_exp = f.exponent + significand_size - 1; auto use_exp_format = [=]() { if (fspecs.format == float_format::exp) return true; if (fspecs.format != float_format::general) return false; @@ -2232,10 +2374,10 @@ FMT_CONSTEXPR20 auto do_write_float(OutputIt out, const DecimalFP& fp, : base_iterator(out, write(reserve(out, size))); } - int exp = fp.exponent + significand_size; - if (fp.exponent >= 0) { + int exp = f.exponent + significand_size; + if (f.exponent >= 0) { // 1234e5 -> 123400000[.0+] - size += to_unsigned(fp.exponent); + size += to_unsigned(f.exponent); int num_zeros = fspecs.precision - exp; abort_fuzzing_if(num_zeros > 5000); if (fspecs.showpoint) { @@ -2248,7 +2390,7 @@ FMT_CONSTEXPR20 auto do_write_float(OutputIt out, const DecimalFP& fp, return write_padded(out, specs, size, [&](iterator it) { if (sign) *it++ = detail::sign(sign); it = write_significand(it, significand, significand_size, - fp.exponent, grouping); + f.exponent, grouping); if (!fspecs.showpoint) return it; *it++ = decimal_point; return num_zeros > 0 ? detail::fill_n(it, num_zeros, zero) : it; @@ -2299,16 +2441,16 @@ template class fallback_digit_grouping { }; template -FMT_CONSTEXPR20 auto write_float(OutputIt out, const DecimalFP& fp, +FMT_CONSTEXPR20 auto write_float(OutputIt out, const DecimalFP& f, const basic_format_specs& specs, float_specs fspecs, locale_ref loc) -> OutputIt { if (is_constant_evaluated()) { return do_write_float>(out, fp, specs, fspecs, + fallback_digit_grouping>(out, f, specs, fspecs, loc); } else { - return do_write_float(out, fp, specs, fspecs, loc); + return do_write_float(out, f, specs, fspecs, loc); } } @@ -2351,6 +2493,659 @@ FMT_INLINE FMT_CONSTEXPR bool signbit(T value) { return std::signbit(static_cast(value)); } +enum class round_direction { unknown, up, down }; + +// Given the divisor (normally a power of 10), the remainder = v % divisor for +// some number v and the error, returns whether v should be rounded up, down, or +// whether the rounding direction can't be determined due to error. +// error should be less than divisor / 2. +FMT_CONSTEXPR inline round_direction get_round_direction(uint64_t divisor, + uint64_t remainder, + uint64_t error) { + FMT_ASSERT(remainder < divisor, ""); // divisor - remainder won't overflow. + FMT_ASSERT(error < divisor, ""); // divisor - error won't overflow. + FMT_ASSERT(error < divisor - error, ""); // error * 2 won't overflow. + // Round down if (remainder + error) * 2 <= divisor. + if (remainder <= divisor - remainder && error * 2 <= divisor - remainder * 2) + return round_direction::down; + // Round up if (remainder - error) * 2 >= divisor. + if (remainder >= error && + remainder - error >= divisor - (remainder - error)) { + return round_direction::up; + } + return round_direction::unknown; +} + +namespace digits { +enum result { + more, // Generate more digits. + done, // Done generating digits. + error // Digit generation cancelled due to an error. +}; +} + +struct gen_digits_handler { + char* buf; + int size; + int precision; + int exp10; + bool fixed; + + FMT_CONSTEXPR digits::result on_digit(char digit, uint64_t divisor, + uint64_t remainder, uint64_t error, + bool integral) { + FMT_ASSERT(remainder < divisor, ""); + buf[size++] = digit; + if (!integral && error >= remainder) return digits::error; + if (size < precision) return digits::more; + if (!integral) { + // Check if error * 2 < divisor with overflow prevention. + // The check is not needed for the integral part because error = 1 + // and divisor > (1 << 32) there. + if (error >= divisor || error >= divisor - error) return digits::error; + } else { + FMT_ASSERT(error == 1 && divisor > 2, ""); + } + auto dir = get_round_direction(divisor, remainder, error); + if (dir != round_direction::up) + return dir == round_direction::down ? digits::done : digits::error; + ++buf[size - 1]; + for (int i = size - 1; i > 0 && buf[i] > '9'; --i) { + buf[i] = '0'; + ++buf[i - 1]; + } + if (buf[0] > '9') { + buf[0] = '1'; + if (fixed) + buf[size++] = '0'; + else + ++exp10; + } + return digits::done; + } +}; + +inline FMT_CONSTEXPR20 void adjust_precision(int& precision, int exp10) { + // Adjust fixed precision by exponent because it is relative to decimal + // point. + if (exp10 > 0 && precision > max_value() - exp10) + FMT_THROW(format_error("number is too big")); + precision += exp10; +} + +// Generates output using the Grisu digit-gen algorithm. +// error: the size of the region (lower, upper) outside of which numbers +// definitely do not round to value (Delta in Grisu3). +FMT_INLINE FMT_CONSTEXPR20 auto grisu_gen_digits(fp value, uint64_t error, + int& exp, + gen_digits_handler& handler) + -> digits::result { + const fp one(1ULL << -value.e, value.e); + // The integral part of scaled value (p1 in Grisu) = value / one. It cannot be + // zero because it contains a product of two 64-bit numbers with MSB set (due + // to normalization) - 1, shifted right by at most 60 bits. + auto integral = static_cast(value.f >> -one.e); + FMT_ASSERT(integral != 0, ""); + FMT_ASSERT(integral == value.f >> -one.e, ""); + // The fractional part of scaled value (p2 in Grisu) c = value % one. + uint64_t fractional = value.f & (one.f - 1); + exp = count_digits(integral); // kappa in Grisu. + // Non-fixed formats require at least one digit and no precision adjustment. + if (handler.fixed) { + adjust_precision(handler.precision, exp + handler.exp10); + // Check if precision is satisfied just by leading zeros, e.g. + // format("{:.2f}", 0.001) gives "0.00" without generating any digits. + if (handler.precision <= 0) { + if (handler.precision < 0) return digits::done; + // Divide by 10 to prevent overflow. + uint64_t divisor = data::power_of_10_64[exp - 1] << -one.e; + auto dir = get_round_direction(divisor, value.f / 10, error * 10); + if (dir == round_direction::unknown) return digits::error; + handler.buf[handler.size++] = dir == round_direction::up ? '1' : '0'; + return digits::done; + } + } + // Generate digits for the integral part. This can produce up to 10 digits. + do { + uint32_t digit = 0; + auto divmod_integral = [&](uint32_t divisor) { + digit = integral / divisor; + integral %= divisor; + }; + // This optimization by Milo Yip reduces the number of integer divisions by + // one per iteration. + switch (exp) { + case 10: + divmod_integral(1000000000); + break; + case 9: + divmod_integral(100000000); + break; + case 8: + divmod_integral(10000000); + break; + case 7: + divmod_integral(1000000); + break; + case 6: + divmod_integral(100000); + break; + case 5: + divmod_integral(10000); + break; + case 4: + divmod_integral(1000); + break; + case 3: + divmod_integral(100); + break; + case 2: + divmod_integral(10); + break; + case 1: + digit = integral; + integral = 0; + break; + default: + FMT_ASSERT(false, "invalid number of digits"); + } + --exp; + auto remainder = (static_cast(integral) << -one.e) + fractional; + auto result = handler.on_digit(static_cast('0' + digit), + data::power_of_10_64[exp] << -one.e, + remainder, error, true); + if (result != digits::more) return result; + } while (exp > 0); + // Generate digits for the fractional part. + for (;;) { + fractional *= 10; + error *= 10; + char digit = static_cast('0' + (fractional >> -one.e)); + fractional &= one.f - 1; + --exp; + auto result = handler.on_digit(digit, one.f, fractional, error, false); + if (result != digits::more) return result; + } +} + +class bigint { + private: + // A bigint is stored as an array of bigits (big digits), with bigit at index + // 0 being the least significant one. + using bigit = uint32_t; + using double_bigit = uint64_t; + enum { bigits_capacity = 32 }; + basic_memory_buffer bigits_; + int exp_; + + FMT_CONSTEXPR20 bigit operator[](int index) const { + return bigits_[to_unsigned(index)]; + } + FMT_CONSTEXPR20 bigit& operator[](int index) { + return bigits_[to_unsigned(index)]; + } + + static FMT_CONSTEXPR_DECL const int bigit_bits = num_bits(); + + friend struct formatter; + + FMT_CONSTEXPR20 void subtract_bigits(int index, bigit other, bigit& borrow) { + auto result = static_cast((*this)[index]) - other - borrow; + (*this)[index] = static_cast(result); + borrow = static_cast(result >> (bigit_bits * 2 - 1)); + } + + FMT_CONSTEXPR20 void remove_leading_zeros() { + int num_bigits = static_cast(bigits_.size()) - 1; + while (num_bigits > 0 && (*this)[num_bigits] == 0) --num_bigits; + bigits_.resize(to_unsigned(num_bigits + 1)); + } + + // Computes *this -= other assuming aligned bigints and *this >= other. + FMT_CONSTEXPR20 void subtract_aligned(const bigint& other) { + FMT_ASSERT(other.exp_ >= exp_, "unaligned bigints"); + FMT_ASSERT(compare(*this, other) >= 0, ""); + bigit borrow = 0; + int i = other.exp_ - exp_; + for (size_t j = 0, n = other.bigits_.size(); j != n; ++i, ++j) + subtract_bigits(i, other.bigits_[j], borrow); + while (borrow > 0) subtract_bigits(i, 0, borrow); + remove_leading_zeros(); + } + + FMT_CONSTEXPR20 void multiply(uint32_t value) { + const double_bigit wide_value = value; + bigit carry = 0; + for (size_t i = 0, n = bigits_.size(); i < n; ++i) { + double_bigit result = bigits_[i] * wide_value + carry; + bigits_[i] = static_cast(result); + carry = static_cast(result >> bigit_bits); + } + if (carry != 0) bigits_.push_back(carry); + } + + template ::value || + std::is_same::value)> + FMT_CONSTEXPR20 void multiply(UInt value) { + using half_uint = + conditional_t::value, uint64_t, uint32_t>; + const int shift = num_bits() - bigit_bits; + const UInt lower = static_cast(value); + const UInt upper = value >> num_bits(); + UInt carry = 0; + for (size_t i = 0, n = bigits_.size(); i < n; ++i) { + UInt result = lower * bigits_[i] + static_cast(carry); + carry = (upper * bigits_[i] << shift) + (result >> bigit_bits) + + (carry >> bigit_bits); + bigits_[i] = static_cast(result); + } + while (carry != 0) { + bigits_.push_back(static_cast(carry)); + carry >>= bigit_bits; + } + } + + template ::value || + std::is_same::value)> + FMT_CONSTEXPR20 void assign(UInt n) { + size_t num_bigits = 0; + do { + bigits_[num_bigits++] = static_cast(n); + n >>= bigit_bits; + } while (n != 0); + bigits_.resize(num_bigits); + exp_ = 0; + } + + public: + FMT_CONSTEXPR20 bigint() : exp_(0) {} + explicit bigint(uint64_t n) { assign(n); } + + bigint(const bigint&) = delete; + void operator=(const bigint&) = delete; + + FMT_CONSTEXPR20 void assign(const bigint& other) { + auto size = other.bigits_.size(); + bigits_.resize(size); + auto data = other.bigits_.data(); + std::copy(data, data + size, make_checked(bigits_.data(), size)); + exp_ = other.exp_; + } + + template FMT_CONSTEXPR20 void operator=(Int n) { + FMT_ASSERT(n > 0, ""); + assign(uint64_or_128_t(n)); + } + + FMT_CONSTEXPR20 int num_bigits() const { + return static_cast(bigits_.size()) + exp_; + } + + FMT_NOINLINE FMT_CONSTEXPR20 bigint& operator<<=(int shift) { + FMT_ASSERT(shift >= 0, ""); + exp_ += shift / bigit_bits; + shift %= bigit_bits; + if (shift == 0) return *this; + bigit carry = 0; + for (size_t i = 0, n = bigits_.size(); i < n; ++i) { + bigit c = bigits_[i] >> (bigit_bits - shift); + bigits_[i] = (bigits_[i] << shift) + carry; + carry = c; + } + if (carry != 0) bigits_.push_back(carry); + return *this; + } + + template FMT_CONSTEXPR20 bigint& operator*=(Int value) { + FMT_ASSERT(value > 0, ""); + multiply(uint32_or_64_or_128_t(value)); + return *this; + } + + friend FMT_CONSTEXPR20 int compare(const bigint& lhs, const bigint& rhs) { + int num_lhs_bigits = lhs.num_bigits(), num_rhs_bigits = rhs.num_bigits(); + if (num_lhs_bigits != num_rhs_bigits) + return num_lhs_bigits > num_rhs_bigits ? 1 : -1; + int i = static_cast(lhs.bigits_.size()) - 1; + int j = static_cast(rhs.bigits_.size()) - 1; + int end = i - j; + if (end < 0) end = 0; + for (; i >= end; --i, --j) { + bigit lhs_bigit = lhs[i], rhs_bigit = rhs[j]; + if (lhs_bigit != rhs_bigit) return lhs_bigit > rhs_bigit ? 1 : -1; + } + if (i != j) return i > j ? 1 : -1; + return 0; + } + + // Returns compare(lhs1 + lhs2, rhs). + friend FMT_CONSTEXPR20 int add_compare(const bigint& lhs1, const bigint& lhs2, + const bigint& rhs) { + auto minimum = [](int a, int b) { return a < b ? a : b; }; + auto maximum = [](int a, int b) { return a > b ? a : b; }; + int max_lhs_bigits = maximum(lhs1.num_bigits(), lhs2.num_bigits()); + int num_rhs_bigits = rhs.num_bigits(); + if (max_lhs_bigits + 1 < num_rhs_bigits) return -1; + if (max_lhs_bigits > num_rhs_bigits) return 1; + auto get_bigit = [](const bigint& n, int i) -> bigit { + return i >= n.exp_ && i < n.num_bigits() ? n[i - n.exp_] : 0; + }; + double_bigit borrow = 0; + int min_exp = minimum(minimum(lhs1.exp_, lhs2.exp_), rhs.exp_); + for (int i = num_rhs_bigits - 1; i >= min_exp; --i) { + double_bigit sum = + static_cast(get_bigit(lhs1, i)) + get_bigit(lhs2, i); + bigit rhs_bigit = get_bigit(rhs, i); + if (sum > rhs_bigit + borrow) return 1; + borrow = rhs_bigit + borrow - sum; + if (borrow > 1) return -1; + borrow <<= bigit_bits; + } + return borrow != 0 ? -1 : 0; + } + + // Assigns pow(10, exp) to this bigint. + FMT_CONSTEXPR20 void assign_pow10(int exp) { + FMT_ASSERT(exp >= 0, ""); + if (exp == 0) return *this = 1; + // Find the top bit. + int bitmask = 1; + while (exp >= bitmask) bitmask <<= 1; + bitmask >>= 1; + // pow(10, exp) = pow(5, exp) * pow(2, exp). First compute pow(5, exp) by + // repeated squaring and multiplication. + *this = 5; + bitmask >>= 1; + while (bitmask != 0) { + square(); + if ((exp & bitmask) != 0) *this *= 5; + bitmask >>= 1; + } + *this <<= exp; // Multiply by pow(2, exp) by shifting. + } + + FMT_CONSTEXPR20 void square() { + int num_bigits = static_cast(bigits_.size()); + int num_result_bigits = 2 * num_bigits; + basic_memory_buffer n(std::move(bigits_)); + bigits_.resize(to_unsigned(num_result_bigits)); + auto sum = uint128_t(); + for (int bigit_index = 0; bigit_index < num_bigits; ++bigit_index) { + // Compute bigit at position bigit_index of the result by adding + // cross-product terms n[i] * n[j] such that i + j == bigit_index. + for (int i = 0, j = bigit_index; j >= 0; ++i, --j) { + // Most terms are multiplied twice which can be optimized in the future. + sum += static_cast(n[i]) * n[j]; + } + (*this)[bigit_index] = static_cast(sum); + sum >>= num_bits(); // Compute the carry. + } + // Do the same for the top half. + for (int bigit_index = num_bigits; bigit_index < num_result_bigits; + ++bigit_index) { + for (int j = num_bigits - 1, i = bigit_index - j; i < num_bigits;) + sum += static_cast(n[i++]) * n[j--]; + (*this)[bigit_index] = static_cast(sum); + sum >>= num_bits(); + } + remove_leading_zeros(); + exp_ *= 2; + } + + // If this bigint has a bigger exponent than other, adds trailing zero to make + // exponents equal. This simplifies some operations such as subtraction. + FMT_CONSTEXPR20 void align(const bigint& other) { + int exp_difference = exp_ - other.exp_; + if (exp_difference <= 0) return; + int num_bigits = static_cast(bigits_.size()); + bigits_.resize(to_unsigned(num_bigits + exp_difference)); + for (int i = num_bigits - 1, j = i + exp_difference; i >= 0; --i, --j) + bigits_[j] = bigits_[i]; + std::uninitialized_fill_n(bigits_.data(), exp_difference, 0); + exp_ -= exp_difference; + } + + // Divides this bignum by divisor, assigning the remainder to this and + // returning the quotient. + FMT_CONSTEXPR20 int divmod_assign(const bigint& divisor) { + FMT_ASSERT(this != &divisor, ""); + if (compare(*this, divisor) < 0) return 0; + FMT_ASSERT(divisor.bigits_[divisor.bigits_.size() - 1u] != 0, ""); + align(divisor); + int quotient = 0; + do { + subtract_aligned(divisor); + ++quotient; + } while (compare(*this, divisor) >= 0); + return quotient; + } +}; + +// format_dragon flags. +enum dragon { + predecessor_closer = 1, + fixup = 2, // Run fixup to correct exp10 which can be off by one. + fixed = 4, +}; + +// Formats a floating-point number using a variation of the Fixed-Precision +// Positive Floating-Point Printout ((FPP)^2) algorithm by Steele & White: +// https://fmt.dev/papers/p372-steele.pdf. +FMT_CONSTEXPR20 inline void format_dragon(basic_fp value, + unsigned flags, int num_digits, + buffer& buf, int& exp10) { + bigint numerator; // 2 * R in (FPP)^2. + bigint denominator; // 2 * S in (FPP)^2. + // lower and upper are differences between value and corresponding boundaries. + bigint lower; // (M^- in (FPP)^2). + bigint upper_store; // upper's value if different from lower. + bigint* upper = nullptr; // (M^+ in (FPP)^2). + // Shift numerator and denominator by an extra bit or two (if lower boundary + // is closer) to make lower and upper integers. This eliminates multiplication + // by 2 during later computations. + bool is_predecessor_closer = (flags & dragon::predecessor_closer) != 0; + int shift = is_predecessor_closer ? 2 : 1; + if (value.e >= 0) { + numerator = value.f; + numerator <<= value.e + shift; + lower = 1; + lower <<= value.e; + if (is_predecessor_closer) { + upper_store = 1; + upper_store <<= value.e + 1; + upper = &upper_store; + } + denominator.assign_pow10(exp10); + denominator <<= shift; + } else if (exp10 < 0) { + numerator.assign_pow10(-exp10); + lower.assign(numerator); + if (is_predecessor_closer) { + upper_store.assign(numerator); + upper_store <<= 1; + upper = &upper_store; + } + numerator *= value.f; + numerator <<= shift; + denominator = 1; + denominator <<= shift - value.e; + } else { + numerator = value.f; + numerator <<= shift; + denominator.assign_pow10(exp10); + denominator <<= shift - value.e; + lower = 1; + if (is_predecessor_closer) { + upper_store = 1ULL << 1; + upper = &upper_store; + } + } + bool even = (value.f & 1) == 0; + if (!upper) upper = &lower; + if ((flags & dragon::fixup) != 0) { + if (add_compare(numerator, *upper, denominator) + even <= 0) { + --exp10; + numerator *= 10; + if (num_digits < 0) { + lower *= 10; + if (upper != &lower) *upper *= 10; + } + } + if ((flags & dragon::fixed) != 0) adjust_precision(num_digits, exp10 + 1); + } + // Invariant: value == (numerator / denominator) * pow(10, exp10). + if (num_digits < 0) { + // Generate the shortest representation. + num_digits = 0; + char* data = buf.data(); + for (;;) { + int digit = numerator.divmod_assign(denominator); + bool low = compare(numerator, lower) - even < 0; // numerator <[=] lower. + // numerator + upper >[=] pow10: + bool high = add_compare(numerator, *upper, denominator) + even > 0; + data[num_digits++] = static_cast('0' + digit); + if (low || high) { + if (!low) { + ++data[num_digits - 1]; + } else if (high) { + int result = add_compare(numerator, numerator, denominator); + // Round half to even. + if (result > 0 || (result == 0 && (digit % 2) != 0)) + ++data[num_digits - 1]; + } + buf.try_resize(to_unsigned(num_digits)); + exp10 -= num_digits - 1; + return; + } + numerator *= 10; + lower *= 10; + if (upper != &lower) *upper *= 10; + } + } + // Generate the given number of digits. + exp10 -= num_digits - 1; + if (num_digits == 0) { + denominator *= 10; + auto digit = add_compare(numerator, numerator, denominator) > 0 ? '1' : '0'; + buf.push_back(digit); + return; + } + buf.try_resize(to_unsigned(num_digits)); + for (int i = 0; i < num_digits - 1; ++i) { + int digit = numerator.divmod_assign(denominator); + buf[i] = static_cast('0' + digit); + numerator *= 10; + } + int digit = numerator.divmod_assign(denominator); + auto result = add_compare(numerator, numerator, denominator); + if (result > 0 || (result == 0 && (digit % 2) != 0)) { + if (digit == 9) { + const auto overflow = '0' + 10; + buf[num_digits - 1] = overflow; + // Propagate the carry. + for (int i = num_digits - 1; i > 0 && buf[i] == overflow; --i) { + buf[i] = '0'; + ++buf[i - 1]; + } + if (buf[0] == overflow) { + buf[0] = '1'; + ++exp10; + } + return; + } + ++digit; + } + buf[num_digits - 1] = static_cast('0' + digit); +} + +template +FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs, + buffer& buf) -> int { + // float is passed as double to reduce the number of instantiations. + static_assert(!std::is_same::value, ""); + FMT_ASSERT(value >= 0, "value is negative"); + auto converted_value = convert_float(value); + + const bool fixed = specs.format == float_format::fixed; + if (value <= 0) { // <= instead of == to silence a warning. + if (precision <= 0 || !fixed) { + buf.push_back('0'); + return 0; + } + buf.try_resize(to_unsigned(precision)); + fill_n(buf.data(), precision, '0'); + return -precision; + } + + int exp = 0; + bool use_dragon = true; + unsigned dragon_flags = 0; + if (!is_fast_float()) { + const auto inv_log2_10 = 0.3010299956639812; // 1 / log2(10) + using info = dragonbox::float_info; + const auto f = basic_fp(converted_value); + // Compute exp, an approximate power of 10, such that + // 10^(exp - 1) <= value < 10^exp or 10^exp <= value < 10^(exp + 1). + // This is based on log10(value) == log2(value) / log2(10) and approximation + // of log2(value) by e + num_fraction_bits idea from double-conversion. + exp = static_cast( + std::ceil((f.e + count_digits<1>(f.f) - 1) * inv_log2_10 - 1e-10)); + dragon_flags = dragon::fixup; + } else if (!is_constant_evaluated() && precision < 0) { + // Use Dragonbox for the shortest format. + if (specs.binary32) { + auto dec = dragonbox::to_decimal(static_cast(value)); + write(buffer_appender(buf), dec.significand); + return dec.exponent; + } + auto dec = dragonbox::to_decimal(static_cast(value)); + write(buffer_appender(buf), dec.significand); + return dec.exponent; + } else { + // Use Grisu + Dragon4 for the given precision: + // https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf. + const int min_exp = -60; // alpha in Grisu. + int cached_exp10 = 0; // K in Grisu. + fp normalized = normalize(fp(converted_value)); + const auto cached_pow = get_cached_power( + min_exp - (normalized.e + fp::num_significand_bits), cached_exp10); + normalized = normalized * cached_pow; + gen_digits_handler handler{buf.data(), 0, precision, -cached_exp10, fixed}; + if (grisu_gen_digits(normalized, 1, exp, handler) != digits::error && + !is_constant_evaluated()) { + exp += handler.exp10; + buf.try_resize(to_unsigned(handler.size)); + use_dragon = false; + } else { + exp += handler.size - cached_exp10 - 1; + precision = handler.precision; + } + } + if (use_dragon) { + auto f = basic_fp(); + bool is_predecessor_closer = specs.binary32 + ? f.assign(static_cast(value)) + : f.assign(converted_value); + if (is_predecessor_closer) dragon_flags |= dragon::predecessor_closer; + if (fixed) dragon_flags |= dragon::fixed; + // Limit precision to the maximum possible number of significant digits in + // an IEEE754 double because we don't need to generate zeros. + const int max_double_digits = 767; + if (precision > max_double_digits) precision = max_double_digits; + format_dragon(f, dragon_flags, precision, buf, exp); + } + if (!fixed && !specs.showpoint) { + // Remove trailing zeros. + auto num_digits = buf.size(); + while (num_digits > 0 && buf[num_digits - 1] == '0') { + --num_digits; + ++exp; + } + buf.try_resize(num_digits); + } + return exp; +} + template ::value)> FMT_CONSTEXPR20 auto write(OutputIt out, T value, @@ -2398,8 +3193,8 @@ FMT_CONSTEXPR20 auto write(OutputIt out, T value, if (const_check(std::is_same())) fspecs.binary32 = true; int exp = format_float(convert_float(value), precision, fspecs, buffer); fspecs.precision = precision; - auto fp = big_decimal_fp{buffer.data(), static_cast(buffer.size()), exp}; - return write_float(out, fp, specs, fspecs, loc); + auto f = big_decimal_fp{buffer.data(), static_cast(buffer.size()), exp}; + return write_float(out, f, specs, fspecs, loc); } template OutputIt { return write(out, to_string_view(value)); } -template ::value && - !std::is_same::value && - !std::is_same::value)> -FMT_CONSTEXPR auto write(OutputIt out, T value) -> OutputIt { - auto abs_value = static_cast>(value); - bool negative = is_negative(value); - // Don't do -abs_value since it trips unsigned-integer-overflow sanitizer. - if (negative) abs_value = ~abs_value + 1; - int num_digits = count_digits(abs_value); - auto size = (negative ? 1 : 0) + static_cast(num_digits); - auto it = reserve(out, size); - if (auto ptr = to_pointer(it, size)) { - if (negative) *ptr++ = static_cast('-'); - format_decimal(ptr, abs_value, num_digits); - return out; - } - if (negative) *it++ = static_cast('-'); - it = format_decimal(it, abs_value, num_digits).end; - return base_iterator(out, it); -} - // FMT_ENABLE_IF() condition separated to workaround an MSVC bug. template < typename Char, typename OutputIt, typename T,