diff --git a/include/fmt/format.h b/include/fmt/format.h index 3f8507f7..08b11a6e 100644 --- a/include/fmt/format.h +++ b/include/fmt/format.h @@ -659,21 +659,9 @@ FMT_CONSTEXPR void for_each_codepoint(string_view s, F f) { } while (buf_ptr < buf + num_chars_left); } -template -inline auto compute_width(basic_string_view s) -> size_t { - return s.size(); -} - -// Computes approximate display width of a UTF-8 string. -FMT_CONSTEXPR inline auto compute_width(string_view s) -> size_t { - size_t num_code_points = 0; - // It is not a lambda for compatibility with C++14. - struct count_code_points { - size_t* count; - FMT_CONSTEXPR auto operator()(uint32_t cp, string_view) const -> bool { - *count += to_unsigned( - 1 + - (cp >= 0x1100 && +FMT_CONSTEXPR inline auto display_width_of(uint32_t cp) noexcept -> size_t { + return to_unsigned( + 1 + (cp >= 0x1100 && (cp <= 0x115f || // Hangul Jamo init. consonants cp == 0x2329 || // LEFT-POINTING ANGLE BRACKET cp == 0x232a || // RIGHT-POINTING ANGLE BRACKET @@ -691,12 +679,6 @@ FMT_CONSTEXPR inline auto compute_width(string_view s) -> size_t { (cp >= 0x1f300 && cp <= 0x1f64f) || // Supplemental Symbols and Pictographs: (cp >= 0x1f900 && cp <= 0x1f9ff)))); - return true; - } - }; - // We could avoid branches by using utf8_decode directly. - for_each_codepoint(s, count_code_points{&num_code_points}); - return num_code_points; } template struct is_integral : std::is_integral {}; @@ -2130,35 +2112,98 @@ FMT_CONSTEXPR FMT_INLINE auto write(OutputIt out, T value, return write_int(out, make_write_int_arg(value, specs.sign()), specs); } -inline auto convert_precision_to_size(string_view s, size_t precision) - -> size_t { - size_t display_width = 0; - size_t result = s.size(); - for_each_codepoint(s, [&](uint32_t, string_view sv) { - display_width += compute_width(sv); - // Stop when display width exceeds precision. - if (display_width > precision) { - result = to_unsigned(sv.begin() - s.begin()); +template ::value)> +FMT_CONSTEXPR auto write(OutputIt out, basic_string_view s, + const format_specs& specs) -> OutputIt { + bool is_debug = specs.type() == presentation_type::debug; + if (specs.precision < 0 && specs.width == 0) { + auto&& it = reserve(out, s.size()); + return is_debug ? write_escaped_string(it, s) : copy(s, it); + } + + size_t display_width_limit = + specs.precision < 0 ? SIZE_MAX : to_unsigned(specs.precision); + size_t display_width = + !is_debug || specs.precision == 0 ? 0 : 1; // Account for opening " + size_t size = !is_debug || specs.precision == 0 ? 0 : 1; + for_each_codepoint(s, [&](uint32_t cp, string_view sv) { + if (is_debug && needs_escape(cp)) { + counting_buffer buf; + write_escaped_cp(basic_appender(buf), + find_escape_result{sv.begin(), sv.end(), cp}); + // We're reinterpreting bytes as display width. That's okay + // because write_escaped_cp() only writes ASCII characters. + size_t cp_width = buf.count(); + if (display_width + cp_width <= display_width_limit) { + display_width += cp_width; + size += cp_width; + // If this is the end of the string, account for closing " + if (display_width < display_width_limit && sv.end() == s.end()) { + ++display_width; + ++size; + } + return true; + } + + size += display_width_limit - display_width; + display_width = display_width_limit; return false; } - return true; + + size_t cp_width = display_width_of(cp); + if (cp_width + display_width <= display_width_limit) { + display_width += cp_width; + size += sv.size(); + // If this is the end of the string, account for closing " + if (is_debug && display_width < display_width_limit && + sv.end() == s.end()) { + ++display_width; + ++size; + } + return true; + } + + return false; }); - return result; + + struct bounded_output_iterator { + reserve_iterator underlying_iterator; + size_t bound; + + FMT_CONSTEXPR auto operator*() -> bounded_output_iterator& { return *this; } + FMT_CONSTEXPR auto operator++() -> bounded_output_iterator& { + return *this; + } + FMT_CONSTEXPR auto operator++(int) -> bounded_output_iterator& { + return *this; + } + FMT_CONSTEXPR auto operator=(char c) -> bounded_output_iterator& { + if (bound > 0) { + *underlying_iterator++ = c; + --bound; + } + return *this; + } + }; + + return write_padded( + out, specs, size, display_width, [&](reserve_iterator it) { + return is_debug + ? write_escaped_string(bounded_output_iterator{it, size}, s) + .underlying_iterator + : copy(s.data(), s.data() + size, it); + }); } -template ::value)> -auto convert_precision_to_size(basic_string_view, size_t precision) - -> size_t { - return precision; -} - -template +template ::value)> FMT_CONSTEXPR auto write(OutputIt out, basic_string_view s, const format_specs& specs) -> OutputIt { auto data = s.data(); auto size = s.size(); if (specs.precision >= 0 && to_unsigned(specs.precision) < size) - size = convert_precision_to_size(s, to_unsigned(specs.precision)); + size = to_unsigned(specs.precision); bool is_debug = specs.type() == presentation_type::debug; if (is_debug) { @@ -2167,22 +2212,19 @@ FMT_CONSTEXPR auto write(OutputIt out, basic_string_view s, size = buf.count(); } - size_t width = 0; - if (specs.width != 0) { - width = - is_debug ? size : compute_width(basic_string_view(data, size)); - } return write_padded( - out, specs, size, width, [=](reserve_iterator it) { + out, specs, size, [&](reserve_iterator it) { return is_debug ? write_escaped_string(it, s) : copy(data, data + size, it); }); } + template FMT_CONSTEXPR auto write(OutputIt out, basic_string_view s, const format_specs& specs, locale_ref) -> OutputIt { return write(out, s, specs); } + template FMT_CONSTEXPR auto write(OutputIt out, const Char* s, const format_specs& specs, locale_ref) -> OutputIt { diff --git a/test/format-test.cc b/test/format-test.cc index 124b9bd2..322b2794 100644 --- a/test/format-test.cc +++ b/test/format-test.cc @@ -206,10 +206,6 @@ TEST(util_test, parse_nonnegative_int) { EXPECT_EQ(fmt::detail::parse_nonnegative_int(begin, end, -1), -1); } -TEST(format_impl_test, compute_width) { - EXPECT_EQ(fmt::detail::compute_width("вожык"), 5); -} - TEST(util_test, utf8_to_utf16) { auto u = fmt::detail::utf8_to_utf16("лошадка"); EXPECT_EQ(L"\x043B\x043E\x0448\x0430\x0434\x043A\x0430", u.str()); @@ -887,6 +883,7 @@ TEST(format_test, width) { " 0xcafe"); EXPECT_EQ(fmt::format("{:11}", 'x'), "x "); EXPECT_EQ(fmt::format("{:12}", "str"), "str "); + EXPECT_EQ(fmt::format("{:*^5}", "🤡"), "*🤡**"); EXPECT_EQ(fmt::format("{:*^6}", "🤡"), "**🤡**"); EXPECT_EQ(fmt::format("{:*^8}", "你好"), "**你好**"); EXPECT_EQ(fmt::format("{:#6}", 42.0), " 42."); @@ -894,6 +891,31 @@ TEST(format_test, width) { EXPECT_EQ(fmt::format("{:>06.0f}", 0.00884311), " 0"); } +TEST(format_test, debug_presentation) { + EXPECT_EQ(fmt::format("{:?}", ""), R"("")"); + + EXPECT_EQ(fmt::format("{:*<5.0?}", "\n"), R"(*****)"); + EXPECT_EQ(fmt::format("{:*<5.1?}", "\n"), R"("****)"); + EXPECT_EQ(fmt::format("{:*<5.2?}", "\n"), R"("\***)"); + EXPECT_EQ(fmt::format("{:*<5.3?}", "\n"), R"("\n**)"); + EXPECT_EQ(fmt::format("{:*<5.4?}", "\n"), R"("\n"*)"); + + EXPECT_EQ(fmt::format("{:*<5.1?}", "Σ"), R"("****)"); + EXPECT_EQ(fmt::format("{:*<5.2?}", "Σ"), R"("Σ***)"); + EXPECT_EQ(fmt::format("{:*<5.3?}", "Σ"), R"("Σ"**)"); + + EXPECT_EQ(fmt::format("{:*<5.1?}", "笑"), R"("****)"); + EXPECT_EQ(fmt::format("{:*<5.2?}", "笑"), R"("****)"); + EXPECT_EQ(fmt::format("{:*<5.3?}", "笑"), R"("笑**)"); + EXPECT_EQ(fmt::format("{:*<5.4?}", "笑"), R"("笑"*)"); + + EXPECT_EQ(fmt::format("{:*<8?}", "туда"), R"("туда"**)"); + EXPECT_EQ(fmt::format("{:*>8?}", "сюда"), R"(**"сюда")"); + EXPECT_EQ(fmt::format("{:*^8?}", "中心"), R"(*"中心"*)"); + + EXPECT_EQ(fmt::format("{:*^14?}", "A\t👈🤯ы猫"), R"(*"A\t👈🤯ы猫"*)"); +} + auto bad_dynamic_spec_msg = FMT_BUILTIN_TYPES ? "width/precision is out of range" : "width/precision is not integer"; @@ -1134,7 +1156,6 @@ TEST(format_test, large_precision) { TEST(format_test, utf8_precision) { auto result = fmt::format("{:.4}", "caf\u00e9s"); // cafés - EXPECT_EQ(fmt::detail::compute_width(result), 4); EXPECT_EQ(result, "caf\u00e9"); }