diff --git a/include/fmt/format.h b/include/fmt/format.h index 65e19d2d..8c6d0451 100644 --- a/include/fmt/format.h +++ b/include/fmt/format.h @@ -1311,7 +1311,13 @@ class utf8_to_utf16 { inline auto str() const -> std::wstring { return {&buffer_[0], size()}; } }; -enum class to_utf8_error_policy { abort, replace }; +enum class to_utf8_error_policy { abort, replace, wtf }; + +inline void to_utf8_3bytes(buffer& buf, uint32_t cp) { + buf.push_back(static_cast(0xe0 | (cp >> 12))); + buf.push_back(static_cast(0x80 | ((cp & 0xfff) >> 6))); + buf.push_back(static_cast(0x80 | (cp & 0x3f))); +} // A converter from UTF-16/UTF-32 (host endian) to UTF-8. template class to_utf8 { @@ -1353,8 +1359,16 @@ template class to_utf8 { // Handle a surrogate pair. ++p; if (p == s.end() || (c & 0xfc00) != 0xd800 || (*p & 0xfc00) != 0xdc00) { - if (policy == to_utf8_error_policy::abort) return false; - buf.append(string_view("\xEF\xBF\xBD")); + switch (policy) { + case to_utf8_error_policy::abort: + return false; + case to_utf8_error_policy::replace: + buf.append(string_view("\xEF\xBF\xBD")); + break; + case to_utf8_error_policy::wtf: + to_utf8_3bytes(buf, c); + break; + } --p; continue; } @@ -1366,9 +1380,7 @@ template class to_utf8 { buf.push_back(static_cast(0xc0 | (c >> 6))); buf.push_back(static_cast(0x80 | (c & 0x3f))); } else if ((c >= 0x800 && c <= 0xd7ff) || (c >= 0xe000 && c <= 0xffff)) { - buf.push_back(static_cast(0xe0 | (c >> 12))); - buf.push_back(static_cast(0x80 | ((c & 0xfff) >> 6))); - buf.push_back(static_cast(0x80 | (c & 0x3f))); + to_utf8_3bytes(buf, c); } else if (c >= 0x10000 && c <= 0x10ffff) { buf.push_back(static_cast(0xf0 | (c >> 18))); buf.push_back(static_cast(0x80 | ((c & 0x3ffff) >> 12))); diff --git a/include/fmt/std.h b/include/fmt/std.h index 184c6d26..6dd56e28 100644 --- a/include/fmt/std.h +++ b/include/fmt/std.h @@ -84,10 +84,12 @@ namespace detail { template auto get_path_string(const std::filesystem::path& p, const std::basic_string& native) { - if constexpr (std::is_same_v && std::is_same_v) - return to_utf8(native, to_utf8_error_policy::replace); - else + if constexpr (std::is_same_v && + std::is_same_v) { + return to_utf8(native, to_utf8_error_policy::wtf); + } else { return p.string(); + } } template diff --git a/test/std-test.cc b/test/std-test.cc index 18f6bd3f..c5a0c65a 100644 --- a/test/std-test.cc +++ b/test/std-test.cc @@ -39,13 +39,12 @@ TEST(std_test, path) { EXPECT_EQ(fmt::format("{}", path(L"\x0428\x0447\x0443\x0447\x044B\x043D\x0448" L"\x0447\x044B\x043D\x0430")), "Шчучыншчына"); - EXPECT_EQ(fmt::format("{}", path(L"\xd800")), "�"); - EXPECT_EQ(fmt::format("{}", path(L"HEAD \xd800 TAIL")), "HEAD � TAIL"); - EXPECT_EQ(fmt::format("{}", path(L"HEAD \xD83D\xDE00 TAIL")), - "HEAD \xF0\x9F\x98\x80 TAIL"); - EXPECT_EQ(fmt::format("{}", path(L"HEAD \xD83D\xD83D\xDE00 TAIL")), - "HEAD �\xF0\x9F\x98\x80 TAIL"); - EXPECT_EQ(fmt::format("{:?}", path(L"\xd800")), "\"\\ud800\""); + EXPECT_EQ(fmt::format("{}", path(L"\xD800")), "\xED\xA0\x80"); + EXPECT_EQ(fmt::format("{}", path(L"[\xD800]")), "[\xED\xA0\x80]"); + EXPECT_EQ(fmt::format("{}", path(L"[\xD83D\xDE00]")), "[\xF0\x9F\x98\x80]"); + EXPECT_EQ(fmt::format("{}", path(L"[\xD83D\xD83D\xDE00]")), + "[\xED\xA0\xBD\xF0\x9F\x98\x80]"); + EXPECT_EQ(fmt::format("{:?}", path(L"\xD800")), "\"\\ud800\""); # endif }