mirror of
https://github.com/fmtlib/fmt.git
synced 2025-11-25 11:49:52 +01:00
Make path formatting lossless with WTF-8
This commit is contained in:
@@ -1311,7 +1311,13 @@ class utf8_to_utf16 {
|
||||
inline auto str() const -> std::wstring { return {&buffer_[0], size()}; }
|
||||
};
|
||||
|
||||
// Selects what the UTF-16/UTF-32 -> UTF-8 converter does when the surrogate
// check in its conversion loop fails:
//   abort   - the conversion returns false;
//   replace - the bytes "\xEF\xBF\xBD" (U+FFFD) are appended instead;
//   wtf     - the offending code unit is written with to_utf8_3bytes.
// This is a diff view, so two declarations are shown; the second one adds the
// `wtf` enumerator.
enum class to_utf8_error_policy { abort, replace };
|
||||
enum class to_utf8_error_policy { abort, replace, wtf };
|
||||
|
||||
// Appends `cp` to `buf` using the three-byte UTF-8 layout
// (1110xxxx 10xxxxxx 10xxxxxx). No range check is performed on `cp`; bits
// 12 and up go into the lead byte, bits 6-11 and 0-5 into the two
// continuation bytes.
inline void to_utf8_3bytes(buffer<char>& buf, uint32_t cp) {
  const uint32_t lead_byte = 0xe0 | (cp >> 12);
  const uint32_t middle_byte = 0x80 | ((cp >> 6) & 0x3f);
  const uint32_t last_byte = 0x80 | (cp & 0x3f);
  buf.push_back(static_cast<char>(lead_byte));
  buf.push_back(static_cast<char>(middle_byte));
  buf.push_back(static_cast<char>(last_byte));
}
|
||||
|
||||
// A converter from UTF-16/UTF-32 (host endian) to UTF-8.
|
||||
template <typename WChar, typename Buffer = memory_buffer> class to_utf8 {
|
||||
@@ -1353,8 +1359,16 @@ template <typename WChar, typename Buffer = memory_buffer> class to_utf8 {
|
||||
// Handle a surrogate pair.
|
||||
++p;
|
||||
if (p == s.end() || (c & 0xfc00) != 0xd800 || (*p & 0xfc00) != 0xdc00) {
|
||||
if (policy == to_utf8_error_policy::abort) return false;
|
||||
switch (policy) {
|
||||
case to_utf8_error_policy::abort:
|
||||
return false;
|
||||
case to_utf8_error_policy::replace:
|
||||
buf.append(string_view("\xEF\xBF\xBD"));
|
||||
break;
|
||||
case to_utf8_error_policy::wtf:
|
||||
to_utf8_3bytes(buf, c);
|
||||
break;
|
||||
}
|
||||
--p;
|
||||
continue;
|
||||
}
|
||||
@@ -1366,9 +1380,7 @@ template <typename WChar, typename Buffer = memory_buffer> class to_utf8 {
|
||||
buf.push_back(static_cast<char>(0xc0 | (c >> 6)));
|
||||
buf.push_back(static_cast<char>(0x80 | (c & 0x3f)));
|
||||
} else if ((c >= 0x800 && c <= 0xd7ff) || (c >= 0xe000 && c <= 0xffff)) {
|
||||
buf.push_back(static_cast<char>(0xe0 | (c >> 12)));
|
||||
buf.push_back(static_cast<char>(0x80 | ((c & 0xfff) >> 6)));
|
||||
buf.push_back(static_cast<char>(0x80 | (c & 0x3f)));
|
||||
to_utf8_3bytes(buf, c);
|
||||
} else if (c >= 0x10000 && c <= 0x10ffff) {
|
||||
buf.push_back(static_cast<char>(0xf0 | (c >> 18)));
|
||||
buf.push_back(static_cast<char>(0x80 | ((c & 0x3ffff) >> 12)));
|
||||
|
||||
@@ -84,11 +84,13 @@ namespace detail {
|
||||
// Returns the path as a string for formatting with character type Char.
// When Char is char and the native path character type is wchar_t, the
// native string is converted with to_utf8<wchar_t>; otherwise the result is
// p.string<Char>().
// This is a diff view, so both variants of the body are shown: the first
// passes to_utf8_error_policy::replace to the converter, the second passes
// to_utf8_error_policy::wtf.
template <typename Char, typename PathChar>
|
||||
auto get_path_string(const std::filesystem::path& p,
|
||||
const std::basic_string<PathChar>& native) {
|
||||
if constexpr (std::is_same_v<Char, char> && std::is_same_v<PathChar, wchar_t>)
|
||||
return to_utf8<wchar_t>(native, to_utf8_error_policy::replace);
|
||||
else
|
||||
if constexpr (std::is_same_v<Char, char> &&
|
||||
std::is_same_v<PathChar, wchar_t>) {
|
||||
return to_utf8<wchar_t>(native, to_utf8_error_policy::wtf);
|
||||
|
|
||||
} else {
|
||||
return p.string<Char>();
|
||||
|
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Char, typename PathChar>
|
||||
void write_escaped_path(basic_memory_buffer<Char>& quoted,
|
||||
|
||||
@@ -39,13 +39,12 @@ TEST(std_test, path) {
|
||||
EXPECT_EQ(fmt::format("{}", path(L"\x0428\x0447\x0443\x0447\x044B\x043D\x0448"
|
||||
L"\x0447\x044B\x043D\x0430")),
|
||||
"Шчучыншчына");
|
||||
EXPECT_EQ(fmt::format("{}", path(L"\xd800")), "�");
|
||||
EXPECT_EQ(fmt::format("{}", path(L"HEAD \xd800 TAIL")), "HEAD � TAIL");
|
||||
EXPECT_EQ(fmt::format("{}", path(L"HEAD \xD83D\xDE00 TAIL")),
|
||||
"HEAD \xF0\x9F\x98\x80 TAIL");
|
||||
EXPECT_EQ(fmt::format("{}", path(L"HEAD \xD83D\xD83D\xDE00 TAIL")),
|
||||
"HEAD �\xF0\x9F\x98\x80 TAIL");
|
||||
EXPECT_EQ(fmt::format("{:?}", path(L"\xd800")), "\"\\ud800\"");
|
||||
EXPECT_EQ(fmt::format("{}", path(L"\xD800")), "\xED\xA0\x80");
|
||||
EXPECT_EQ(fmt::format("{}", path(L"[\xD800]")), "[\xED\xA0\x80]");
|
||||
EXPECT_EQ(fmt::format("{}", path(L"[\xD83D\xDE00]")), "[\xF0\x9F\x98\x80]");
|
||||
EXPECT_EQ(fmt::format("{}", path(L"[\xD83D\xD83D\xDE00]")),
|
||||
"[\xED\xA0\xBD\xF0\x9F\x98\x80]");
|
||||
EXPECT_EQ(fmt::format("{:?}", path(L"\xD800")), "\"\\ud800\"");
|
||||
# endif
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user