mirror of
https://github.com/fmtlib/fmt.git
synced 2025-11-25 11:49:52 +01:00
Make path formatting lossless with WTF-8
This commit is contained in:
@@ -1311,7 +1311,13 @@ class utf8_to_utf16 {
|
|||||||
inline auto str() const -> std::wstring { return {&buffer_[0], size()}; }
|
inline auto str() const -> std::wstring { return {&buffer_[0], size()}; }
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class to_utf8_error_policy { abort, replace };
|
enum class to_utf8_error_policy { abort, replace, wtf };
|
||||||
|
|
||||||
|
inline void to_utf8_3bytes(buffer<char>& buf, uint32_t cp) {
|
||||||
|
buf.push_back(static_cast<char>(0xe0 | (cp >> 12)));
|
||||||
|
buf.push_back(static_cast<char>(0x80 | ((cp & 0xfff) >> 6)));
|
||||||
|
buf.push_back(static_cast<char>(0x80 | (cp & 0x3f)));
|
||||||
|
}
|
||||||
|
|
||||||
// A converter from UTF-16/UTF-32 (host endian) to UTF-8.
|
// A converter from UTF-16/UTF-32 (host endian) to UTF-8.
|
||||||
template <typename WChar, typename Buffer = memory_buffer> class to_utf8 {
|
template <typename WChar, typename Buffer = memory_buffer> class to_utf8 {
|
||||||
@@ -1353,8 +1359,16 @@ template <typename WChar, typename Buffer = memory_buffer> class to_utf8 {
|
|||||||
// Handle a surrogate pair.
|
// Handle a surrogate pair.
|
||||||
++p;
|
++p;
|
||||||
if (p == s.end() || (c & 0xfc00) != 0xd800 || (*p & 0xfc00) != 0xdc00) {
|
if (p == s.end() || (c & 0xfc00) != 0xd800 || (*p & 0xfc00) != 0xdc00) {
|
||||||
if (policy == to_utf8_error_policy::abort) return false;
|
switch (policy) {
|
||||||
buf.append(string_view("\xEF\xBF\xBD"));
|
case to_utf8_error_policy::abort:
|
||||||
|
return false;
|
||||||
|
case to_utf8_error_policy::replace:
|
||||||
|
buf.append(string_view("\xEF\xBF\xBD"));
|
||||||
|
break;
|
||||||
|
case to_utf8_error_policy::wtf:
|
||||||
|
to_utf8_3bytes(buf, c);
|
||||||
|
break;
|
||||||
|
}
|
||||||
--p;
|
--p;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -1366,9 +1380,7 @@ template <typename WChar, typename Buffer = memory_buffer> class to_utf8 {
|
|||||||
buf.push_back(static_cast<char>(0xc0 | (c >> 6)));
|
buf.push_back(static_cast<char>(0xc0 | (c >> 6)));
|
||||||
buf.push_back(static_cast<char>(0x80 | (c & 0x3f)));
|
buf.push_back(static_cast<char>(0x80 | (c & 0x3f)));
|
||||||
} else if ((c >= 0x800 && c <= 0xd7ff) || (c >= 0xe000 && c <= 0xffff)) {
|
} else if ((c >= 0x800 && c <= 0xd7ff) || (c >= 0xe000 && c <= 0xffff)) {
|
||||||
buf.push_back(static_cast<char>(0xe0 | (c >> 12)));
|
to_utf8_3bytes(buf, c);
|
||||||
buf.push_back(static_cast<char>(0x80 | ((c & 0xfff) >> 6)));
|
|
||||||
buf.push_back(static_cast<char>(0x80 | (c & 0x3f)));
|
|
||||||
} else if (c >= 0x10000 && c <= 0x10ffff) {
|
} else if (c >= 0x10000 && c <= 0x10ffff) {
|
||||||
buf.push_back(static_cast<char>(0xf0 | (c >> 18)));
|
buf.push_back(static_cast<char>(0xf0 | (c >> 18)));
|
||||||
buf.push_back(static_cast<char>(0x80 | ((c & 0x3ffff) >> 12)));
|
buf.push_back(static_cast<char>(0x80 | ((c & 0x3ffff) >> 12)));
|
||||||
|
|||||||
@@ -84,10 +84,12 @@ namespace detail {
|
|||||||
template <typename Char, typename PathChar>
|
template <typename Char, typename PathChar>
|
||||||
auto get_path_string(const std::filesystem::path& p,
|
auto get_path_string(const std::filesystem::path& p,
|
||||||
const std::basic_string<PathChar>& native) {
|
const std::basic_string<PathChar>& native) {
|
||||||
if constexpr (std::is_same_v<Char, char> && std::is_same_v<PathChar, wchar_t>)
|
if constexpr (std::is_same_v<Char, char> &&
|
||||||
return to_utf8<wchar_t>(native, to_utf8_error_policy::replace);
|
std::is_same_v<PathChar, wchar_t>) {
|
||||||
else
|
return to_utf8<wchar_t>(native, to_utf8_error_policy::wtf);
|
||||||
|
} else {
|
||||||
return p.string<Char>();
|
return p.string<Char>();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Char, typename PathChar>
|
template <typename Char, typename PathChar>
|
||||||
|
|||||||
@@ -39,13 +39,12 @@ TEST(std_test, path) {
|
|||||||
EXPECT_EQ(fmt::format("{}", path(L"\x0428\x0447\x0443\x0447\x044B\x043D\x0448"
|
EXPECT_EQ(fmt::format("{}", path(L"\x0428\x0447\x0443\x0447\x044B\x043D\x0448"
|
||||||
L"\x0447\x044B\x043D\x0430")),
|
L"\x0447\x044B\x043D\x0430")),
|
||||||
"Шчучыншчына");
|
"Шчучыншчына");
|
||||||
EXPECT_EQ(fmt::format("{}", path(L"\xd800")), "�");
|
EXPECT_EQ(fmt::format("{}", path(L"\xD800")), "\xED\xA0\x80");
|
||||||
EXPECT_EQ(fmt::format("{}", path(L"HEAD \xd800 TAIL")), "HEAD � TAIL");
|
EXPECT_EQ(fmt::format("{}", path(L"[\xD800]")), "[\xED\xA0\x80]");
|
||||||
EXPECT_EQ(fmt::format("{}", path(L"HEAD \xD83D\xDE00 TAIL")),
|
EXPECT_EQ(fmt::format("{}", path(L"[\xD83D\xDE00]")), "[\xF0\x9F\x98\x80]");
|
||||||
"HEAD \xF0\x9F\x98\x80 TAIL");
|
EXPECT_EQ(fmt::format("{}", path(L"[\xD83D\xD83D\xDE00]")),
|
||||||
EXPECT_EQ(fmt::format("{}", path(L"HEAD \xD83D\xD83D\xDE00 TAIL")),
|
"[\xED\xA0\xBD\xF0\x9F\x98\x80]");
|
||||||
"HEAD �\xF0\x9F\x98\x80 TAIL");
|
EXPECT_EQ(fmt::format("{:?}", path(L"\xD800")), "\"\\ud800\"");
|
||||||
EXPECT_EQ(fmt::format("{:?}", path(L"\xd800")), "\"\\ud800\"");
|
|
||||||
# endif
|
# endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user