mirror of
https://github.com/fmtlib/fmt.git
synced 2025-11-27 21:00:01 +01:00
Unification utf16/utf32 to utf8 conversion
Signed-off-by: Vladislav Shchapov <vladislav@shchapov.ru>
This commit is contained in:
committed by
Victor Zverovich
parent
e84b00e014
commit
dde8cf3bb7
@@ -1418,6 +1418,68 @@ class utf8_to_utf16 {
|
||||
auto str() const -> std::wstring { return {&buffer_[0], size()}; }
|
||||
};
|
||||
|
||||
// A converter from UTF-16/UTF-32 (host endian) to UTF-8.
|
||||
template <typename WChar, typename Buffer = memory_buffer>
|
||||
class unicode_to_utf8 {
|
||||
private:
|
||||
Buffer buffer_;
|
||||
|
||||
public:
|
||||
unicode_to_utf8() {}
|
||||
explicit unicode_to_utf8(basic_string_view<WChar> s) {
|
||||
static_assert(sizeof(WChar) == 2 || sizeof(WChar) == 4,
|
||||
"Expect utf16 or utf32");
|
||||
|
||||
if (!convert(s))
|
||||
FMT_THROW(std::runtime_error(sizeof(WChar) == 2 ? "invalid utf16"
|
||||
: "invalid utf32"));
|
||||
}
|
||||
operator string_view() const { return string_view(&buffer_[0], size()); }
|
||||
size_t size() const { return buffer_.size() - 1; }
|
||||
const char* c_str() const { return &buffer_[0]; }
|
||||
std::string str() const { return std::string(&buffer_[0], size()); }
|
||||
|
||||
// Performs conversion returning a bool instead of throwing exception on
|
||||
// conversion error. This method may still throw in case of memory allocation
|
||||
// error.
|
||||
bool convert(basic_string_view<WChar> s) {
|
||||
if (!convert(buffer_, s)) return false;
|
||||
buffer_.push_back(0);
|
||||
return true;
|
||||
}
|
||||
static bool convert(Buffer& buf, basic_string_view<WChar> s) {
|
||||
for (auto p = s.begin(); p != s.end(); ++p) {
|
||||
uint32_t c = static_cast<uint32_t>(*p);
|
||||
if (sizeof(WChar) == 2 && c >= 0xd800 && c <= 0xdfff) {
|
||||
// surrogate pair
|
||||
++p;
|
||||
if (p == s.end() || (c & 0xfc00) != 0xd800 || (*p & 0xfc00) != 0xdc00) {
|
||||
return false;
|
||||
}
|
||||
c = (c << 10) + static_cast<uint32_t>(*p) - 0x35fdc00;
|
||||
}
|
||||
if (c < 0x80) {
|
||||
buf.push_back(static_cast<char>(c));
|
||||
} else if (c < 0x800) {
|
||||
buf.push_back(static_cast<char>(0xc0 | (c >> 6)));
|
||||
buf.push_back(static_cast<char>(0x80 | (c & 0x3f)));
|
||||
} else if ((c >= 0x800 && c <= 0xd7ff) || (c >= 0xe000 && c <= 0xffff)) {
|
||||
buf.push_back(static_cast<char>(0xe0 | (c >> 12)));
|
||||
buf.push_back(static_cast<char>(0x80 | ((c & 0xfff) >> 6)));
|
||||
buf.push_back(static_cast<char>(0x80 | (c & 0x3f)));
|
||||
} else if (c >= 0x10000 && c <= 0x10ffff) {
|
||||
buf.push_back(static_cast<char>(0xf0 | (c >> 18)));
|
||||
buf.push_back(static_cast<char>(0x80 | ((c & 0x3ffff) >> 12)));
|
||||
buf.push_back(static_cast<char>(0x80 | ((c & 0xfff) >> 6)));
|
||||
buf.push_back(static_cast<char>(0x80 | (c & 0x3f)));
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
// Computes 128-bit result of multiplication of two 64-bit unsigned integers.
|
||||
inline uint128_fallback umul128(uint64_t x, uint64_t y) noexcept {
|
||||
#if FMT_USE_INT128
|
||||
|
||||
Reference in New Issue
Block a user