From 2a4e9488644e1b5ed5eb2eeddf5767b5003a60c6 Mon Sep 17 00:00:00 2001
From: Victor Zverovich
Date: Sat, 21 Jul 2018 09:13:21 -0700
Subject: [PATCH] Add UTF-8 types

Add fmt::char8_t, a UTF-8 code unit type wrapping a single char, and
fmt::u8string_view, a string view over char8_t that can be constructed
from a C string or from a pointer and a size. When user-defined literals
are enabled, the `_u` literal creates a u8string_view.

Add internal::count_code_points, which counts the code points in a
u8string_view by counting every code unit that is not a UTF-8
continuation byte. The counter has the same type as the return value
(size_t) so that no signed-to-unsigned conversion takes place.

Rewrap several long lines in the touched files.

---
 include/fmt/core.h       | 11 +++++-----
 include/fmt/format-inl.h | 10 +++++++++
 include/fmt/format.h     | 46 ++++++++++++++++++++++++++++++++++++----
 test/format-impl-test.cc |  6 +++++-
 test/format-test.cc      | 27 +++++++++++++++++++++++
 test/util-test.cc        |  3 ++-
 6 files changed, 92 insertions(+), 11 deletions(-)

diff --git a/include/fmt/core.h b/include/fmt/core.h
index ae4e39ce..a3581647 100644
--- a/include/fmt/core.h
+++ b/include/fmt/core.h
@@ -1062,8 +1062,8 @@ const long long format_arg_store<Context, Args...>::TYPES = get_types();
 /**
   \rst
   Constructs an `~fmt::format_arg_store` object that contains references to
-  arguments and can be implicitly converted to `~fmt::format_args`. `Context` can
-  be omitted in which case it defaults to `~fmt::context`.
+  arguments and can be implicitly converted to `~fmt::format_args`. `Context`
+  can be omitted in which case it defaults to `~fmt::context`.
   \endrst
  */
 template <typename Context, typename ...Args>
@@ -1334,11 +1334,12 @@ inline void print(std::FILE *f, string_view format_str, const Args & ... args) {
   vprint(f, format_str, as);
 }
 /**
-  Prints formatted data to the file *f* which should be in wide-oriented mode set
-  via ``fwide(f, 1)`` or ``_setmode(_fileno(f), _O_U8TEXT)`` on Windows.
+  Prints formatted data to the file *f* which should be in wide-oriented mode
+  set via ``fwide(f, 1)`` or ``_setmode(_fileno(f), _O_U8TEXT)`` on Windows.
  */
 template <typename... Args>
-inline void print(std::FILE *f, wstring_view format_str, const Args & ... args) {
+inline void print(std::FILE *f, wstring_view format_str,
+                  const Args & ... args) {
   format_arg_store<wformat_context, Args...> as(args...);
   vprint(f, format_str, as);
 }
diff --git a/include/fmt/format-inl.h b/include/fmt/format-inl.h
index e66bbb7a..d7231a18 100644
--- a/include/fmt/format-inl.h
+++ b/include/fmt/format-inl.h
@@ -202,6 +202,16 @@ class locale {
   std::locale get() { return locale_; }
 };
 
+FMT_FUNC size_t internal::count_code_points(u8string_view s) {
+  const char8_t *data = s.data();
+  size_t num_code_points = 0;
+  for (size_t i = 0, size = s.size(); i != size; ++i) {
+    if ((data[i].value & 0xc0) != 0x80)  // not a 10xxxxxx continuation byte
+      ++num_code_points;
+  }
+  return num_code_points;
+}
+
 template <typename Char>
 FMT_FUNC Char internal::thousands_sep(locale_provider *lp) {
   std::locale loc = lp ? lp->locale().get() : std::locale();
diff --git a/include/fmt/format.h b/include/fmt/format.h
index 5e951f5a..b80704dc 100644
--- a/include/fmt/format.h
+++ b/include/fmt/format.h
@@ -489,6 +489,37 @@ void basic_buffer<T>::append(const U *begin, const U *end) {
 }
 }  // namespace internal
 
+// A UTF-8 code unit type.
+struct char8_t {
+  char value;
+  FMT_CONSTEXPR explicit operator bool() const FMT_NOEXCEPT {
+    return value != 0;
+  }
+};
+
+// A UTF-8 string.
+class u8string_view : public basic_string_view<char8_t> {
+ private:
+  typedef basic_string_view<char8_t> base;
+
+ public:
+  using basic_string_view::basic_string_view;
+
+  u8string_view(const char *s)
+    : base(reinterpret_cast<const char8_t*>(s)) {}
+
+  u8string_view(const char *s, size_t count) FMT_NOEXCEPT
+    : base(reinterpret_cast<const char8_t*>(s), count) {}
+};
+
+#if FMT_USE_USER_DEFINED_LITERALS
+inline namespace literals {
+inline u8string_view operator"" _u(const char *s, std::size_t n) {
+  return u8string_view(s, n);
+}
+}
+#endif
+
 // A wrapper around std::locale used to reduce compile times since <locale>
 // is very heavy.
 class locale;
@@ -950,6 +981,9 @@ inline unsigned count_digits(uint64_t n) {
 }
 #endif
 
+// Counts the number of code points in a UTF-8 string.
+FMT_API size_t count_code_points(u8string_view s);
+
 #if FMT_HAS_CPP_ATTRIBUTE(always_inline)
 # define FMT_ALWAYS_INLINE __attribute__((always_inline))
 #else
@@ -3514,7 +3548,8 @@ template <typename... Args, std::size_t SIZE = inline_buffer_size>
 inline wformat_context::iterator format_to(
     basic_memory_buffer<wchar_t, SIZE> &buf, wstring_view format_str,
     const Args & ... args) {
-  return vformat_to(buf, format_str, make_format_args<wformat_context>(args...));
+  return vformat_to(buf, format_str,
+                    make_format_args<wformat_context>(args...));
 }
 
 template <typename OutputIt, typename Char = char>
@@ -3573,7 +3608,8 @@ inline typename std::enable_if<
   is_contiguous<Container>::value, std::back_insert_iterator<Container>>::type
     format_to(std::back_insert_iterator<Container> out,
               wstring_view format_str, const Args & ... args) {
-  return vformat_to(out, format_str, make_format_args<wformat_context>(args...));
+  return vformat_to(out, format_str,
+                    make_format_args<wformat_context>(args...));
 }
 
 template <typename OutputIt>
@@ -3847,10 +3883,12 @@ inline void print(rgb fd, string_view format_str, const Args & ... args) {
   Formats a string and prints it to stdout using ANSI escape sequences to
   specify foreground color 'fd' and background color 'bg'.
   Example:
-    fmt::print(fmt::color::red, fmt::color::black, "Elapsed time: {0:.2f} seconds", 1.23);
+    fmt::print(fmt::color::red, fmt::color::black,
+               "Elapsed time: {0:.2f} seconds", 1.23);
  */
 template <typename... Args>
-inline void print(rgb fd, rgb bg, string_view format_str, const Args & ... args) {
+inline void print(rgb fd, rgb bg, string_view format_str,
+                  const Args & ... args) {
   vprint_rgb(fd, bg, format_str, make_format_args(args...));
 }
 #endif  // FMT_EXTENDED_COLORS
diff --git a/test/format-impl-test.cc b/test/format-impl-test.cc
index 9919f405..abb6825e 100644
--- a/test/format-impl-test.cc
+++ b/test/format-impl-test.cc
@@ -118,9 +118,13 @@ TEST(FormatTest, FormatErrorCode) {
   }
 }
 
+TEST(FormatTest, CountCodePoints) {
+  EXPECT_EQ(4, fmt::internal::count_code_points(fmt::u8string_view("ёжик")));
+}
+
 TEST(ColorsTest, Colors) {
   EXPECT_WRITE(stdout, fmt::print(fmt::rgb(255,20,30), "rgb(255,20,30)"),
                "\x1b[38;2;255;020;030mrgb(255,20,30)\x1b[0m");
-  EXPECT_WRITE(stdout, fmt::print(fmt::color::blue,"blue"),
+  EXPECT_WRITE(stdout, fmt::print(fmt::color::blue, "blue"),
                "\x1b[38;2;000;000;255mblue\x1b[0m");
 }
diff --git a/test/format-test.cc b/test/format-test.cc
index a1556520..447a7dea 100644
--- a/test/format-test.cc
+++ b/test/format-test.cc
@@ -1932,3 +1932,30 @@ TEST(FormatTest, FormatStringErrors) {
 }
 
 #endif  // FMT_USE_CONSTEXPR
+TEST(FormatTest, ConstructU8StringViewFromCString) {
+  fmt::u8string_view s("ab");
+  EXPECT_EQ(s.size(), 2u);
+  const fmt::char8_t *data = s.data();
+  EXPECT_EQ(data[0].value, 'a');
+  EXPECT_EQ(data[1].value, 'b');
+}
+
+TEST(FormatTest, ConstructU8StringViewFromDataAndSize) {
+  fmt::u8string_view s("foobar", 3);
+  EXPECT_EQ(s.size(), 3u);
+  const fmt::char8_t *data = s.data();
+  EXPECT_EQ(data[0].value, 'f');
+  EXPECT_EQ(data[1].value, 'o');
+  EXPECT_EQ(data[2].value, 'o');
+}
+
+#if FMT_USE_USER_DEFINED_LITERALS
+TEST(FormatTest, U8StringViewLiteral) {
+  using namespace fmt::literals;
+  fmt::u8string_view s = "ab"_u;
+  EXPECT_EQ(s.size(), 2u);
+  const fmt::char8_t *data = s.data();
+  EXPECT_EQ(data[0].value, 'a');
+  EXPECT_EQ(data[1].value, 'b');
+}
+#endif
diff --git a/test/util-test.cc b/test/util-test.cc
index e14878f4..46396f79 100644
--- a/test/util-test.cc
+++ b/test/util-test.cc
@@ -868,7 +868,8 @@ TEST(UtilTest, IsEnumConvertibleToInt) {
 #endif
 
 TEST(UtilTest, ParseNonnegativeInt) {
-  if (std::numeric_limits<int>::max() != static_cast<int>(static_cast<unsigned>(1) << 31)) {
+  if (std::numeric_limits<int>::max() !=
+      static_cast<int>(static_cast<unsigned>(1) << 31)) {
     fmt::print("Skipping parse_nonnegative_int test\n");
     return;
   }