Compare commits

...

9 Commits
11.1.2 ... text

11 changed files with 6672 additions and 6 deletions

View File

@ -33,6 +33,8 @@ if (MASTER_PROJECT AND NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release CACHE STRING ${doc})
endif ()
# User-facing build switches; every one of them defaults to OFF.
# FMT_USE_TEXT gates the `if (FMT_USE_TEXT)` blocks further down, which add
# src/text/grapheme_break.cpp to the sources and src/text to the include path.
option(FMT_USE_TEXT "Use the text library." OFF)
option(FMT_PEDANTIC "Enable extra warnings and expensive tests." OFF)
option(FMT_WERROR "Halt the compilation with an error on compiler warnings."
OFF)
@ -160,6 +162,10 @@ if (HAVE_OPEN)
set(FMT_SOURCES ${FMT_SOURCES} src/posix.cc)
endif ()
# Compile the bundled grapheme-break implementation into the library when text
# support has been requested.
if (FMT_USE_TEXT)
  list(APPEND FMT_SOURCES src/text/grapheme_break.cpp)
endif ()
# The fmt library target, built from ${FMT_SOURCES}; the headers and the
# README/ChangeLog files are listed as sources as well.
# fmt::fmt is a namespaced alias for the same target.
add_library(fmt ${FMT_SOURCES} ${FMT_HEADERS} README.rst ChangeLog.rst)
add_library(fmt::fmt ALIAS fmt)
@ -180,6 +186,11 @@ target_include_directories(fmt PUBLIC
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:include>)
# When text support is requested:
#  * put the bundled Boost.Text subset (src/text) on the include path, and
#  * define FMT_USE_TEXT=1. format.h defaults the macro to 0 when it is not
#    defined, so without this definition the option would add sources and
#    include paths but leave the grapheme-aware width code compiled out.
# Both are limited to the build interface because src/text is only exposed as a
# BUILD_INTERFACE include directory here.
# NOTE(review): confirm whether the src/text headers should also be installed.
if (FMT_USE_TEXT)
  target_include_directories(fmt PUBLIC
    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/src/text>)
  target_compile_definitions(fmt PUBLIC
    $<BUILD_INTERFACE:FMT_USE_TEXT=1>)
endif ()
# Library versioning and naming: VERSION comes from FMT_VERSION, SOVERSION from
# CPACK_PACKAGE_VERSION_MAJOR, and debug-configuration artifacts get a "d"
# postfix.
set_target_properties(fmt PROPERTIES
VERSION ${FMT_VERSION} SOVERSION ${CPACK_PACKAGE_VERSION_MAJOR}
DEBUG_POSTFIX d)

View File

@ -45,6 +45,14 @@
#include "core.h"
// FMT_USE_TEXT controls whether the Boost.Text based code paths are compiled.
// It defaults to 0 (disabled) unless the build defines it beforehand.
#ifndef FMT_USE_TEXT
# define FMT_USE_TEXT 0
#endif
// The Boost.Text headers are only needed (and only required to be on the
// include path) when the feature is enabled.
#if FMT_USE_TEXT
# include <boost/text/grapheme_break.hpp>
# include <boost/text/transcode_iterator.hpp>
#endif
#ifdef __clang__
# define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__)
#else
@ -415,11 +423,6 @@ class output_range {
sentinel end() const { return {}; } // Sentinel is not used yet.
};
// Generic overload: reports the size of the view, i.e. every element of `s`
// is counted as one code point.
template <typename Char>
inline size_t count_code_points(basic_string_view<Char> s) {
  const size_t num_units = s.size();
  return num_units;
}
// Counts the number of code points in a UTF-8 string.
inline size_t count_code_points(basic_string_view<char8_t> s) {
const char8_t* data = s.data();
@ -912,6 +915,59 @@ inline It format_uint(It out, UInt value, int num_digits, bool upper = false) {
return internal::copy_str<Char>(buffer, buffer + num_digits, out);
}
// Generic overload: the width of a string of arbitrary character type is
// simply the size of the view.
template <typename Char>
inline size_t compute_width(basic_string_view<Char> s) {
  const size_t num_units = s.size();
  return num_units;
}
// Computes the width of a `char` string.
//
// With FMT_USE_TEXT enabled, `s` is decoded as UTF-8 into code points, the
// code points are split at grapheme breaks, and each grapheme contributes 1
// or 2 to the width depending on whether its first code point falls into one
// of the wide ranges listed below. Otherwise the width is `s.size()`.
inline size_t compute_width(string_view s) {
#if FMT_USE_TEXT
  using utf8_decoder = boost::text::utf_8_to_32_iterator<const char*>;

  // Step 1: transcode the UTF-8 bytes into a buffer of code points.
  const char* const first_byte = s.data();
  const char* const last_byte = first_byte + s.size();
  utf8_decoder decode_end(first_byte, last_byte, last_byte);
  basic_memory_buffer<uint32_t> code_points;
  for (utf8_decoder decoder(first_byte, first_byte, last_byte);
       decoder != decode_end; ++decoder) {
    code_points.push_back(*decoder);
  }

  // Step 2: walk the buffer one grapheme at a time and add up the widths.
  size_t width = 0;
  auto cluster = code_points.begin();
  auto stop = code_points.end();
  while (cluster != stop) {
    // Only the first code point of the grapheme decides its width.
    auto cp = *cluster;
    // Based on http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c by Markus Kuhn.
    const bool is_wide =
        cp >= 0x1100 &&
        (cp <= 0x115f ||  // Hangul Jamo init. consonants
         cp == 0x2329 ||  // LEFT-POINTING ANGLE BRACKET〈
         cp == 0x232a ||  // RIGHT-POINTING ANGLE BRACKET 〉
         // CJK ... Yi except Unicode Character “〿”:
         (cp >= 0x2e80 && cp <= 0xa4cf && cp != 0x303f) ||
         (cp >= 0xac00 && cp <= 0xd7a3) ||    // Hangul Syllables
         (cp >= 0xf900 && cp <= 0xfaff) ||    // CJK Compatibility Ideographs
         (cp >= 0xfe10 && cp <= 0xfe19) ||    // Vertical Forms
         (cp >= 0xfe30 && cp <= 0xfe6f) ||    // CJK Compatibility Forms
         (cp >= 0xff00 && cp <= 0xff60) ||    // Fullwidth Forms
         (cp >= 0xffe0 && cp <= 0xffe6) ||    // Fullwidth Forms
         (cp >= 0x20000 && cp <= 0x2fffd) ||  // CJK
         (cp >= 0x30000 && cp <= 0x3fffd) ||
         // Miscellaneous Symbols and Pictographs + Emoticons:
         (cp >= 0x1f300 && cp <= 0x1f64f) ||
         // Supplemental Symbols and Pictographs:
         (cp >= 0x1f900 && cp <= 0x1f9ff));
    width += is_wide ? 2 : 1;
    cluster = boost::text::next_grapheme_break(cluster, stop);
  }
  return width;
#else
  return s.size();
#endif  // FMT_USE_TEXT
}
// Computes the width of a UTF-8 string given as char8_t code units.
//
// With FMT_USE_TEXT enabled this forwards to the `string_view` overload over
// the same bytes; otherwise the width is the number of code points.
inline size_t compute_width(basic_string_view<char8_t> s) {
#if FMT_USE_TEXT
  // The data pointer and the length must be two separate constructor
  // arguments. Writing them both inside the reinterpret_cast parentheses turns
  // them into a comma expression, which casts the *size* to a pointer and
  // drops the length.
  return compute_width(
      string_view(reinterpret_cast<const char*>(s.data()), s.size()));
#else
  return count_code_points(s);
#endif  // FMT_USE_TEXT
}
#ifndef _WIN32
# define FMT_USE_WINDOWS_H 0
#elif !defined(FMT_USE_WINDOWS_H)
@ -1583,7 +1639,7 @@ template <typename Range> class basic_writer {
// Returns size_, the length that width() pairs with `s` to form the string view.
size_t size() const { return size_; }
// Returns the width of the string (s, size_) as computed by
// internal::compute_width. The stale count_code_points() return that preceded
// this call made the compute_width() line unreachable; with FMT_USE_TEXT
// disabled both calls yield the same value.
size_t width() const {
  return internal::compute_width(basic_string_view<Char>(s, size_));
}
template <typename It> void operator()(It&& it) const {

View File

@ -0,0 +1,4 @@
// Minimal stand-in for <boost/assert.hpp>: unless BOOST_ASSERT is already
// defined, map it onto the standard assert() macro.
#ifndef BOOST_ASSERT
#include <assert.h>
# define BOOST_ASSERT(condition) assert(condition)
#endif

View File

@ -0,0 +1,13 @@
#ifndef TEXT_BOOST_CONTAINER_SMALL_VECTOR_HPP
#define TEXT_BOOST_CONTAINER_SMALL_VECTOR_HPP
#include <cstddef>
#include <vector>
namespace boost {
namespace container {
// Stand-in for boost::container::small_vector: an alias for std::vector<T>.
// The second template parameter (the inline capacity in the real container)
// is accepted for source compatibility and otherwise ignored.
// std::size_t is spelled out (and <cstddef> included) so the alias does not
// rely on <vector> declaring an unqualified ::size_t.
template <typename T, std::size_t>
using small_vector = std::vector<T>;
}
}
#endif // TEXT_BOOST_CONTAINER_SMALL_VECTOR_HPP

102
src/text/boost/text/config.hpp Executable file
View File

@ -0,0 +1,102 @@
#ifndef BOOST_TEXT_CONFIG_HPP
#define BOOST_TEXT_CONFIG_HPP
/** There are ICU-based implementations of many operations, but those are only
defined when BOOST_TEXT_HAS_ICU is nonzero. If you define this, you must
make sure the ICU headers are in your path, and that your build
properly links in ICU. */
#ifndef BOOST_TEXT_HAS_ICU
# define BOOST_TEXT_HAS_ICU 0
#endif
/** There are ICU-based implementations of many operations, but those are only
used when BOOST_TEXT_HAS_ICU and BOOST_TEXT_USE_ICU are both nonzero. */
#ifndef BOOST_TEXT_USE_ICU
# define BOOST_TEXT_USE_ICU 0
#endif
/** When you insert into a rope, the incoming sequence may be inserted as a
new segment, or if it falls within an existing string-segment, it may be
inserted into the string object used to represent that segment. This only
happens if the incoming sequence will fit within the existing segment's
capacity, or if the segment is smaller than a certain limit.
BOOST_TEXT_STRING_INSERT_MAX is that limit. */
#ifndef BOOST_TEXT_STRING_INSERT_MAX
# define BOOST_TEXT_STRING_INSERT_MAX 4096
#endif
#ifndef BOOST_TEXT_DOXYGEN
// Selects the C++14-constexpr configuration macros.
// Nothing before GCC 6 has proper C++14 constexpr support.
// Note that in this copy BOOST_TEXT_CXX14_CONSTEXPR expands to nothing in every
// branch; the branches only differ in whether BOOST_TEXT_NO_CXX14_CONSTEXPR is
// defined.
#if defined(__GNUC__) && __GNUC__ < 6 && !defined(__clang__)
# define BOOST_TEXT_CXX14_CONSTEXPR
# define BOOST_TEXT_NO_CXX14_CONSTEXPR
// Same treatment for MSVC up to and including _MSC_VER 1915.
#elif defined(_MSC_VER) && _MSC_VER <= 1915
# define BOOST_TEXT_CXX14_CONSTEXPR
# define BOOST_TEXT_NO_CXX14_CONSTEXPR
#else
# define BOOST_TEXT_CXX14_CONSTEXPR
// Other compilers: follow BOOST_NO_CXX14_CONSTEXPR if something defined it.
# if defined(BOOST_NO_CXX14_CONSTEXPR)
# define BOOST_TEXT_NO_CXX14_CONSTEXPR
# endif
#endif
// Implements separate compilation features as described in
// http://www.boost.org/more/separate_compilation.html
// normalize macros
#if !defined(BOOST_TEXT_DYN_LINK) && !defined(BOOST_TEXT_STATIC_LINK) && \
!defined(BOOST_ALL_DYN_LINK) && !defined(BOOST_ALL_STATIC_LINK)
# define BOOST_TEXT_STATIC_LINK
#endif
#if defined(BOOST_ALL_DYN_LINK) && !defined(BOOST_TEXT_DYN_LINK)
# define BOOST_TEXT_DYN_LINK
#elif defined(BOOST_ALL_STATIC_LINK) && !defined(BOOST_TEXT_STATIC_LINK)
# define BOOST_TEXT_STATIC_LINK
#endif
#if defined(BOOST_TEXT_DYN_LINK) && defined(BOOST_TEXT_STATIC_LINK)
# error Must not define both BOOST_TEXT_DYN_LINK and BOOST_TEXT_STATIC_LINK
#endif
// enable dynamic or static linking as requested
#if defined(BOOST_ALL_DYN_LINK) || defined(BOOST_TEXT_DYN_LINK)
# if defined(BOOST_TEXT_SOURCE)
# define BOOST_TEXT_DECL BOOST_SYMBOL_EXPORT
# else
# define BOOST_TEXT_DECL BOOST_SYMBOL_IMPORT
# endif
#else
# define BOOST_TEXT_DECL
#endif
#if 0 // TODO: Disabled for now.
// enable automatic library variant selection
#if !defined(BOOST_TEXT_SOURCE) && !defined(BOOST_ALL_NO_LIB) && \
!defined(BOOST_TEXT_NO_LIB)
//
// Set the name of our library, this will get undef'ed by auto_link.hpp
// once it's done with it:
//
#define BOOST_LIB_NAME boost_text
//
// If we're importing code from a dll, then tell auto_link.hpp about it:
//
#if defined(BOOST_ALL_DYN_LINK) || defined(BOOST_TEXT_DYN_LINK)
# define BOOST_DYN_LINK
#endif
//
// And include the header that does the work:
//
#include <boost/config/auto_link.hpp>
#endif // auto-linking disabled
#endif
#endif // doxygen
#endif

View File

@ -0,0 +1,51 @@
#ifndef BOOST_TEXT_DETAIL_BREAK_PROP_ITER_HPP
#define BOOST_TEXT_DETAIL_BREAK_PROP_ITER_HPP
#include <boost/text/detail/lzw.hpp>
#include <unordered_map>
namespace boost { namespace text { namespace detail {
/** Output iterator that receives a byte stream one `unsigned char` at a time
    and turns every complete 4-byte record into a map entry: the first three
    bytes form the code point (see `bytes_to_cp()`), the fourth byte is
    converted to `Enum`.  Bytes of a not-yet-complete record are kept in the
    caller-supplied buffer.  Both the map and the buffer are referenced, not
    owned. */
template<typename Enum>
struct lzw_to_break_prop_iter
{
    using value_type = std::pair<uint32_t, Enum>;
    using difference_type = int;
    using pointer = unsigned char *;
    using reference = unsigned char &;
    using iterator_category = std::output_iterator_tag;
    using buffer_t = container::small_vector<unsigned char, 256>;

    lzw_to_break_prop_iter(
        std::unordered_map<uint32_t, Enum> & map, buffer_t & buf) :
        map_(&map),
        buf_(&buf)
    {}

    /** Appends `c` to the buffer, stores every complete record found in the
        buffer into the map, and drops the consumed bytes. */
    lzw_to_break_prop_iter & operator=(unsigned char c)
    {
        buf_->push_back(c);

        auto const record_size = 4;
        // End of the last complete record; trailing bytes stay buffered.
        auto const complete_end = buf_->end() - buf_->size() % record_size;

        auto record = buf_->begin();
        while (record != complete_end) {
            unsigned char const * const bytes = &*record;
            (*map_)[bytes_to_cp(bytes)] = Enum(bytes[3]);
            record += record_size;
        }

        buf_->erase(buf_->begin(), record);
        return *this;
    }

    // Dereference and increment are no-ops; all work happens on assignment.
    lzw_to_break_prop_iter & operator*() { return *this; }
    lzw_to_break_prop_iter & operator++() { return *this; }
    lzw_to_break_prop_iter & operator++(int) { return *this; }

private:
    std::unordered_map<uint32_t, Enum> * map_;
    buffer_t * buf_;
};
}}}
#endif

View File

@ -0,0 +1,104 @@
#ifndef BOOST_TEXT_DETAIL_LZW_HPP
#define BOOST_TEXT_DETAIL_LZW_HPP
#include <boost/assert.hpp>
#include <boost/container/small_vector.hpp>
#include <vector>
namespace boost { namespace text { namespace detail {
/** Assembles a 32-bit value from four bytes, most significant byte first.

    Each byte is widened to `uint32_t` before shifting.  Shifting the
    `int`-promoted `unsigned char` left by 24 would overflow a signed `int`
    whenever `chars[0] >= 0x80`. */
inline uint32_t bytes_to_uint32_t(unsigned char const * chars)
{
    return (static_cast<uint32_t>(chars[0]) << 24) |
           (static_cast<uint32_t>(chars[1]) << 16) |
           (static_cast<uint32_t>(chars[2]) << 8) |
           (static_cast<uint32_t>(chars[3]) << 0);
}
/** Assembles a code point from three bytes, most significant byte first. */
inline uint32_t bytes_to_cp(unsigned char const * chars)
{
    uint32_t cp = chars[0];
    cp = (cp << 8) | chars[1];
    cp = (cp << 8) | chars[2];
    return cp;
}
/** Assembles a 16-bit quantity from two bytes, most significant byte first.
    The result is returned widened to `uint32_t`. */
inline uint32_t bytes_to_uint16_t(unsigned char const * chars)
{
    uint32_t const high = chars[0];
    uint32_t const low = chars[1];
    return (high << 8) | low;
}
// Sentinels for the reverse table: both share the value 0xffff.
// no_predecessor marks an entry that ends a chain; no_value marks an entry
// whose value has not been set (lzw_decompress() tests for it).
enum : uint16_t { no_predecessor = 0xffff, no_value = 0xffff };
// One entry of the LZW reverse (decoding) table: the value stored for this
// code and the code of the entry that precedes it in the chain.
struct lzw_reverse_table_element
{
// Both members default to their sentinel, i.e. an unset entry.
lzw_reverse_table_element(
uint16_t pred = no_predecessor, uint16_t value = no_value) :
pred_(pred),
value_(value)
{}
uint16_t pred_;
uint16_t value_;
};
// The decoding table, indexed by code.
using lzw_reverse_table = std::vector<lzw_reverse_table_element>;
/** Writes the chain of values that starts at `table[i]` to `out`: first the
    value of entry `i`, then the value of its predecessor, and so on until an
    entry without predecessor has been written.  Returns the advanced output
    iterator. */
template<typename OutIter>
OutIter
copy_table_entry(lzw_reverse_table const & table, uint16_t i, OutIter out)
{
    for (;;) {
        lzw_reverse_table_element const & entry = table[i];
        *out++ = entry.value_;
        if (entry.pred_ == no_predecessor)
            break;
        i = entry.pred_;
    }
    return out;
}
// Hardcoded to 16 bits.  Takes unsigned 16-bit LZW-compressed values from
// [first, last) as input and writes the decompressed data to out.
//
// Each decoded table entry is assigned to `*out++` as a whole byte container.
// Within an entry the bytes are in the order produced by copy_table_entry(),
// i.e. walking the predecessor chain from the entry's own value.
//
// An empty input range produces no output and returns `out` unchanged
// (previously `first` was dereferenced unconditionally).
// The first code must be a literal (< 256); this is checked with BOOST_ASSERT.
template<typename Iter, typename OutIter>
OutIter lzw_decompress(Iter first, Iter last, OutIter out)
{
    // Nothing to decode.
    if (first == last)
        return out;

    // Codes 0..255 are the literal bytes; everything above starts out unset.
    lzw_reverse_table reverse_table(1 << 16);
    for (uint16_t i = 0; i < 256u; ++i) {
        reverse_table[i].value_ = i;
    }

    container::small_vector<unsigned char, 256> table_entry;

    uint32_t next_table_value = 256;
    uint32_t const end_table_value = 1 << 16;

    uint16_t prev_code = *first++;
    BOOST_ASSERT(prev_code < 256);
    unsigned char c = (unsigned char)prev_code;
    table_entry.push_back(c);
    *out++ = table_entry;

    while (first != last) {
        uint16_t const code = *first++;

        table_entry.clear();
        if (reverse_table[code].value_ == no_value) {
            // The code is not in the table yet: the entry is the previous
            // entry extended by `c`.
            table_entry.push_back(c);
            copy_table_entry(
                reverse_table, prev_code, std::back_inserter(table_entry));
        } else {
            copy_table_entry(
                reverse_table, code, std::back_inserter(table_entry));
        }

        *out++ = table_entry;
        // The last element written by copy_table_entry() is the value at the
        // end of the predecessor chain.
        c = table_entry.back();

        // Register "previous entry + c" under the next free code, as long as
        // the 16-bit code space is not exhausted.
        if (next_table_value < end_table_value) {
            reverse_table[next_table_value++] =
                lzw_reverse_table_element{prev_code, c};
        }

        prev_code = code;
    }

    return out;
}
}}}
#endif

View File

@ -0,0 +1,224 @@
#ifndef BOOST_TEXT_GRAPHEME_BREAK_HPP
#define BOOST_TEXT_GRAPHEME_BREAK_HPP
#include <array>
#include <unordered_map>
#include <stdint.h>
#define BOOST_TEXT_DECL
namespace boost { namespace text {
/** Backward counterpart of `std::find_if()`: returns an iterator to the last
    element `v` of `[first, last)` for which `p(v)` is true, or `last` if
    there is no such element. */
template<typename BidiIter, typename Pred>
BidiIter find_if_backward(BidiIter first, BidiIter last, Pred p)
{
    for (auto cursor = last; cursor != first;) {
        --cursor;
        if (p(*cursor))
            return cursor;
    }
    return last;
}
/** The grapheme properties defined by Unicode.

    The enumerator order matters: `detail::table_grapheme_break()` casts
    these values to `int` and uses them as row/column indices into its 15x15
    break table, whose rows and columns are laid out in this same order. */
enum class grapheme_property {
Other,
CR,
LF,
Control,
Extend,
Regional_Indicator,
Prepend,
SpacingMark,
L,
V,
T,
LV,
LVT,
ExtPict,
ZWJ
};
namespace detail {
// A range of code points sharing one grapheme property.  grapheme_prop()
// treats the range as half-open: a code point cp matches when
// lo_ <= cp < hi_.
struct grapheme_prop_interval
{
uint32_t lo_;
uint32_t hi_;
grapheme_property prop_;
};
// Orders intervals by position: lhs is "less" when it ends at or before the
// start of rhs.  prop_ does not take part in the comparison.
inline bool operator<(
grapheme_prop_interval lhs, grapheme_prop_interval rhs) noexcept
{
return lhs.hi_ <= rhs.lo_;
}
// Data providers used by grapheme_prop(); only declared here.
// The intervals are searched with std::lower_bound using the operator<
// above, so they are presumably sorted accordingly -- verify against the
// definition.
BOOST_TEXT_DECL std::array<grapheme_prop_interval, 6> const &
make_grapheme_prop_intervals();
BOOST_TEXT_DECL std::unordered_map<uint32_t, grapheme_property>
make_grapheme_prop_map();
}
/** Looks up the grapheme property of code point `cp`.

    The per-code-point map is consulted first.  If `cp` is not in the map,
    the interval table is searched; a code point covered by neither has the
    property `Other`.  Both tables are built once, on first use. */
inline grapheme_property grapheme_prop(uint32_t cp) noexcept
{
    static auto const map = detail::make_grapheme_prop_map();
    static auto const intervals = detail::make_grapheme_prop_intervals();

    // Exact match on a single code point.
    auto const single = map.find(cp);
    if (single != map.end())
        return single->second;

    // Otherwise find the first interval that does not end before cp and
    // check that it really contains cp.
    auto const candidate = std::lower_bound(
        intervals.begin(),
        intervals.end(),
        detail::grapheme_prop_interval{cp, cp + 1});
    bool const covered = candidate != intervals.end() &&
                         candidate->lo_ <= cp && cp < candidate->hi_;
    return covered ? candidate->prop_ : grapheme_property::Other;
}
namespace detail {
// True only for the Extend property.
inline bool skippable(grapheme_property prop) noexcept
{
    bool const is_extend = (grapheme_property::Extend == prop);
    return is_extend;
}
// Extra state carried by the grapheme break scan in next_grapheme_break().
// There, first_emoji is entered when a Regional_Indicator code point is seen
// and reset to none by the following code point; second_emoji is not
// assigned anywhere in the code visible in this header.
enum class grapheme_break_emoji_state_t {
none,
first_emoji, // Indicates that prop points to an odd-count
// emoji.
second_emoji // Indicates that prop points to an even-count
// emoji.
};
// Scan state of the grapheme break search.
template<typename CPIter>
struct grapheme_break_state
{
// Position of the code point currently being examined.
CPIter it;
// Grapheme property of the code point before `it`.
grapheme_property prev_prop;
// Grapheme property of the code point at `it`.
grapheme_property prop;
// Regional-indicator pairing state (see grapheme_break_emoji_state_t).
grapheme_break_emoji_state_t emoji_state;
};
// Returns a copy of `state` moved one code point forward: the current
// property becomes the previous one and the iterator is incremented.
// `prop` itself is left for the caller to refresh.
template<typename CPIter>
grapheme_break_state<CPIter> next(grapheme_break_state<CPIter> state)
{
    state.prev_prop = state.prop;
    ++state.it;
    return state;
}
// Returns a copy of `state` moved one code point backward: the previous
// property becomes the current one and the iterator is decremented.
// `prev_prop` itself is left for the caller to refresh.
template<typename CPIter>
grapheme_break_state<CPIter> prev(grapheme_break_state<CPIter> state)
{
    state.prop = state.prev_prop;
    --state.it;
    return state;
}
// Scans backward from `prev_it` (exclusive) towards `first`, skipping code
// points with the Extend property, and reports whether the first
// non-Extend code point found is ExtPict.  Returns false when the range is
// empty or consists of Extend code points only.
template<typename CPIter>
bool gb11_prefix(CPIter first, CPIter prev_it)
{
    auto last_seen = grapheme_property::Other;
    for (auto cursor = prev_it; cursor != first;) {
        --cursor;
        uint32_t const cp = *cursor;
        last_seen = grapheme_prop(cp);
        if (last_seen != grapheme_property::Extend)
            break;
    }
    return last_seen == grapheme_property::ExtPict;
}
// Table-driven part of the grapheme break decision: returns true when there
// is a break between a code point with property `lhs` and a following code
// point with property `rhs`.  Rows are indexed by `lhs`, columns by `rhs`,
// both in grapheme_property enumerator order; 1 means "break", 0 "no break".
inline bool table_grapheme_break(
grapheme_property lhs, grapheme_property rhs) noexcept
{
// Note that RI.RI was changed to '1' since that case is handled
// in the grapheme break FSM.
// clang-format off
// See chart at https://unicode.org/Public/11.0.0/ucd/auxiliary/GraphemeBreakTest.html .
constexpr std::array<std::array<bool, 15>, 15> grapheme_breaks = {{
// Other CR LF Ctrl Ext RI Pre SpcMk L V T LV LVT ExtPict ZWJ
{{1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0}}, // Other
{{1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}, // CR
{{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}, // LF
{{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}, // Control
{{1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0}}, // Extend
{{1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0}}, // RI
{{0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Prepend
{{1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0}}, // SpacingMark
{{1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0}}, // L
{{1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0}}, // V
{{1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0}}, // T
{{1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0}}, // LV
{{1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0}}, // LVT
{{1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0}}, // ExtPict
{{1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0}}, // ZWJ
}};
// clang-format on
// The enumerators double as table indices.
auto const lhs_int = static_cast<int>(lhs);
auto const rhs_int = static_cast<int>(rhs);
return grapheme_breaks[lhs_int][rhs_int];
}
}
/** Returns the position of the first grapheme break after `first` in
    `[first, last)`, or the end of the range if there is none before it.
    An empty range yields `first`; a single code point yields the position
    just past it.  The result is always a `CPIter`. */
template<typename CPIter, typename Sentinel>
CPIter next_grapheme_break(CPIter first, Sentinel last) noexcept
{
// Empty range: nothing to break.
if (first == last)
return first;
detail::grapheme_break_state<CPIter> state;
state.it = first;
// A single code point is a grapheme on its own.
if (++state.it == last)
return state.it;
// state.it now refers to the second code point; prev_prop describes the
// first one.
state.prev_prop = grapheme_prop(*std::prev(state.it));
state.prop = grapheme_prop(*state.it);
// Start in first_emoji when the grapheme begins with a Regional_Indicator,
// so that a directly following Regional_Indicator is joined to it.
state.emoji_state =
state.prev_prop == grapheme_property::Regional_Indicator
? detail::grapheme_break_emoji_state_t::first_emoji
: detail::grapheme_break_emoji_state_t::none;
// Examine each boundary between *(it - 1) and *it; next() shifts prop into
// prev_prop and advances it, and prop is recomputed at the top of the body.
for (; state.it != last; state = next(state)) {
state.prop = grapheme_prop(*state.it);
// GB11
// No break between ZWJ and ExtPict when the ZWJ is preceded by
// ExtPict followed by any number of Extend code points.
if (state.prev_prop == grapheme_property::ZWJ &&
state.prop == grapheme_property::ExtPict &&
detail::gb11_prefix(first, std::prev(state.it))) {
continue;
}
// Regional indicator pairing: the table itself always reports a break
// between two Regional_Indicators, so the pair is kept together here.
if (state.emoji_state ==
detail::grapheme_break_emoji_state_t::first_emoji) {
if (state.prop == grapheme_property::Regional_Indicator) {
// Second half of a pair: no break, and the pair is complete.
state.emoji_state =
detail::grapheme_break_emoji_state_t::none;
continue;
} else {
state.emoji_state =
detail::grapheme_break_emoji_state_t::none;
}
} else if (state.prop == grapheme_property::Regional_Indicator) {
// A Regional_Indicator that may start a new pair.
state.emoji_state =
detail::grapheme_break_emoji_state_t::first_emoji;
}
// All remaining cases are decided by the break table.
if (detail::table_grapheme_break(state.prev_prop, state.prop))
return state.it;
}
// No break found before the end of the range.
return state.it;
}
}}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,9 @@
#ifndef TEXT_BOOST_THROW_EXCEPTION_HPP
#define TEXT_BOOST_THROW_EXCEPTION_HPP
namespace boost {
// Minimal stand-in for boost::throw_exception: throws a copy of `e`.
// The function never returns normally, so it is marked [[noreturn]]; this
// lets callers end a non-void function with a call to it without triggering
// "missing return" diagnostics.
template <typename E>
[[noreturn]] void throw_exception(const E& e) { throw e; }
}
#endif // TEXT_BOOST_THROW_EXCEPTION_HPP

3589
src/text/grapheme_break.cpp Normal file

File diff suppressed because it is too large Load Diff