mirror of
https://github.com/fmtlib/fmt.git
synced 2025-06-25 09:21:41 +02:00
Compare commits
9 Commits
Author | SHA1 | Date | |
---|---|---|---|
48f76dbb52 | |||
ca3dacba47 | |||
6d47e093c5 | |||
d1626e96ef | |||
160bcb723c | |||
c7ea093c27 | |||
00434c93ef | |||
b12033fd68 | |||
e5ab813ffb |
@ -33,6 +33,8 @@ if (MASTER_PROJECT AND NOT CMAKE_BUILD_TYPE)
|
||||
set(CMAKE_BUILD_TYPE Release CACHE STRING ${doc})
|
||||
endif ()
|
||||
|
||||
option(FMT_USE_TEXT "Use the text library." OFF)
|
||||
|
||||
option(FMT_PEDANTIC "Enable extra warnings and expensive tests." OFF)
|
||||
option(FMT_WERROR "Halt the compilation with an error on compiler warnings."
|
||||
OFF)
|
||||
@ -160,6 +162,10 @@ if (HAVE_OPEN)
|
||||
set(FMT_SOURCES ${FMT_SOURCES} src/posix.cc)
|
||||
endif ()
|
||||
|
||||
if (FMT_USE_TEXT)
|
||||
set(FMT_SOURCES ${FMT_SOURCES} src/text/grapheme_break.cpp)
|
||||
endif ()
|
||||
|
||||
add_library(fmt ${FMT_SOURCES} ${FMT_HEADERS} README.rst ChangeLog.rst)
|
||||
add_library(fmt::fmt ALIAS fmt)
|
||||
|
||||
@ -180,6 +186,11 @@ target_include_directories(fmt PUBLIC
|
||||
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
|
||||
$<INSTALL_INTERFACE:include>)
|
||||
|
||||
# When the optional text library is enabled, expose its bundled headers
# (src/text) while building in-tree and define FMT_USE_TEXT for fmt and its
# consumers: format.h only includes the boost/text headers and takes the
# grapheme-aware width path when that macro is nonzero.
if (FMT_USE_TEXT)
  target_include_directories(fmt PUBLIC
    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/src/text>)
  target_compile_definitions(fmt PUBLIC FMT_USE_TEXT=1)
endif ()
|
||||
|
||||
set_target_properties(fmt PROPERTIES
|
||||
VERSION ${FMT_VERSION} SOVERSION ${CPACK_PACKAGE_VERSION_MAJOR}
|
||||
DEBUG_POSTFIX d)
|
||||
|
@ -45,6 +45,14 @@
|
||||
|
||||
#include "core.h"
|
||||
|
||||
#ifndef FMT_USE_TEXT
|
||||
# define FMT_USE_TEXT 0
|
||||
#endif
|
||||
#if FMT_USE_TEXT
|
||||
# include <boost/text/grapheme_break.hpp>
|
||||
# include <boost/text/transcode_iterator.hpp>
|
||||
#endif
|
||||
|
||||
#ifdef __clang__
|
||||
# define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__)
|
||||
#else
|
||||
@ -415,11 +423,6 @@ class output_range {
|
||||
sentinel end() const { return {}; } // Sentinel is not used yet.
|
||||
};
|
||||
|
||||
template <typename Char>
|
||||
inline size_t count_code_points(basic_string_view<Char> s) {
|
||||
return s.size();
|
||||
}
|
||||
|
||||
// Counts the number of code points in a UTF-8 string.
|
||||
inline size_t count_code_points(basic_string_view<char8_t> s) {
|
||||
const char8_t* data = s.data();
|
||||
@ -912,6 +915,59 @@ inline It format_uint(It out, UInt value, int num_digits, bool upper = false) {
|
||||
return internal::copy_str<Char>(buffer, buffer + num_digits, out);
|
||||
}
|
||||
|
||||
template <typename Char>
|
||||
inline size_t compute_width(basic_string_view<Char> s) {
|
||||
return s.size();
|
||||
}
|
||||
|
||||
// Computes the width of the `char` string `s`.
//
// With FMT_USE_TEXT enabled the bytes are decoded as UTF-8 into code points,
// the code points are walked one grapheme cluster at a time, and each cluster
// contributes 2 if its first code point falls in one of the wide ranges below
// and 1 otherwise. Without FMT_USE_TEXT the width is simply s.size().
inline size_t compute_width(string_view s) {
#if FMT_USE_TEXT
  // Decode the whole input into a buffer of 32-bit code points.
  const char* const first_byte = s.data();
  const char* const last_byte = first_byte + s.size();
  using utf32_iterator = boost::text::utf_8_to_32_iterator<const char*>;
  utf32_iterator decode_it(first_byte, first_byte, last_byte);
  utf32_iterator decode_end(first_byte, last_byte, last_byte);
  basic_memory_buffer<uint32_t> code_points;
  for (; decode_it != decode_end; ++decode_it)
    code_points.push_back(*decode_it);

  // Based on http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c by Markus Kuhn.
  auto is_wide = [](uint32_t cp) -> bool {
    return cp >= 0x1100 &&
           (cp <= 0x115f ||  // Hangul Jamo init. consonants
            cp == 0x2329 ||  // LEFT-POINTING ANGLE BRACKET
            cp == 0x232a ||  // RIGHT-POINTING ANGLE BRACKET
            // CJK ... Yi except U+303F:
            (cp >= 0x2e80 && cp <= 0xa4cf && cp != 0x303f) ||
            (cp >= 0xac00 && cp <= 0xd7a3) ||    // Hangul Syllables
            (cp >= 0xf900 && cp <= 0xfaff) ||    // CJK Compatibility Ideographs
            (cp >= 0xfe10 && cp <= 0xfe19) ||    // Vertical Forms
            (cp >= 0xfe30 && cp <= 0xfe6f) ||    // CJK Compatibility Forms
            (cp >= 0xff00 && cp <= 0xff60) ||    // Fullwidth Forms
            (cp >= 0xffe0 && cp <= 0xffe6) ||    // Fullwidth Forms
            (cp >= 0x20000 && cp <= 0x2fffd) ||  // CJK
            (cp >= 0x30000 && cp <= 0x3fffd) ||
            // Miscellaneous Symbols and Pictographs + Emoticons:
            (cp >= 0x1f300 && cp <= 0x1f64f) ||
            // Supplemental Symbols and Pictographs:
            (cp >= 0x1f900 && cp <= 0x1f9ff));
  };

  // Only the first code point of each grapheme cluster decides its width.
  size_t width = 0;
  auto cluster = code_points.begin();
  auto stop = code_points.end();
  while (cluster != stop) {
    width += is_wide(*cluster) ? 2 : 1;
    cluster = boost::text::next_grapheme_break(cluster, stop);
  }
  return width;
#else
  return s.size();
#endif  // FMT_USE_TEXT
}
|
||||
|
||||
// Computes the width of the UTF-8 (char8_t) string `s`.
//
// With FMT_USE_TEXT enabled the same bytes are viewed as a `char` string of
// identical length and handed to the string_view overload. Otherwise the
// width is the result of count_code_points(s).
inline size_t compute_width(basic_string_view<char8_t> s) {
#if FMT_USE_TEXT
  // The cast applies to the data pointer only; the length is passed as the
  // second string_view constructor argument.
  return compute_width(
      string_view(reinterpret_cast<const char*>(s.data()), s.size()));
#else
  return count_code_points(s);
#endif  // FMT_USE_TEXT
}
|
||||
|
||||
#ifndef _WIN32
|
||||
# define FMT_USE_WINDOWS_H 0
|
||||
#elif !defined(FMT_USE_WINDOWS_H)
|
||||
@ -1583,7 +1639,7 @@ template <typename Range> class basic_writer {
|
||||
|
||||
// Returns the stored length `size_`.
size_t size() const { return size_; }
|
||||
size_t width() const {
|
||||
return internal::count_code_points(basic_string_view<Char>(s, size_));
|
||||
return internal::compute_width(basic_string_view<Char>(s, size_));
|
||||
}
|
||||
|
||||
template <typename It> void operator()(It&& it) const {
|
||||
|
4
src/text/boost/assert.hpp
Normal file
4
src/text/boost/assert.hpp
Normal file
@ -0,0 +1,4 @@
|
||||
// Minimal boost/assert.hpp shim: unless BOOST_ASSERT is already defined,
// forward it to the standard assert() macro.
#if !defined(BOOST_ASSERT)
#  include <assert.h>
#  define BOOST_ASSERT(expr) assert(expr)
#endif
|
13
src/text/boost/container/small_vector.hpp
Normal file
13
src/text/boost/container/small_vector.hpp
Normal file
@ -0,0 +1,13 @@
|
||||
#ifndef TEXT_BOOST_CONTAINER_SMALL_VECTOR_HPP
|
||||
#define TEXT_BOOST_CONTAINER_SMALL_VECTOR_HPP
|
||||
|
||||
#include <cstddef>
#include <vector>

namespace boost {
namespace container {
// Stand-in for a small-buffer vector: the second template argument is
// accepted but ignored, and the alias is a plain std::vector<T>.
template <typename T, std::size_t>
using small_vector = std::vector<T>;
}  // namespace container
}  // namespace boost
|
||||
|
||||
#endif // TEXT_BOOST_CONTAINER_SMALL_VECTOR_HPP
|
102
src/text/boost/text/config.hpp
Executable file
102
src/text/boost/text/config.hpp
Executable file
@ -0,0 +1,102 @@
|
||||
#ifndef BOOST_TEXT_CONFIG_HPP
|
||||
#define BOOST_TEXT_CONFIG_HPP
|
||||
|
||||
|
||||
/** There are ICU-based implementations of many operations, but those are only
|
||||
defined when BOOST_TEXT_HAS_ICU is nonzero. If you define this, you must
|
||||
make sure the ICU headers are in your path, and that your build
|
||||
properly links in ICU. */
|
||||
#ifndef BOOST_TEXT_HAS_ICU
|
||||
# define BOOST_TEXT_HAS_ICU 0
|
||||
#endif
|
||||
|
||||
/** There are ICU-based implementations of many operations, but those are only
|
||||
used when BOOST_TEXT_HAS_ICU and BOOST_TEXT_USE_ICU are both nonzero. */
|
||||
#ifndef BOOST_TEXT_USE_ICU
|
||||
# define BOOST_TEXT_USE_ICU 0
|
||||
#endif
|
||||
|
||||
/** When you insert into a rope, the incoming sequence may be inserted as a
|
||||
new segment, or if it falls within an existing string-segment, it may be
|
||||
inserted into the string object used to represent that segment. This only
|
||||
happens if the incoming sequence will fit within the existing segment's
|
||||
capacity, or if the segment is smaller than a certain limit.
|
||||
BOOST_TEXT_STRING_INSERT_MAX is that limit. */
|
||||
#ifndef BOOST_TEXT_STRING_INSERT_MAX
|
||||
# define BOOST_TEXT_STRING_INSERT_MAX 4096
|
||||
#endif
|
||||
|
||||
#ifndef BOOST_TEXT_DOXYGEN
|
||||
|
||||
// Nothing before GCC 6 has proper C++14 constexpr support.
|
||||
#if defined(__GNUC__) && __GNUC__ < 6 && !defined(__clang__)
|
||||
# define BOOST_TEXT_CXX14_CONSTEXPR
|
||||
# define BOOST_TEXT_NO_CXX14_CONSTEXPR
|
||||
#elif defined(_MSC_VER) && _MSC_VER <= 1915
|
||||
# define BOOST_TEXT_CXX14_CONSTEXPR
|
||||
# define BOOST_TEXT_NO_CXX14_CONSTEXPR
|
||||
#else
|
||||
# define BOOST_TEXT_CXX14_CONSTEXPR
|
||||
# if defined(BOOST_NO_CXX14_CONSTEXPR)
|
||||
# define BOOST_TEXT_NO_CXX14_CONSTEXPR
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// Implements separate compilation features as described in
|
||||
// http://www.boost.org/more/separate_compilation.html
|
||||
|
||||
// normalize macros
|
||||
|
||||
#if !defined(BOOST_TEXT_DYN_LINK) && !defined(BOOST_TEXT_STATIC_LINK) && \
|
||||
!defined(BOOST_ALL_DYN_LINK) && !defined(BOOST_ALL_STATIC_LINK)
|
||||
# define BOOST_TEXT_STATIC_LINK
|
||||
#endif
|
||||
|
||||
#if defined(BOOST_ALL_DYN_LINK) && !defined(BOOST_TEXT_DYN_LINK)
|
||||
# define BOOST_TEXT_DYN_LINK
|
||||
#elif defined(BOOST_ALL_STATIC_LINK) && !defined(BOOST_TEXT_STATIC_LINK)
|
||||
# define BOOST_TEXT_STATIC_LINK
|
||||
#endif
|
||||
|
||||
#if defined(BOOST_TEXT_DYN_LINK) && defined(BOOST_TEXT_STATIC_LINK)
|
||||
# error Must not define both BOOST_TEXT_DYN_LINK and BOOST_TEXT_STATIC_LINK
|
||||
#endif
|
||||
|
||||
// enable dynamic or static linking as requested
|
||||
|
||||
#if defined(BOOST_ALL_DYN_LINK) || defined(BOOST_TEXT_DYN_LINK)
|
||||
# if defined(BOOST_TEXT_SOURCE)
|
||||
# define BOOST_TEXT_DECL BOOST_SYMBOL_EXPORT
|
||||
# else
|
||||
# define BOOST_TEXT_DECL BOOST_SYMBOL_IMPORT
|
||||
# endif
|
||||
#else
|
||||
# define BOOST_TEXT_DECL
|
||||
#endif
|
||||
|
||||
#if 0 // TODO: Disabled for now.
|
||||
// enable automatic library variant selection
|
||||
|
||||
#if !defined(BOOST_TEXT_SOURCE) && !defined(BOOST_ALL_NO_LIB) && \
|
||||
!defined(BOOST_TEXT_NO_LIB)
|
||||
//
|
||||
// Set the name of our library, this will get undef'ed by auto_link.hpp
|
||||
// once it's done with it:
|
||||
//
|
||||
#define BOOST_LIB_NAME boost_text
|
||||
//
|
||||
// If we're importing code from a dll, then tell auto_link.hpp about it:
|
||||
//
|
||||
#if defined(BOOST_ALL_DYN_LINK) || defined(BOOST_TEXT_DYN_LINK)
|
||||
# define BOOST_DYN_LINK
|
||||
#endif
|
||||
//
|
||||
// And include the header that does the work:
|
||||
//
|
||||
#include <boost/config/auto_link.hpp>
|
||||
#endif // auto-linking disabled
|
||||
#endif
|
||||
|
||||
#endif // doxygen
|
||||
|
||||
#endif
|
51
src/text/boost/text/detail/break_prop_iter.hpp
Normal file
51
src/text/boost/text/detail/break_prop_iter.hpp
Normal file
@ -0,0 +1,51 @@
|
||||
#ifndef BOOST_TEXT_DETAIL_BREAK_PROP_ITER_HPP
|
||||
#define BOOST_TEXT_DETAIL_BREAK_PROP_ITER_HPP
|
||||
|
||||
#include <boost/text/detail/lzw.hpp>
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
|
||||
namespace boost { namespace text { namespace detail {
|
||||
|
||||
template<typename Enum>
|
||||
struct lzw_to_break_prop_iter
|
||||
{
|
||||
using value_type = std::pair<uint32_t, Enum>;
|
||||
using difference_type = int;
|
||||
using pointer = unsigned char *;
|
||||
using reference = unsigned char &;
|
||||
using iterator_category = std::output_iterator_tag;
|
||||
using buffer_t = container::small_vector<unsigned char, 256>;
|
||||
|
||||
lzw_to_break_prop_iter(
|
||||
std::unordered_map<uint32_t, Enum> & map, buffer_t & buf) :
|
||||
map_(&map),
|
||||
buf_(&buf)
|
||||
{}
|
||||
|
||||
lzw_to_break_prop_iter & operator=(unsigned char c)
|
||||
{
|
||||
buf_->push_back(c);
|
||||
auto const element_bytes = 4;
|
||||
auto it = buf_->begin();
|
||||
for (auto end = buf_->end() - buf_->size() % element_bytes;
|
||||
it != end;
|
||||
it += element_bytes) {
|
||||
(*map_)[bytes_to_cp(&*it)] = Enum(*(it + 3));
|
||||
}
|
||||
buf_->erase(buf_->begin(), it);
|
||||
return *this;
|
||||
}
|
||||
lzw_to_break_prop_iter & operator*() { return *this; }
|
||||
lzw_to_break_prop_iter & operator++() { return *this; }
|
||||
lzw_to_break_prop_iter & operator++(int) { return *this; }
|
||||
|
||||
private:
|
||||
std::unordered_map<uint32_t, Enum> * map_;
|
||||
buffer_t * buf_;
|
||||
};
|
||||
|
||||
}}}
|
||||
|
||||
#endif
|
104
src/text/boost/text/detail/lzw.hpp
Normal file
104
src/text/boost/text/detail/lzw.hpp
Normal file
@ -0,0 +1,104 @@
|
||||
#ifndef BOOST_TEXT_DETAIL_LZW_HPP
|
||||
#define BOOST_TEXT_DETAIL_LZW_HPP
|
||||
|
||||
#include <boost/assert.hpp>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
#include <vector>
|
||||
|
||||
|
||||
namespace boost { namespace text { namespace detail {
|
||||
|
||||
/** Assembles the four bytes at `chars`, most significant byte first, into a
    32-bit value. Each byte is widened to `uint32_t` before shifting so that
    a leading byte >= 0x80 is never shifted into the sign bit of a promoted
    `int`. */
inline uint32_t bytes_to_uint32_t(unsigned char const * chars)
{
    return static_cast<uint32_t>(chars[0]) << 24 |
           static_cast<uint32_t>(chars[1]) << 16 |
           static_cast<uint32_t>(chars[2]) << 8 |
           static_cast<uint32_t>(chars[3]);
}
|
||||
|
||||
/** Assembles the three bytes at `chars`, most significant byte first, into
    a single value (used by callers as a code point). */
inline uint32_t bytes_to_cp(unsigned char const * chars)
{
    auto const high = chars[0] << 16;
    auto const middle = chars[1] << 8;
    auto const low = chars[2] << 0;
    return high | middle | low;
}
|
||||
|
||||
/** Assembles the two bytes at `chars`, most significant byte first, into a
    16-bit quantity (returned widened to `uint32_t`). */
inline uint32_t bytes_to_uint16_t(unsigned char const * chars)
{
    auto const high = chars[0] << 8;
    auto const low = chars[1] << 0;
    return high | low;
}
|
||||
|
||||
// Sentinels for LZW reverse-table entries: `no_predecessor` is the pred_ value
// at which copy_table_entry() stops following links, and `no_value` is the
// value_ that lzw_decompress() tests for to detect an unset entry.
enum : uint16_t { no_predecessor = 0xffff, no_value = 0xffff };
|
||||
|
||||
struct lzw_reverse_table_element
|
||||
{
|
||||
lzw_reverse_table_element(
|
||||
uint16_t pred = no_predecessor, uint16_t value = no_value) :
|
||||
pred_(pred),
|
||||
value_(value)
|
||||
{}
|
||||
uint16_t pred_;
|
||||
uint16_t value_;
|
||||
};
|
||||
|
||||
using lzw_reverse_table = std::vector<lzw_reverse_table_element>;
|
||||
|
||||
template<typename OutIter>
|
||||
OutIter
|
||||
copy_table_entry(lzw_reverse_table const & table, uint16_t i, OutIter out)
|
||||
{
|
||||
*out++ = table[i].value_;
|
||||
while (table[i].pred_ != no_predecessor) {
|
||||
i = table[i].pred_;
|
||||
*out++ = table[i].value_;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
// Hardcoded to 16 bits. Takes unsigned 16-bit LZW-compressed values as
|
||||
// input and writes the decompressed unsigned char values to out.
|
||||
template<typename Iter, typename OutIter>
|
||||
OutIter lzw_decompress(Iter first, Iter last, OutIter out)
|
||||
{
|
||||
lzw_reverse_table reverse_table(1 << 16);
|
||||
for (uint16_t i = 0; i < 256u; ++i) {
|
||||
reverse_table[i].value_ = i;
|
||||
}
|
||||
|
||||
container::small_vector<unsigned char, 256> table_entry;
|
||||
|
||||
uint32_t next_table_value = 256;
|
||||
uint32_t const end_table_value = 1 << 16;
|
||||
|
||||
uint16_t prev_code = *first++;
|
||||
BOOST_ASSERT(prev_code < 256);
|
||||
unsigned char c = (unsigned char)prev_code;
|
||||
table_entry.push_back(c);
|
||||
*out++ = table_entry;
|
||||
|
||||
while (first != last) {
|
||||
uint16_t const code = *first++;
|
||||
|
||||
table_entry.clear();
|
||||
if (reverse_table[code].value_ == no_value) {
|
||||
table_entry.push_back(c);
|
||||
copy_table_entry(
|
||||
reverse_table, prev_code, std::back_inserter(table_entry));
|
||||
} else {
|
||||
copy_table_entry(
|
||||
reverse_table, code, std::back_inserter(table_entry));
|
||||
}
|
||||
|
||||
*out++ = table_entry;
|
||||
c = table_entry.back();
|
||||
|
||||
if (next_table_value < end_table_value) {
|
||||
reverse_table[next_table_value++] =
|
||||
lzw_reverse_table_element{prev_code, c};
|
||||
}
|
||||
|
||||
prev_code = code;
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
}}}
|
||||
|
||||
#endif
|
224
src/text/boost/text/grapheme_break.hpp
Normal file
224
src/text/boost/text/grapheme_break.hpp
Normal file
@ -0,0 +1,224 @@
|
||||
#ifndef BOOST_TEXT_GRAPHEME_BREAK_HPP
|
||||
#define BOOST_TEXT_GRAPHEME_BREAK_HPP
|
||||
|
||||
#include <array>
|
||||
#include <unordered_map>
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define BOOST_TEXT_DECL
|
||||
|
||||
namespace boost { namespace text {
|
||||
|
||||
/** Backward counterpart of `std::find_if()`: scans `[first, last)` from the
    back and returns an iterator to the last element `v` for which `p(v)` is
    true. Returns `last` when no element matches (including when the range
    is empty). */
template<typename BidiIter, typename Pred>
BidiIter find_if_backward(BidiIter first, BidiIter last, Pred p)
{
    for (auto cursor = last; cursor != first;) {
        --cursor;
        if (p(*cursor))
            return cursor;
    }
    return last;
}
|
||||
|
||||
/** The grapheme properties defined by Unicode. The numeric values are spelled
    out because `detail::table_grapheme_break()` uses them as row and column
    indices into its break table. */
enum class grapheme_property {
    Other = 0,
    CR = 1,
    LF = 2,
    Control = 3,
    Extend = 4,
    Regional_Indicator = 5,
    Prepend = 6,
    SpacingMark = 7,
    L = 8,
    V = 9,
    T = 10,
    LV = 11,
    LVT = 12,
    ExtPict = 13,
    ZWJ = 14
};
|
||||
|
||||
namespace detail {
|
||||
struct grapheme_prop_interval
|
||||
{
|
||||
uint32_t lo_;
|
||||
uint32_t hi_;
|
||||
grapheme_property prop_;
|
||||
};
|
||||
|
||||
/** Orders intervals by position: `lhs` precedes `rhs` only when `lhs` ends
    at or before the point where `rhs` begins. */
inline bool operator<(
    grapheme_prop_interval lhs, grapheme_prop_interval rhs) noexcept
{
    bool const lhs_reaches_into_rhs = rhs.lo_ < lhs.hi_;
    return !lhs_reaches_into_rhs;
}
|
||||
|
||||
BOOST_TEXT_DECL std::array<grapheme_prop_interval, 6> const &
|
||||
make_grapheme_prop_intervals();
|
||||
BOOST_TEXT_DECL std::unordered_map<uint32_t, grapheme_property>
|
||||
make_grapheme_prop_map();
|
||||
}
|
||||
|
||||
/** Returns the grapheme property associated with code point `cp`. */
|
||||
inline grapheme_property grapheme_prop(uint32_t cp) noexcept
|
||||
{
|
||||
static auto const map = detail::make_grapheme_prop_map();
|
||||
static auto const intervals = detail::make_grapheme_prop_intervals();
|
||||
|
||||
auto const it = map.find(cp);
|
||||
if (it == map.end()) {
|
||||
auto const it2 = std::lower_bound(
|
||||
intervals.begin(),
|
||||
intervals.end(),
|
||||
detail::grapheme_prop_interval{cp, cp + 1});
|
||||
if (it2 == intervals.end() || cp < it2->lo_ || it2->hi_ <= cp)
|
||||
return grapheme_property::Other;
|
||||
return it2->prop_;
|
||||
}
|
||||
return it->second;
|
||||
}
|
||||
|
||||
namespace detail {
|
||||
/** Returns true only for the `Extend` property. */
inline bool skippable(grapheme_property prop) noexcept
{
    bool const is_extend = (grapheme_property::Extend == prop);
    return is_extend;
}
|
||||
|
||||
/** Emoji-sequence state carried by the grapheme break state machine. */
enum class grapheme_break_emoji_state_t {
    none = 0,
    // Indicates that prop points to an odd-count emoji.
    first_emoji = 1,
    // Indicates that prop points to an even-count emoji.
    second_emoji = 2
};
|
||||
|
||||
template<typename CPIter>
|
||||
struct grapheme_break_state
|
||||
{
|
||||
CPIter it;
|
||||
|
||||
grapheme_property prev_prop;
|
||||
grapheme_property prop;
|
||||
|
||||
grapheme_break_emoji_state_t emoji_state;
|
||||
};
|
||||
|
||||
template<typename CPIter>
|
||||
grapheme_break_state<CPIter> next(grapheme_break_state<CPIter> state)
|
||||
{
|
||||
++state.it;
|
||||
state.prev_prop = state.prop;
|
||||
return state;
|
||||
}
|
||||
|
||||
template<typename CPIter>
|
||||
grapheme_break_state<CPIter> prev(grapheme_break_state<CPIter> state)
|
||||
{
|
||||
--state.it;
|
||||
state.prop = state.prev_prop;
|
||||
return state;
|
||||
}
|
||||
|
||||
template<typename CPIter>
|
||||
bool gb11_prefix(CPIter first, CPIter prev_it)
|
||||
{
|
||||
auto final_prop = grapheme_property::Other;
|
||||
find_if_backward(first, prev_it, [&final_prop](uint32_t cp) {
|
||||
final_prop = grapheme_prop(cp);
|
||||
return final_prop != grapheme_property::Extend;
|
||||
});
|
||||
return final_prop == grapheme_property::ExtPict;
|
||||
}
|
||||
|
||||
/** Looks up whether there is a grapheme break between a code point with
    property `lhs` and a following code point with property `rhs`. Rows are
    indexed by `lhs`, columns by `rhs`, and 1 means "break". */
inline bool table_grapheme_break(
    grapheme_property lhs, grapheme_property rhs) noexcept
{
    // Note that RI.RI was changed to '1' since that case is handled
    // in the grapheme break FSM.

    using row_t = std::array<bool, 15>;

    // clang-format off
// See chart at https://unicode.org/Public/11.0.0/ucd/auxiliary/GraphemeBreakTest.html .
constexpr std::array<row_t, 15> break_table = {{
//   Other CR LF Ctrl Ext RI Pre SpcMk L  V  T  LV LVT ExtPict ZWJ
    {{1,   1, 1, 1,   0,  1, 1,  0,    1, 1, 1, 1, 1,  1,      0}}, // Other
    {{1,   1, 0, 1,   1,  1, 1,  1,    1, 1, 1, 1, 1,  1,      1}}, // CR
    {{1,   1, 1, 1,   1,  1, 1,  1,    1, 1, 1, 1, 1,  1,      1}}, // LF
    {{1,   1, 1, 1,   1,  1, 1,  1,    1, 1, 1, 1, 1,  1,      1}}, // Control
    {{1,   1, 1, 1,   0,  1, 1,  0,    1, 1, 1, 1, 1,  1,      0}}, // Extend
    {{1,   1, 1, 1,   0,  1, 1,  0,    1, 1, 1, 1, 1,  1,      0}}, // RI
    {{0,   1, 1, 1,   0,  0, 0,  0,    0, 0, 0, 0, 0,  0,      0}}, // Prepend
    {{1,   1, 1, 1,   0,  1, 1,  0,    1, 1, 1, 1, 1,  1,      0}}, // SpacingMark
    {{1,   1, 1, 1,   0,  1, 1,  0,    0, 0, 1, 0, 0,  1,      0}}, // L
    {{1,   1, 1, 1,   0,  1, 1,  0,    1, 0, 0, 1, 1,  1,      0}}, // V
    {{1,   1, 1, 1,   0,  1, 1,  0,    1, 1, 0, 1, 1,  1,      0}}, // T
    {{1,   1, 1, 1,   0,  1, 1,  0,    1, 0, 0, 1, 1,  1,      0}}, // LV
    {{1,   1, 1, 1,   0,  1, 1,  0,    1, 1, 0, 1, 1,  1,      0}}, // LVT
    {{1,   1, 1, 1,   0,  1, 1,  0,    1, 1, 1, 1, 1,  1,      0}}, // ExtPict
    {{1,   1, 1, 1,   0,  1, 1,  0,    1, 1, 1, 1, 1,  1,      0}}, // ZWJ
}};
    // clang-format on

    // The enumerator values of grapheme_property double as table indices.
    auto const row = static_cast<int>(lhs);
    auto const column = static_cast<int>(rhs);
    return break_table[row][column];
}
|
||||
}
|
||||
|
||||
template<typename CPIter, typename Sentinel>
|
||||
CPIter next_grapheme_break(CPIter first, Sentinel last) noexcept
|
||||
{
|
||||
if (first == last)
|
||||
return first;
|
||||
|
||||
detail::grapheme_break_state<CPIter> state;
|
||||
state.it = first;
|
||||
|
||||
if (++state.it == last)
|
||||
return state.it;
|
||||
|
||||
state.prev_prop = grapheme_prop(*std::prev(state.it));
|
||||
state.prop = grapheme_prop(*state.it);
|
||||
|
||||
state.emoji_state =
|
||||
state.prev_prop == grapheme_property::Regional_Indicator
|
||||
? detail::grapheme_break_emoji_state_t::first_emoji
|
||||
: detail::grapheme_break_emoji_state_t::none;
|
||||
|
||||
for (; state.it != last; state = next(state)) {
|
||||
state.prop = grapheme_prop(*state.it);
|
||||
|
||||
// GB11
|
||||
if (state.prev_prop == grapheme_property::ZWJ &&
|
||||
state.prop == grapheme_property::ExtPict &&
|
||||
detail::gb11_prefix(first, std::prev(state.it))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (state.emoji_state ==
|
||||
detail::grapheme_break_emoji_state_t::first_emoji) {
|
||||
if (state.prop == grapheme_property::Regional_Indicator) {
|
||||
state.emoji_state =
|
||||
detail::grapheme_break_emoji_state_t::none;
|
||||
continue;
|
||||
} else {
|
||||
state.emoji_state =
|
||||
detail::grapheme_break_emoji_state_t::none;
|
||||
}
|
||||
} else if (state.prop == grapheme_property::Regional_Indicator) {
|
||||
state.emoji_state =
|
||||
detail::grapheme_break_emoji_state_t::first_emoji;
|
||||
}
|
||||
|
||||
if (detail::table_grapheme_break(state.prev_prop, state.prop))
|
||||
return state.it;
|
||||
}
|
||||
|
||||
return state.it;
|
||||
}
|
||||
|
||||
}}
|
||||
|
||||
#endif
|
2503
src/text/boost/text/transcode_iterator.hpp
Normal file
2503
src/text/boost/text/transcode_iterator.hpp
Normal file
File diff suppressed because it is too large
Load Diff
9
src/text/boost/throw_exception.hpp
Normal file
9
src/text/boost/throw_exception.hpp
Normal file
@ -0,0 +1,9 @@
|
||||
#ifndef TEXT_BOOST_THROW_EXCEPTION_HPP
|
||||
#define TEXT_BOOST_THROW_EXCEPTION_HPP
|
||||
|
||||
namespace boost {
// Minimal boost/throw_exception.hpp shim: throws a copy of `e`. The function
// never returns normally, which [[noreturn]] tells the compiler so that
// callers are not flagged for missing returns after the call.
template <typename E>
[[noreturn]] void throw_exception(const E& e) {
  throw e;
}
}  // namespace boost
|
||||
|
||||
#endif // TEXT_BOOST_THROW_EXCEPTION_HPP
|
3589
src/text/grapheme_break.cpp
Normal file
3589
src/text/grapheme_break.cpp
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user