Escape invalid code points

This commit is contained in:
Victor Zverovich
2021-08-22 15:14:13 -07:00
parent a76031e11d
commit a212ff757f
3 changed files with 24 additions and 23 deletions

View File

@ -232,18 +232,18 @@ struct singleton {
unsigned char lower_count; unsigned char lower_count;
}; };
inline auto is_printable(uint16_t x, const singleton* singleton_uppers, inline auto is_printable(uint16_t x, const singleton* singletons,
size_t singleton_uppers_size, size_t singletons_size,
const unsigned char* singleton_lowers, const unsigned char* singleton_lowers,
const unsigned char* normal, size_t normal_size) const unsigned char* normal, size_t normal_size)
-> bool { -> bool {
auto upper = x >> 8; auto upper = x >> 8;
auto lower_start = 0; auto lower_start = 0;
for (size_t i = 0; i < singleton_uppers_size; ++i) { for (size_t i = 0; i < singletons_size; ++i) {
auto su = singleton_uppers[i]; auto s = singletons[i];
auto lower_end = lower_start + su.lower_count; auto lower_end = lower_start + s.lower_count;
if (upper < su.upper) break; if (upper < s.upper) break;
if (upper == su.upper) { if (upper == s.upper) {
for (auto j = lower_start; j < lower_end; ++j) { for (auto j = lower_start; j < lower_end; ++j) {
if (singleton_lowers[j] == (x & 0xff)) return false; if (singleton_lowers[j] == (x & 0xff)) return false;
} }
@ -266,7 +266,7 @@ inline auto is_printable(uint16_t x, const singleton* singleton_uppers,
// Returns true iff the code point cp is printable. // Returns true iff the code point cp is printable.
// This code is generated by support/printable.py. // This code is generated by support/printable.py.
inline auto is_printable(uint32_t cp) -> bool { inline auto is_printable(uint32_t cp) -> bool {
static constexpr singleton singletons0_upper[] = { static constexpr singleton singletons0[] = {
{0x00, 1}, {0x03, 5}, {0x05, 6}, {0x06, 3}, {0x07, 6}, {0x08, 8}, {0x00, 1}, {0x03, 5}, {0x05, 6}, {0x06, 3}, {0x07, 6}, {0x08, 8},
{0x09, 17}, {0x0a, 28}, {0x0b, 25}, {0x0c, 20}, {0x0d, 16}, {0x0e, 13}, {0x09, 17}, {0x0a, 28}, {0x0b, 25}, {0x0c, 20}, {0x0d, 16}, {0x0e, 13},
{0x0f, 4}, {0x10, 3}, {0x12, 18}, {0x13, 9}, {0x16, 1}, {0x17, 5}, {0x0f, 4}, {0x10, 3}, {0x12, 18}, {0x13, 9}, {0x16, 1}, {0x17, 5},
@ -302,7 +302,7 @@ inline auto is_printable(uint32_t cp) -> bool {
0xfe, 0xff, 0x53, 0x67, 0x75, 0xc8, 0xc9, 0xd0, 0xd1, 0xd8, 0xd9, 0xe7, 0xfe, 0xff, 0x53, 0x67, 0x75, 0xc8, 0xc9, 0xd0, 0xd1, 0xd8, 0xd9, 0xe7,
0xfe, 0xff, 0xfe, 0xff,
}; };
static constexpr singleton singletons1_upper[] = { static constexpr singleton singletons1[] = {
{0x00, 6}, {0x01, 1}, {0x03, 1}, {0x04, 2}, {0x08, 8}, {0x09, 2}, {0x00, 6}, {0x01, 1}, {0x03, 1}, {0x04, 2}, {0x08, 8}, {0x09, 2},
{0x0a, 5}, {0x0b, 2}, {0x0e, 4}, {0x10, 1}, {0x11, 2}, {0x12, 5}, {0x0a, 5}, {0x0b, 2}, {0x0e, 4}, {0x10, 1}, {0x11, 2}, {0x12, 5},
{0x13, 17}, {0x14, 1}, {0x15, 2}, {0x17, 2}, {0x19, 13}, {0x1c, 5}, {0x13, 17}, {0x14, 1}, {0x15, 2}, {0x17, 2}, {0x19, 13}, {0x1c, 5},
@ -395,13 +395,13 @@ inline auto is_printable(uint32_t cp) -> bool {
}; };
auto lower = static_cast<uint16_t>(cp); auto lower = static_cast<uint16_t>(cp);
if (cp < 0x10000) { if (cp < 0x10000) {
return is_printable(lower, singletons0_upper, return is_printable(lower, singletons0,
sizeof(singletons0_upper) / sizeof(*singletons0_upper), sizeof(singletons0) / sizeof(*singletons0),
singletons0_lower, normal0, sizeof(normal0)); singletons0_lower, normal0, sizeof(normal0));
} }
if (cp < 0x20000) { if (cp < 0x20000) {
return is_printable(lower, singletons1_upper, return is_printable(lower, singletons1,
sizeof(singletons1_upper) / sizeof(*singletons1_upper), sizeof(singletons1) / sizeof(*singletons1),
singletons1_lower, normal1, sizeof(normal1)); singletons1_lower, normal1, sizeof(normal1));
} }
if (0x2a6de <= cp && cp < 0x2a700) return false; if (0x2a6de <= cp && cp < 0x2a700) return false;
@ -412,7 +412,7 @@ inline auto is_printable(uint32_t cp) -> bool {
if (0x2fa1e <= cp && cp < 0x30000) return false; if (0x2fa1e <= cp && cp < 0x30000) return false;
if (0x3134b <= cp && cp < 0xe0100) return false; if (0x3134b <= cp && cp < 0xe0100) return false;
if (0xe01f0 <= cp && cp < 0x110000) return false; if (0xe01f0 <= cp && cp < 0x110000) return false;
return true; return cp < 0x110000;
} }
inline auto needs_escape(uint32_t cp) -> bool { inline auto needs_escape(uint32_t cp) -> bool {

View File

@ -173,29 +173,29 @@ def main():
print("""\ print("""\
inline auto is_printable(uint32_t cp) -> bool {\ inline auto is_printable(uint32_t cp) -> bool {\
""") """)
print_singletons(singletons0u, singletons0l, 'singletons0_upper', 'singletons0_lower') print_singletons(singletons0u, singletons0l, 'singletons0', 'singletons0_lower')
print_singletons(singletons1u, singletons1l, 'singletons1_upper', 'singletons1_lower') print_singletons(singletons1u, singletons1l, 'singletons1', 'singletons1_lower')
print_normal(normal0, 'normal0') print_normal(normal0, 'normal0')
print_normal(normal1, 'normal1') print_normal(normal1, 'normal1')
print("""\ print("""\
auto lower = static_cast<uint16_t>(cp); auto lower = static_cast<uint16_t>(cp);
if (cp < 0x10000) { if (cp < 0x10000) {
return is_printable(lower, singletons0_upper, return is_printable(lower, singletons0,
sizeof(singletons0_upper) / sizeof(*singletons0_upper), sizeof(singletons0) / sizeof(*singletons0),
singletons0_lower, normal0, sizeof(normal0)); singletons0_lower, normal0, sizeof(normal0));
} }
if (cp < 0x20000) { if (cp < 0x20000) {
return is_printable(lower, singletons1_upper, return is_printable(lower, singletons1,
sizeof(singletons1_upper) / sizeof(*singletons1_upper), sizeof(singletons1) / sizeof(*singletons1),
singletons1_lower, normal1, sizeof(normal1)); singletons1_lower, normal1, sizeof(normal1));
}\ }\
""") """)
for a, b in extra: for a, b in extra:
print(" if (0x{:x} <= cp && cp < 0x{:x}) return false;".format(a, a + b)) print(" if (0x{:x} <= cp && cp < 0x{:x}) return false;".format(a, a + b))
print("""\ print("""\
return true; return cp < 0x{:x};
}\ }}\
""") """.format(NUM_CODEPOINTS))
if __name__ == '__main__': if __name__ == '__main__':
main() main()

View File

@ -267,6 +267,7 @@ TEST(ranges_test, is_printable) {
using fmt::detail::is_printable; using fmt::detail::is_printable;
EXPECT_TRUE(is_printable(0x0323)); EXPECT_TRUE(is_printable(0x0323));
EXPECT_FALSE(is_printable(0x0378)); EXPECT_FALSE(is_printable(0x0378));
EXPECT_FALSE(is_printable(0x110000));
} }
TEST(ranges_test, escape_string) { TEST(ranges_test, escape_string) {