From 7df2c82a8af6415c972019d84686ec277fe05a6b Mon Sep 17 00:00:00 2001
From: Victor Zverovich
Date: Sun, 22 Aug 2021 09:10:10 -0700
Subject: [PATCH] Rewrite printable.py codegen to emit C++

---
Review notes (this section is ignored when the patch is applied):

The patch replaces the Rust templates printed by support/printable.py with
C++ ones: a `singleton` struct, `check()` and `is_printable()`, with the
lookup tables emitted as function-local `static constexpr` arrays.

Corrections made to the emitted `check()` relative to the first draft; each
one stays inside an existing added line, so hunk sizes are unchanged:
  * `(v & 0x80) != 0` is parenthesized. In C++ `!=` binds tighter than `&`,
    so the unparenthesized form tested bit 0 rather than bit 7.
  * A two-byte run length reads the byte after the marker (`normal[++i]`),
    as the removed Rust code did with `normal.next()`, instead of reading
    the marker byte a second time.
  * The singleton scan compares against the low byte of `x` (`x & 0xff`),
    as the removed Rust code did with `x as u8`.
  * The `static_cast` target types are spelled out (`int`, `uint16_t`).

NOTE(review): the post-image blob id on the `index` line predates these
corrections, and leading whitespace was re-created from a whitespace-mangled
copy; if the patch does not apply cleanly, try `git apply --ignore-whitespace`.

 support/printable.py | 123 +++++++++++++++++++++----------------------
 1 file changed, 61 insertions(+), 62 deletions(-)

diff --git a/support/printable.py b/support/printable.py
index 7ba37f56..192e89d6 100755
--- a/support/printable.py
+++ b/support/printable.py
@@ -1,8 +1,11 @@
 #!/usr/bin/env python3
 
+# This script is based on
+# https://github.com/rust-lang/rust/blob/master/library/core/src/unicode/printable.py
+# distributed under https://github.com/rust-lang/rust/blob/master/LICENSE-MIT.
+
 # This script uses the following Unicode tables:
 # - UnicodeData.txt
-# Script license: https://github.com/rust-lang/rust/blob/master/LICENSE-MIT
 
 
 from collections import namedtuple
@@ -112,23 +115,20 @@ def compress_normal(normal):
     return compressed
 
 def print_singletons(uppers, lowers, uppersname, lowersname):
-    print("#[rustfmt::skip]")
-    print("const {}: &[(u8, u8)] = &[".format(uppersname))
+    print("  static constexpr singleton {}[] = {{".format(uppersname))
     for u, c in uppers:
-        print("    ({:#04x}, {}),".format(u, c))
-    print("];")
-    print("#[rustfmt::skip]")
-    print("const {}: &[u8] = &[".format(lowersname))
+        print("    {{{:#04x}, {}}},".format(u, c))
+    print("  };")
+    print("  static constexpr unsigned char {}[] = {{".format(lowersname))
     for i in range(0, len(lowers), 8):
         print("    {}".format(" ".join("{:#04x},".format(l) for l in lowers[i:i+8])))
-    print("];")
+    print("  };")
 
 def print_normal(normal, normalname):
-    print("#[rustfmt::skip]")
-    print("const {}: &[u8] = &[".format(normalname))
+    print("  static constexpr unsigned char {}[] = {{".format(normalname))
     for v in normal:
         print("    {}".format(" ".join("{:#04x},".format(i) for i in v)))
-    print("];")
+    print("  };")
 
 def main():
     file = get_file("https://www.unicode.org/Public/UNIDATA/UnicodeData.txt")
@@ -171,67 +171,66 @@ def main():
     normal1 = compress_normal(normal1)
 
     print("""\
-// NOTE: The following code was generated by "src/libcore/unicode/printable.py",
-//       do not edit directly!
+struct singleton {
+  unsigned char upper;
+  unsigned char lowercount;
+};
 
-fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8], normal: &[u8]) -> bool {
-    let xupper = (x >> 8) as u8;
-    let mut lowerstart = 0;
-    for &(upper, lowercount) in singletonuppers {
-        let lowerend = lowerstart + lowercount as usize;
-        if xupper == upper {
-            for &lower in &singletonlowers[lowerstart..lowerend] {
-                if lower == x as u8 {
-                    return false;
-                }
-            }
-        } else if xupper < upper {
-            break;
-        }
-        lowerstart = lowerend;
+inline auto check(uint16_t x, const singleton* singletonuppers,
+                  size_t singletonuppers_size,
+                  const unsigned char* singletonlowers,
+                  const unsigned char* normal, size_t normal_size) -> bool {
+  auto xupper = x >> 8;
+  auto lowerstart = 0;
+  for (size_t i = 0; i < singletonuppers_size; ++i) {
+    auto su = singletonuppers[i];
+    auto lowerend = lowerstart + su.lowercount;
+    if (xupper < su.upper) break;
+    if (xupper == su.upper) {
+      for (auto j = lowerstart; j < lowerend; ++j) {
+        if (singletonlowers[j] == (x & 0xff)) return false;
+      }
     }
+    lowerstart = lowerend;
+  }
 
-    let mut x = x as i32;
-    let mut normal = normal.iter().cloned();
-    let mut current = true;
-    while let Some(v) = normal.next() {
-        let len = if v & 0x80 != 0 {
-            ((v & 0x7f) as i32) << 8 | normal.next().unwrap() as i32
-        } else {
-            v as i32
-        };
-        x -= len;
-        if x < 0 {
-            break;
-        }
-        current = !current;
-    }
-    current
+  auto xsigned = static_cast<int>(x);
+  auto current = true;
+  for (size_t i = 0; i < normal_size; ++i) {
+    auto v = static_cast<int>(normal[i]);
+    auto len = (v & 0x80) != 0 ? (v & 0x7f) << 8 | normal[++i] : v;
+    xsigned -= len;
+    if (xsigned < 0) break;
+    current = !current;
+  }
+  return current;
 }
 
-pub(crate) fn is_printable(x: char) -> bool {
-    let x = x as u32;
-    let lower = x as u16;
-    if x < 0x10000 {
-        check(lower, SINGLETONS0U, SINGLETONS0L, NORMAL0)
-    } else if x < 0x20000 {
-        check(lower, SINGLETONS1U, SINGLETONS1L, NORMAL1)
-    } else {\
+inline auto is_printable(uint32_t cp) -> bool {\
+""")
+    print_singletons(singletons0u, singletons0l, 'singletons0u', 'singletons0l')
+    print_singletons(singletons1u, singletons1l, 'singletons1u', 'singletons1l')
+    print_normal(normal0, 'normal0')
+    print_normal(normal1, 'normal1')
+    print("""\
+  auto lower = static_cast<uint16_t>(cp);
+  if (cp < 0x10000) {
+    return check(lower, singletons0u,
+                 sizeof(singletons0u) / sizeof(*singletons0u), singletons0l,
+                 normal0, sizeof(normal0));
+  }
+  if (cp < 0x20000) {
+    return check(lower, singletons1u,
+                 sizeof(singletons1u) / sizeof(*singletons1u), singletons1l,
+                 normal1, sizeof(normal1));
+  }\
 """)
     for a, b in extra:
-        print("        if 0x{:x} <= x && x < 0x{:x} {{".format(a, a + b))
-        print("            return false;")
-        print("        }")
+        print("  if (0x{:x} <= cp && cp < 0x{:x}) return false;".format(a, a + b))
     print("""\
-        true
-    }
+  return true;
 }\
 """)
-    print()
-    print_singletons(singletons0u, singletons0l, 'SINGLETONS0U', 'SINGLETONS0L')
-    print_singletons(singletons1u, singletons1l, 'SINGLETONS1U', 'SINGLETONS1L')
-    print_normal(normal0, 'NORMAL0')
-    print_normal(normal1, 'NORMAL1')
 
 if __name__ == '__main__':
     main()