Reduced Unicode conversion code size (-122 bytes on AVR)

This commit is contained in:
Benoit Blanchon
2020-01-09 15:39:45 +01:00
parent 91b808381e
commit 5ec062cc71
5 changed files with 115 additions and 37 deletions

View File

@ -189,7 +189,9 @@ class JsonDeserializer {
DeserializationError parseQuotedString(const char *&result) {
StringBuilder builder = _stringStorage.startString();
#if ARDUINOJSON_DECODE_UNICODE
uint16_t surrogate1 = 0;
#endif
const char stopChar = current();
move();
@ -205,23 +207,21 @@ class JsonDeserializer {
if (c == '\0') return DeserializationError::IncompleteInput;
if (c == 'u') {
#if ARDUINOJSON_DECODE_UNICODE
uint16_t codepoint;
move();
DeserializationError err = parseCodepoint(codepoint);
uint32_t codepoint;
uint16_t codeunit;
DeserializationError err = parseHex4(codeunit);
if (err) return err;
if (codepoint >= 0xd800 && codepoint <= 0xdbff) {
if (surrogate1 > 0) return DeserializationError::InvalidInput;
surrogate1 = codepoint;
} else if (codepoint >= 0xdc00 && codepoint <= 0xdfff) {
if (surrogate1 == 0) return DeserializationError::InvalidInput;
uint32_t codepoint32 = 0x10000;
codepoint32 += static_cast<uint32_t>(surrogate1 - 0xd800) << 10;
codepoint32 += codepoint - 0xdc00;
Utf8::encodeCodepoint(codepoint32, builder);
surrogate1 = 0;
if (codeunit >= 0xDC00) {
codepoint =
uint32_t(0x10000 | ((surrogate1 << 10) | (codeunit & 0x3FF)));
} else if (codeunit < 0xd800) {
codepoint = codeunit;
} else {
Utf8::encodeCodepoint(codepoint, builder);
surrogate1 = codeunit & 0x3FF;
continue;
}
Utf8::encodeCodepoint(codepoint, builder);
continue;
#else
return DeserializationError::NotSupported;
@ -233,8 +233,6 @@ class JsonDeserializer {
move();
}
if (surrogate1 > 0) return DeserializationError::InvalidInput;
builder.append(c);
}
@ -312,14 +310,14 @@ class JsonDeserializer {
return DeserializationError::InvalidInput;
}
DeserializationError parseCodepoint(uint16_t &codepoint) {
codepoint = 0;
DeserializationError parseHex4(uint16_t &result) {
result = 0;
for (uint8_t i = 0; i < 4; ++i) {
char digit = current();
if (!digit) return DeserializationError::IncompleteInput;
uint8_t value = decodeHex(digit);
if (value > 0x0F) return DeserializationError::InvalidInput;
codepoint = uint16_t((codepoint << 4) | value);
result = uint16_t((result << 4) | value);
move();
}
return DeserializationError::Ok;

View File

@ -10,23 +10,37 @@ namespace ARDUINOJSON_NAMESPACE {
namespace Utf8 {
template <typename TStringBuilder>
inline void encodeCodepoint(uint32_t codepoint, TStringBuilder &str) {
if (codepoint < 0x80) {
str.append(char(codepoint));
return;
inline void encodeCodepoint(uint32_t codepoint32, TStringBuilder& str) {
// this function was optimized for code size on AVR
// a buffer to store the string in reverse
char buf[5];
char* p = buf;
*(p++) = 0;
if (codepoint32 < 0x80) {
*(p++) = char((codepoint32));
} else {
*(p++) = char((codepoint32 | 0x80) & 0xBF);
uint16_t codepoint16 = uint16_t(codepoint32 >> 6);
if (codepoint16 < 0x20) { // 0x800
*(p++) = char(codepoint16 | 0xC0);
} else {
*(p++) = char((codepoint16 | 0x80) & 0xBF);
codepoint16 = uint16_t(codepoint16 >> 6);
if (codepoint16 < 0x10) { // 0x10000
*(p++) = char(codepoint16 | 0xE0);
} else {
*(p++) = char((codepoint16 | 0x80) & 0xBF);
codepoint16 = uint16_t(codepoint16 >> 6);
*(p++) = char(codepoint16 | 0xF0);
}
}
}
if (codepoint < 0x00000800) {
str.append(char(0xc0 /*0b11000000*/ | (codepoint >> 6)));
} else if (codepoint < 0x00010000) {
str.append(char(0xe0 /*0b11100000*/ | (codepoint >> 12)));
str.append(char(((codepoint >> 6) & 0x3f /*0b00111111*/) | 0x80));
} else if (codepoint < 0x00110000) {
str.append(char(0xf0 /*0b11110000*/ | (codepoint >> 18)));
str.append(char(((codepoint >> 12) & 0x3f /*0b00111111*/) | 0x80));
str.append(char(((codepoint >> 6) & 0x3f /*0b00111111*/) | 0x80));
while (*(--p)) {
str.append(*p);
}
str.append(char((codepoint & 0x3f /*0b00111111*/) | 0x80));
}
} // namespace Utf8
} // namespace ARDUINOJSON_NAMESPACE