forked from bblanchon/ArduinoJson
Improved decoding of UTF-16 surrogate pairs (closes #1157)
This commit is contained in:
committed by
Benoit Blanchon
parent
8550418875
commit
91b808381e
@ -189,6 +189,7 @@ class JsonDeserializer {
|
||||
|
||||
DeserializationError parseQuotedString(const char *&result) {
|
||||
StringBuilder builder = _stringStorage.startString();
|
||||
uint16_t surrogate1 = 0;
|
||||
const char stopChar = current();
|
||||
|
||||
move();
|
||||
@ -208,7 +209,19 @@ class JsonDeserializer {
|
||||
move();
|
||||
DeserializationError err = parseCodepoint(codepoint);
|
||||
if (err) return err;
|
||||
Utf8::encodeCodepoint(codepoint, builder);
|
||||
if (codepoint >= 0xd800 && codepoint <= 0xdbff) {
|
||||
if (surrogate1 > 0) return DeserializationError::InvalidInput;
|
||||
surrogate1 = codepoint;
|
||||
} else if (codepoint >= 0xdc00 && codepoint <= 0xdfff) {
|
||||
if (surrogate1 == 0) return DeserializationError::InvalidInput;
|
||||
uint32_t codepoint32 = 0x10000;
|
||||
codepoint32 += static_cast<uint32_t>(surrogate1 - 0xd800) << 10;
|
||||
codepoint32 += codepoint - 0xdc00;
|
||||
Utf8::encodeCodepoint(codepoint32, builder);
|
||||
surrogate1 = 0;
|
||||
} else {
|
||||
Utf8::encodeCodepoint(codepoint, builder);
|
||||
}
|
||||
continue;
|
||||
#else
|
||||
return DeserializationError::NotSupported;
|
||||
@ -220,6 +233,8 @@ class JsonDeserializer {
|
||||
move();
|
||||
}
|
||||
|
||||
if (surrogate1 > 0) return DeserializationError::InvalidInput;
|
||||
|
||||
builder.append(c);
|
||||
}
|
||||
|
||||
|
@ -10,17 +10,21 @@ namespace ARDUINOJSON_NAMESPACE {
|
||||
|
||||
namespace Utf8 {
|
||||
// Appends the UTF-8 encoding of `codepoint` to `str`.
//
// TStringBuilder only needs to provide an `append(char)` member; bytes are
// appended one at a time, lead byte first.
//
//   U+0000  .. U+007F    -> 1 byte   0xxxxxxx
//   U+0080  .. U+07FF    -> 2 bytes  110xxxxx 10xxxxxx
//   U+0800  .. U+FFFF    -> 3 bytes  1110xxxx 10xxxxxx 10xxxxxx
//   U+10000 .. U+10FFFF  -> 4 bytes  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
//
// Values above U+10FFFF have no UTF-8 encoding: nothing is appended for them.
// No surrogate check is done here: values in 0xD800..0xDFFF are encoded as
// three bytes like any other value below 0x10000.
template <typename TStringBuilder>
inline void encodeCodepoint(uint32_t codepoint, TStringBuilder &str) {
  // ASCII maps to itself.
  if (codepoint < 0x80) {
    str.append(char(codepoint));
    return;
  }

  // Out of Unicode range: without this guard only the trailing continuation
  // byte below would be emitted, which is not valid UTF-8.
  if (codepoint >= 0x00110000) return;

  // Emit the lead byte and every continuation byte except the last one.
  if (codepoint < 0x00000800) {
    str.append(char(0xc0 /*0b11000000*/ | (codepoint >> 6)));
  } else if (codepoint < 0x00010000) {
    str.append(char(0xe0 /*0b11100000*/ | (codepoint >> 12)));
    str.append(char(((codepoint >> 6) & 0x3f /*0b00111111*/) | 0x80));
  } else {
    str.append(char(0xf0 /*0b11110000*/ | (codepoint >> 18)));
    str.append(char(((codepoint >> 12) & 0x3f /*0b00111111*/) | 0x80));
    str.append(char(((codepoint >> 6) & 0x3f /*0b00111111*/) | 0x80));
  }

  // The last continuation byte (low six bits) is common to all multi-byte
  // forms.
  str.append(char((codepoint & 0x3f /*0b00111111*/) | 0x80));
}
|
||||
|
Reference in New Issue
Block a user