Fixed regression in UTF16 decoding (fixes #1173)

This commit is contained in:
Benoit Blanchon
2020-01-27 12:10:10 +01:00
parent ddfe7d8b91
commit 09d4b2cd38
6 changed files with 130 additions and 12 deletions

View File

@@ -6,6 +6,7 @@
#include <ArduinoJson/Deserialization/deserialize.hpp>
#include <ArduinoJson/Json/EscapeSequence.hpp>
#include <ArduinoJson/Json/Utf16.hpp>
#include <ArduinoJson/Json/Utf8.hpp>
#include <ArduinoJson/Memory/MemoryPool.hpp>
#include <ArduinoJson/Numbers/parseNumber.hpp>
@@ -190,7 +191,7 @@ class JsonDeserializer {
DeserializationError parseQuotedString(const char *&result) {
StringBuilder builder = _stringStorage.startString();
#if ARDUINOJSON_DECODE_UNICODE
uint16_t surrogate1 = 0;
Utf16::Codepoint codepoint;
#endif
const char stopChar = current();
@@ -208,20 +209,11 @@ class JsonDeserializer {
if (c == 'u') {
#if ARDUINOJSON_DECODE_UNICODE
move();
uint32_t codepoint;
uint16_t codeunit;
DeserializationError err = parseHex4(codeunit);
if (err) return err;
if (codeunit >= 0xDC00) {
codepoint =
uint32_t(0x10000 | ((surrogate1 << 10) | (codeunit & 0x3FF)));
} else if (codeunit < 0xd800) {
codepoint = codeunit;
} else {
surrogate1 = codeunit & 0x3FF;
continue;
}
Utf8::encodeCodepoint(codepoint, builder);
if (codepoint.append(codeunit))
Utf8::encodeCodepoint(codepoint.value(), builder);
continue;
#else
return DeserializationError::NotSupported;

View File

@@ -0,0 +1,49 @@
// ArduinoJson - arduinojson.org
// Copyright Benoit Blanchon 2014-2020
// MIT License
#pragma once
#include <ArduinoJson/Namespace.hpp>
#include <stdint.h> // uint16_t, uint32_t
namespace ARDUINOJSON_NAMESPACE {
// Helpers for turning a stream of UTF-16 code units into Unicode code points.
namespace Utf16 {

// Returns true when `codeunit` is in [0xD800, 0xDC00): the first (high) half
// of a surrogate pair.
inline bool isHighSurrogate(uint16_t codeunit) {
  return codeunit >= 0xD800 && codeunit < 0xDC00;
}

// Returns true when `codeunit` is in [0xDC00, 0xE000): the second (low) half
// of a surrogate pair.
inline bool isLowSurrogate(uint16_t codeunit) {
  return codeunit >= 0xDC00 && codeunit < 0xE000;
}

// Accumulates UTF-16 code units, one at a time, until a complete code point
// is available.
//
// Usage: feed each code unit to append(); whenever it returns true, value()
// holds the decoded code point.
class Codepoint {
 public:
  // Starts from a fully defined state so that a low surrogate arriving first,
  // or a call to value() before any append(), never reads an indeterminate
  // member.
  Codepoint() : _highSurrogate(0), _codepoint(0) {}

  // Consumes one code unit.
  //
  // Returns false when `codeunit` is a high surrogate: its 10 payload bits
  // are stored and a low surrogate is expected next.
  // Returns true when a code point is ready to be read with value():
  //  - a low surrogate is combined with the stored high-surrogate bits;
  //  - any other code unit is the code point itself.
  //
  // No validation is performed: a low surrogate that was not preceded by a
  // high surrogate is combined with whatever high-surrogate bits are
  // currently stored (0 for a fresh object).
  bool append(uint16_t codeunit) {
    if (isHighSurrogate(codeunit)) {
      _highSurrogate = codeunit & 0x3FF;
      return false;
    }

    if (isLowSurrogate(codeunit)) {
      // Widen before shifting: the combined payload needs 20 bits, which
      // would not fit if the shift were carried out in a 16-bit `int`.
      const uint32_t highBits = static_cast<uint32_t>(_highSurrogate) << 10;
      const uint32_t lowBits = static_cast<uint32_t>(codeunit & 0x3FF);
      _codepoint = 0x10000 + (highBits | lowBits);
      return true;
    }

    _codepoint = codeunit;
    return true;
  }

  // Returns the most recently completed code point (0 if none yet).
  uint32_t value() const {
    return _codepoint;
  }

 private:
  uint16_t _highSurrogate;  // low 10 bits of the last high surrogate seen
  uint32_t _codepoint;      // last completed code point
};
}  // namespace Utf16
} // namespace ARDUINOJSON_NAMESPACE