forked from bblanchon/ArduinoJson
Fixed regression in UTF16 decoding (fixes #1173)
This commit is contained in:
@ -1,6 +1,11 @@
|
||||
ArduinoJson: change log
|
||||
=======================
|
||||
|
||||
HEAD
|
||||
----
|
||||
|
||||
* Fixed regression in UTF16 decoding (issue #1173)
|
||||
|
||||
v6.14.0 (2020-01-16)
|
||||
-------
|
||||
|
||||
|
@ -21,6 +21,9 @@ TEST_CASE("Valid JSON strings value") {
|
||||
{"'\\u00E4'", "\xc3\xa4"}, // ä
|
||||
{"'\\u3042'", "\xe3\x81\x82"}, // あ
|
||||
{"'\\ud83d\\udda4'", "\xf0\x9f\x96\xa4"}, // 🖤
|
||||
{"'\\uF053'", "\xef\x81\x93"}, // issue #1173
|
||||
{"'\\uF015'", "\xef\x80\x95"}, // issue #1173
|
||||
{"'\\uF054'", "\xef\x81\x94"}, // issue #1173
|
||||
};
|
||||
const size_t testCount = sizeof(testCases) / sizeof(testCases[0]);
|
||||
|
||||
|
@ -11,6 +11,7 @@ add_executable(MiscTests
|
||||
TypeTraits.cpp
|
||||
unsigned_char.cpp
|
||||
Utf8.cpp
|
||||
Utf16.cpp
|
||||
version.cpp
|
||||
)
|
||||
|
||||
|
68
extras/tests/Misc/Utf16.cpp
Normal file
68
extras/tests/Misc/Utf16.cpp
Normal file
@ -0,0 +1,68 @@
|
||||
// ArduinoJson - arduinojson.org
|
||||
// Copyright Benoit Blanchon 2014-2020
|
||||
// MIT License
|
||||
|
||||
#include <ArduinoJson/Json/Utf16.hpp>
|
||||
#include <catch.hpp>
|
||||
|
||||
using namespace ARDUINOJSON_NAMESPACE;
|
||||
|
||||
static void testUtf16Codepoint(uint16_t codeunit, uint32_t expectedCodepoint) {
|
||||
Utf16::Codepoint cp;
|
||||
REQUIRE(cp.append(codeunit) == true);
|
||||
REQUIRE(cp.value() == expectedCodepoint);
|
||||
}
|
||||
|
||||
static void testUtf16Codepoint(uint16_t codeunit1, uint16_t codeunit2,
|
||||
uint32_t expectedCodepoint) {
|
||||
Utf16::Codepoint cp;
|
||||
REQUIRE(cp.append(codeunit1) == false);
|
||||
REQUIRE(cp.append(codeunit2) == true);
|
||||
REQUIRE(cp.value() == expectedCodepoint);
|
||||
}
|
||||
|
||||
// Exercises Utf16::Codepoint through the testUtf16Codepoint() helpers.
// Each section is named after the code point it expects to decode; the
// single-argument form feeds one code unit, the two-argument form feeds a
// pair of code units.
TEST_CASE("Utf16::Codepoint()") {
  // --- code points carried by a single code unit ---

  SECTION("U+0000") {
    testUtf16Codepoint(0x0000, 0x000000);
  }

  SECTION("U+0001") {
    testUtf16Codepoint(0x0001, 0x000001);
  }

  // Last value below the surrogate range.
  SECTION("U+D7FF") {
    testUtf16Codepoint(0xD7FF, 0x00D7FF);
  }

  // First value above the surrogate range.
  SECTION("U+E000") {
    testUtf16Codepoint(0xE000, 0x00E000);
  }

  SECTION("U+FFFF") {
    testUtf16Codepoint(0xFFFF, 0x00FFFF);
  }

  // --- code points carried by two code units ---

  SECTION("U+010000") {
    testUtf16Codepoint(0xD800, 0xDC00, 0x010000);
  }

  SECTION("U+010001") {
    testUtf16Codepoint(0xD800, 0xDC01, 0x010001);
  }

  SECTION("U+0103FF") {
    testUtf16Codepoint(0xD800, 0xDFFF, 0x0103FF);
  }

  SECTION("U+010400") {
    testUtf16Codepoint(0xD801, 0xDC00, 0x010400);
  }

  // Section name matches the expected code point (0x10FC00); it previously
  // reused the "U+010400" label of the section above.
  SECTION("U+10FC00") {
    testUtf16Codepoint(0xDBFF, 0xDC00, 0x10FC00);
  }

  SECTION("U+10FFFF") {
    testUtf16Codepoint(0xDBFF, 0xDFFF, 0x10FFFF);
  }
}
|
@ -6,6 +6,7 @@
|
||||
|
||||
#include <ArduinoJson/Deserialization/deserialize.hpp>
|
||||
#include <ArduinoJson/Json/EscapeSequence.hpp>
|
||||
#include <ArduinoJson/Json/Utf16.hpp>
|
||||
#include <ArduinoJson/Json/Utf8.hpp>
|
||||
#include <ArduinoJson/Memory/MemoryPool.hpp>
|
||||
#include <ArduinoJson/Numbers/parseNumber.hpp>
|
||||
@ -190,7 +191,7 @@ class JsonDeserializer {
|
||||
DeserializationError parseQuotedString(const char *&result) {
|
||||
StringBuilder builder = _stringStorage.startString();
|
||||
#if ARDUINOJSON_DECODE_UNICODE
|
||||
uint16_t surrogate1 = 0;
|
||||
Utf16::Codepoint codepoint;
|
||||
#endif
|
||||
const char stopChar = current();
|
||||
|
||||
@ -208,20 +209,11 @@ class JsonDeserializer {
|
||||
if (c == 'u') {
|
||||
#if ARDUINOJSON_DECODE_UNICODE
|
||||
move();
|
||||
uint32_t codepoint;
|
||||
uint16_t codeunit;
|
||||
DeserializationError err = parseHex4(codeunit);
|
||||
if (err) return err;
|
||||
if (codeunit >= 0xDC00) {
|
||||
codepoint =
|
||||
uint32_t(0x10000 | ((surrogate1 << 10) | (codeunit & 0x3FF)));
|
||||
} else if (codeunit < 0xd800) {
|
||||
codepoint = codeunit;
|
||||
} else {
|
||||
surrogate1 = codeunit & 0x3FF;
|
||||
continue;
|
||||
}
|
||||
Utf8::encodeCodepoint(codepoint, builder);
|
||||
if (codepoint.append(codeunit))
|
||||
Utf8::encodeCodepoint(codepoint.value(), builder);
|
||||
continue;
|
||||
#else
|
||||
return DeserializationError::NotSupported;
|
||||
|
49
src/ArduinoJson/Json/Utf16.hpp
Normal file
49
src/ArduinoJson/Json/Utf16.hpp
Normal file
@ -0,0 +1,49 @@
|
||||
// ArduinoJson - arduinojson.org
|
||||
// Copyright Benoit Blanchon 2014-2020
|
||||
// MIT License
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ArduinoJson/Namespace.hpp>
|
||||
|
||||
#include <stdint.h> // uint16_t, uint32_t
|
||||
|
||||
namespace ARDUINOJSON_NAMESPACE {
|
||||
|
||||
namespace Utf16 {
// Returns true when codeunit is in the range 0xD800..0xDBFF (the first half
// of a surrogate pair).
inline bool isHighSurrogate(uint16_t codeunit) {
  return codeunit >= 0xD800 && codeunit < 0xDC00;
}

// Returns true when codeunit is in the range 0xDC00..0xDFFF (the second half
// of a surrogate pair).
inline bool isLowSurrogate(uint16_t codeunit) {
  return codeunit >= 0xDC00 && codeunit < 0xE000;
}

// Incrementally assembles a code point from UTF-16 code units.
//
// Usage: call append() for each code unit; when it returns true, value()
// holds the decoded code point.
class Codepoint {
 public:
  // Start from a defined state so that a low surrogate arriving without a
  // preceding high surrogate (or a call to value() before any append())
  // never reads an indeterminate member.
  Codepoint() : _highSurrogate(0), _codepoint(0) {}

  // Consumes one code unit.
  // Returns false if the unit is a high surrogate (its 10 payload bits are
  // stored and a further unit is needed); returns true once value() holds a
  // code point.
  bool append(uint16_t codeunit) {
    if (isHighSurrogate(codeunit)) {
      // Keep only the 10 payload bits of the high surrogate.
      _highSurrogate = codeunit & 0x3FF;
      return false;
    }

    if (isLowSurrogate(codeunit)) {
      // Combine the stored high bits with the low surrogate's 10 payload
      // bits, then add the 0x10000 offset.
      _codepoint =
          uint32_t(0x10000 + ((_highSurrogate << 10) | (codeunit & 0x3FF)));
      return true;
    }

    // Any other code unit is a complete code point by itself.
    _codepoint = codeunit;
    return true;
  }

  // Returns the code point produced by the most recent append() that
  // returned true (0 if there has been none).
  uint32_t value() const {
    return _codepoint;
  }

 private:
  uint16_t _highSurrogate;  // 10 payload bits of the pending high surrogate
  uint32_t _codepoint;      // last decoded code point
};
}  // namespace Utf16
|
||||
} // namespace ARDUINOJSON_NAMESPACE
|
Reference in New Issue
Block a user