diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e9740fd..2cab7edc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ ArduinoJson: change log ======================= +HEAD +---- + +* Fixed regression in UTF16 decoding (issue #1173) + v6.14.0 (2020-01-16) ------- diff --git a/extras/tests/JsonDeserializer/string.cpp b/extras/tests/JsonDeserializer/string.cpp index 721dd2f9..40e4c7bb 100644 --- a/extras/tests/JsonDeserializer/string.cpp +++ b/extras/tests/JsonDeserializer/string.cpp @@ -21,6 +21,9 @@ TEST_CASE("Valid JSON strings value") { {"'\\u00E4'", "\xc3\xa4"}, // ä {"'\\u3042'", "\xe3\x81\x82"}, // あ {"'\\ud83d\\udda4'", "\xf0\x9f\x96\xa4"}, // 🖤 + {"'\\uF053'", "\xef\x81\x93"}, // issue #1173 + {"'\\uF015'", "\xef\x80\x95"}, // issue #1173 + {"'\\uF054'", "\xef\x81\x94"}, // issue #1173 }; const size_t testCount = sizeof(testCases) / sizeof(testCases[0]); diff --git a/extras/tests/Misc/CMakeLists.txt b/extras/tests/Misc/CMakeLists.txt index d1068292..bd26bb2e 100644 --- a/extras/tests/Misc/CMakeLists.txt +++ b/extras/tests/Misc/CMakeLists.txt @@ -11,6 +11,7 @@ add_executable(MiscTests TypeTraits.cpp unsigned_char.cpp Utf8.cpp + Utf16.cpp version.cpp ) diff --git a/extras/tests/Misc/Utf16.cpp b/extras/tests/Misc/Utf16.cpp new file mode 100644 index 00000000..071ed134 --- /dev/null +++ b/extras/tests/Misc/Utf16.cpp @@ -0,0 +1,68 @@ +// ArduinoJson - arduinojson.org +// Copyright Benoit Blanchon 2014-2020 +// MIT License + +#include +#include + +using namespace ARDUINOJSON_NAMESPACE; + +static void testUtf16Codepoint(uint16_t codeunit, uint32_t expectedCodepoint) { + Utf16::Codepoint cp; + REQUIRE(cp.append(codeunit) == true); + REQUIRE(cp.value() == expectedCodepoint); +} + +static void testUtf16Codepoint(uint16_t codeunit1, uint16_t codeunit2, + uint32_t expectedCodepoint) { + Utf16::Codepoint cp; + REQUIRE(cp.append(codeunit1) == false); + REQUIRE(cp.append(codeunit2) == true); + REQUIRE(cp.value() == expectedCodepoint); +} + 
+TEST_CASE("Utf16::Codepoint()") { + SECTION("U+0000") { + testUtf16Codepoint(0x0000, 0x000000); + } + + SECTION("U+0001") { + testUtf16Codepoint(0x0001, 0x000001); + } + + SECTION("U+D7FF") { + testUtf16Codepoint(0xD7FF, 0x00D7FF); + } + + SECTION("U+E000") { + testUtf16Codepoint(0xE000, 0x00E000); + } + + SECTION("U+FFFF") { + testUtf16Codepoint(0xFFFF, 0x00FFFF); + } + + SECTION("U+010000") { + testUtf16Codepoint(0xD800, 0xDC00, 0x010000); + } + + SECTION("U+010001") { + testUtf16Codepoint(0xD800, 0xDC01, 0x010001); + } + + SECTION("U+0103FF") { + testUtf16Codepoint(0xD800, 0xDFFF, 0x0103FF); + } + + SECTION("U+010400") { + testUtf16Codepoint(0xD801, 0xDC00, 0x010400); + } + + SECTION("U+010400") { + testUtf16Codepoint(0xDBFF, 0xDC00, 0x10FC00); + } + + SECTION("U+10FFFF") { + testUtf16Codepoint(0xDBFF, 0xDFFF, 0x10FFFF); + } +} diff --git a/src/ArduinoJson/Json/JsonDeserializer.hpp b/src/ArduinoJson/Json/JsonDeserializer.hpp index 52124111..53255d72 100644 --- a/src/ArduinoJson/Json/JsonDeserializer.hpp +++ b/src/ArduinoJson/Json/JsonDeserializer.hpp @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -190,7 +191,7 @@ class JsonDeserializer { DeserializationError parseQuotedString(const char *&result) { StringBuilder builder = _stringStorage.startString(); #if ARDUINOJSON_DECODE_UNICODE - uint16_t surrogate1 = 0; + Utf16::Codepoint codepoint; #endif const char stopChar = current(); @@ -208,20 +209,11 @@ class JsonDeserializer { if (c == 'u') { #if ARDUINOJSON_DECODE_UNICODE move(); - uint32_t codepoint; uint16_t codeunit; DeserializationError err = parseHex4(codeunit); if (err) return err; - if (codeunit >= 0xDC00) { - codepoint = - uint32_t(0x10000 | ((surrogate1 << 10) | (codeunit & 0x3FF))); - } else if (codeunit < 0xd800) { - codepoint = codeunit; - } else { - surrogate1 = codeunit & 0x3FF; - continue; - } - Utf8::encodeCodepoint(codepoint, builder); + if (codepoint.append(codeunit)) + Utf8::encodeCodepoint(codepoint.value(), builder); 
// ---------------------------------------------------------------------------
// Patch text that shared this line in the pasted diff, reproduced verbatim
// (only re-wrapped) so that nothing is lost. It is the tail of the
// JsonDeserializer.hpp hunk followed by the diff header of this new file.
//
//   continue; #else return DeserializationError::NotSupported;
//   diff --git a/src/ArduinoJson/Json/Utf16.hpp
//   b/src/ArduinoJson/Json/Utf16.hpp new file mode 100644
//   index 00000000..aaf9f6f3 --- /dev/null
//   +++ b/src/ArduinoJson/Json/Utf16.hpp @@ -0,0 +1,49 @@
// ---------------------------------------------------------------------------

// ArduinoJson - arduinojson.org
// Copyright Benoit Blanchon 2014-2020
// MIT License

#pragma once

// NOTE(review): a first #include line stood here but its target was lost when
// the patch was pasted. Presumably it is the header that defines
// ARDUINOJSON_NAMESPACE (likely <ArduinoJson/Namespace.hpp>) -- confirm and
// restore it, otherwise the macro below is not expanded.

#include <stdint.h>  // uint16_t, uint32_t

namespace ARDUINOJSON_NAMESPACE {

namespace Utf16 {

// Returns true when `codeunit` lies in [0xD800, 0xDC00), i.e. it is the first
// half of a surrogate pair.
inline bool isHighSurrogate(uint16_t codeunit) {
  return codeunit >= 0xD800 && codeunit < 0xDC00;
}

// Returns true when `codeunit` lies in [0xDC00, 0xE000), i.e. it is the second
// half of a surrogate pair.
inline bool isLowSurrogate(uint16_t codeunit) {
  return codeunit >= 0xDC00 && codeunit < 0xE000;
}

// Assembles a code point from UTF-16 code units fed one at a time.
//
// Usage: call append() for each code unit; whenever it returns true, value()
// holds a complete code point.
class Codepoint {
 public:
  // Starts with no pending high surrogate and a value of 0, so that value()
  // and a low surrogate arriving first never read indeterminate members.
  Codepoint() : _highSurrogate(0), _codepoint(0) {}

  // Consumes one code unit.
  //
  // Returns false when `codeunit` is a high surrogate: its 10 payload bits are
  // remembered and nothing can be emitted yet.
  // Returns true otherwise, with value() updated:
  //  - low surrogate: combined with the remembered high-surrogate bits
  //    (0 if none was ever seen), offset by 0x10000;
  //  - any other unit: the code point is the unit itself. A remembered high
  //    surrogate is left untouched in that case.
  bool append(uint16_t codeunit) {
    if (isHighSurrogate(codeunit)) {
      _highSurrogate = uint16_t(codeunit & 0x3FF);
      return false;
    }

    if (isLowSurrogate(codeunit)) {
      // Widen BEFORE shifting: the shifted value needs up to 20 bits, which
      // does not fit where int is 16 bits wide.
      const uint32_t highBits = uint32_t(_highSurrogate) << 10;
      const uint32_t lowBits = uint32_t(codeunit & 0x3FF);
      _codepoint = uint32_t(0x10000) + (highBits | lowBits);
      return true;
    }

    _codepoint = codeunit;
    return true;
  }

  // Returns the code point produced by the last append() that returned true
  // (0 if there has been none).
  uint32_t value() const {
    return _codepoint;
  }

 private:
  uint16_t _highSurrogate;  // low 10 bits of the last high surrogate seen
  uint32_t _codepoint;      // last completed code point
};
}  // namespace Utf16
}  // namespace ARDUINOJSON_NAMESPACE