diff --git a/CHANGELOG.md b/CHANGELOG.md index bfb82a05..8599cc1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,13 @@ ArduinoJson: change log ======================= +HEAD +---- + +* Decode escaped Unicode characters like \u00DE (issue #304, PR #791) + Many thanks to Daniel Schulte (aka @trilader) who implemented this feature. +* Add option ARDUINOJSON_DECODE_UNICODE to enable it + v6.8.0-beta (2019-01-30) ----------- diff --git a/src/ArduinoJson/Configuration.hpp b/src/ArduinoJson/Configuration.hpp index accff366..6f08fe83 100644 --- a/src/ArduinoJson/Configuration.hpp +++ b/src/ArduinoJson/Configuration.hpp @@ -120,6 +120,11 @@ #endif #endif +// Convert unicode escape sequence (\u0123) to UTF-8 +#ifndef ARDUINOJSON_DECODE_UNICODE +#define ARDUINOJSON_DECODE_UNICODE 0 +#endif + // Control the exponentiation threshold for big numbers // CAUTION: cannot be more that 1e9 !!!! #ifndef ARDUINOJSON_POSITIVE_EXPONENTIATION_THRESHOLD diff --git a/src/ArduinoJson/Json/JsonDeserializer.hpp b/src/ArduinoJson/Json/JsonDeserializer.hpp index 1b508e74..f0bdfb0c 100644 --- a/src/ArduinoJson/Json/JsonDeserializer.hpp +++ b/src/ArduinoJson/Json/JsonDeserializer.hpp @@ -11,6 +11,7 @@ #include "../Polyfills/type_traits.hpp" #include "../Variant/VariantData.hpp" #include "EscapeSequence.hpp" +#include "Utf8.hpp" namespace ARDUINOJSON_NAMESPACE { @@ -192,7 +193,18 @@ class JsonDeserializer { if (c == '\\') { c = current(); if (c == '\0') return DeserializationError::IncompleteInput; - if (c == 'u') return DeserializationError::NotSupported; + if (c == 'u') { +#if ARDUINOJSON_DECODE_UNICODE + uint16_t codepoint; + move(); + DeserializationError err = parseCodepoint(codepoint); + if (err) return err; + Utf8::encodeCodepoint(codepoint, builder); + continue; +#else + return DeserializationError::NotSupported; +#endif + } // replace char c = EscapeSequence::unescapeChar(c); if (c == '\0') return DeserializationError::InvalidInput; @@ -256,6 +268,19 @@ class JsonDeserializer { 
return DeserializationError::Ok;
   }
 
+  DeserializationError parseCodepoint(uint16_t &codepoint) {
+    codepoint = 0;
+    for (uint8_t i = 0; i < 4; ++i) {
+      char digit = current();
+      if (!digit) return DeserializationError::IncompleteInput;
+      uint8_t value = decodeHex(digit);
+      if (value > 0x0F) return DeserializationError::InvalidInput;
+      codepoint = uint16_t((codepoint << 4) | value);
+      move();
+    }
+    return DeserializationError::Ok;
+  }
+
   static inline bool isBetween(char c, char min, char max) {
     return min <= c && c <= max;
   }
@@ -269,6 +294,19 @@ class JsonDeserializer {
     return c == '\'' || c == '\"';
   }
 
+  // Converts one hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its value
+  // (0-15). Any other character yields 0xFF, which parseCodepoint() rejects
+  // because it is greater than 0x0F.
+  static inline uint8_t decodeHex(char c) {
+    if (isBetween(c, '0', '9')) return uint8_t(c - '0');
+    // Fold to lowercase so one range check covers both letter cases. The
+    // explicit range test (rather than a bare subtraction) is what keeps
+    // ':'..'?' and '`' from being decoded as digits.
+    c = char(c | 0x20);
+    if (isBetween(c, 'a', 'f')) return uint8_t(c - 'a' + 10);
+    return 0xFF;
+  }
+
   DeserializationError skipSpacesAndComments() {
     for (;;) {
       switch (current()) {
diff --git a/src/ArduinoJson/Json/Utf8.hpp b/src/ArduinoJson/Json/Utf8.hpp
new file mode 100644
index 00000000..9e6b506b
--- /dev/null
+++ b/src/ArduinoJson/Json/Utf8.hpp
@@ -0,0 +1,26 @@
+// ArduinoJson - arduinojson.org
+// Copyright Benoit Blanchon 2014-2018
+// MIT License
+
+#pragma once
+
+namespace ARDUINOJSON_NAMESPACE {
+
+namespace Utf8 {
+template <typename TStringBuilder>
+inline void encodeCodepoint(uint16_t codepoint, TStringBuilder &str) {
+  if (codepoint < 0x80) {
+    str.append(char(codepoint));
+    return;
+  }
+
+  if (codepoint >= 0x00000800) {
+    str.append(char(0xe0 /*0b11100000*/ | (codepoint >> 12)));
+    str.append(char(((codepoint >> 6) & 0x3f /*0b00111111*/) | 0x80));
+  } else {
+    str.append(char(0xc0 /*0b11000000*/ | (codepoint >> 6)));
+  }
+  str.append(char((codepoint & 0x3f /*0b00111111*/) | 0x80));
+}
+}  // namespace Utf8
+}  // namespace ARDUINOJSON_NAMESPACE
diff --git a/src/ArduinoJson/Namespace.hpp b/src/ArduinoJson/Namespace.hpp
index be412ce3..d2b9a5f0 100644
--- a/src/ArduinoJson/Namespace.hpp
+++ b/src/ArduinoJson/Namespace.hpp
@@ -19,4 +19,5 @@
 #define ARDUINOJSON_NAMESPACE \
   ARDUINOJSON_CONCAT8(ArduinoJson, ARDUINOJSON_VERSION_MAJOR, \
ARDUINOJSON_VERSION_MINOR, ARDUINOJSON_VERSION_REVISION, \ - _, ARDUINOJSON_USE_LONG_LONG, _, ARDUINOJSON_USE_DOUBLE) + _, ARDUINOJSON_USE_LONG_LONG, ARDUINOJSON_USE_DOUBLE, \ + ARDUINOJSON_DECODE_UNICODE) diff --git a/test/JsonDeserializer/CMakeLists.txt b/test/JsonDeserializer/CMakeLists.txt index 9508082a..485ce3a6 100644 --- a/test/JsonDeserializer/CMakeLists.txt +++ b/test/JsonDeserializer/CMakeLists.txt @@ -9,6 +9,7 @@ add_executable(JsonDeserializerTests deserializeJsonObject.cpp deserializeJsonObjectStatic.cpp deserializeJsonValue.cpp + deserializeJsonString.cpp input_types.cpp nestingLimit.cpp ) diff --git a/test/JsonDeserializer/deserializeJsonObject.cpp b/test/JsonDeserializer/deserializeJsonObject.cpp index 884729fc..ed36c969 100644 --- a/test/JsonDeserializer/deserializeJsonObject.cpp +++ b/test/JsonDeserializer/deserializeJsonObject.cpp @@ -272,6 +272,12 @@ TEST_CASE("deserialize JSON object") { REQUIRE(err == DeserializationError::Ok); } + + SECTION("Repeated key") { + DeserializationError err = deserializeJson(doc, "{a:{b:{c:1}},a:2}"); + + REQUIRE(err == DeserializationError::Ok); + } } SECTION("Block comments") { diff --git a/test/JsonDeserializer/deserializeJsonString.cpp b/test/JsonDeserializer/deserializeJsonString.cpp new file mode 100644 index 00000000..e1bca182 --- /dev/null +++ b/test/JsonDeserializer/deserializeJsonString.cpp @@ -0,0 +1,66 @@ +// ArduinoJson - arduinojson.org +// Copyright Benoit Blanchon 2014-2018 +// MIT License + +#define ARDUINOJSON_DECODE_UNICODE 1 +#include +#include + +using namespace Catch::Matchers; + +TEST_CASE("Valid JSON strings value") { + struct TestCase { + const char* input; + const char* expectedOutput; + }; + + TestCase testCases[] = { + {"\"hello world\"", "hello world"}, + {"\'hello world\'", "hello world"}, + {"\"1\\\"2\\\\3\\/4\\b5\\f6\\n7\\r8\\t9\"", "1\"2\\3/4\b5\f6\n7\r8\t9"}, + {"'\\u0041'", "A"}, + {"'\\u00e4'", "\xc3\xa4"}, // ä + {"'\\u00E4'", "\xc3\xa4"}, // ä + {"'\\u3042'", "\xe3\x81\x82"}, 
// あ + + }; + const size_t testCount = sizeof(testCases) / sizeof(testCases[0]); + + DynamicJsonDocument doc(4096); + + for (size_t i = 0; i < testCount; i++) { + const TestCase& testCase = testCases[i]; + CAPTURE(testCase.input); + DeserializationError err = deserializeJson(doc, testCase.input); + REQUIRE(err == DeserializationError::Ok); + REQUIRE(doc.as() == testCase.expectedOutput); + } +} + +TEST_CASE("Truncated JSON string") { + const char* testCases[] = {"\"hello", "\'hello", "'\\u", "'\\u00", "'\\u000"}; + const size_t testCount = sizeof(testCases) / sizeof(testCases[0]); + + DynamicJsonDocument doc(4096); + + for (size_t i = 0; i < testCount; i++) { + const char* input = testCases[i]; + CAPTURE(input); + REQUIRE(deserializeJson(doc, input) == + DeserializationError::IncompleteInput); + } +} + +TEST_CASE("Invalid JSON string") { + const char* testCases[] = {"'\\u'", "'\\u000g'", "'\\u000'", + "'\\u000G'", "'\\u000/'", "\\x1234"}; + const size_t testCount = sizeof(testCases) / sizeof(testCases[0]); + + DynamicJsonDocument doc(4096); + + for (size_t i = 0; i < testCount; i++) { + const char* input = testCases[i]; + CAPTURE(input); + REQUIRE(deserializeJson(doc, input) == DeserializationError::InvalidInput); + } +} diff --git a/test/JsonDeserializer/deserializeJsonValue.cpp b/test/JsonDeserializer/deserializeJsonValue.cpp index 717b78ce..8b33cd3d 100644 --- a/test/JsonDeserializer/deserializeJsonValue.cpp +++ b/test/JsonDeserializer/deserializeJsonValue.cpp @@ -15,161 +15,133 @@ using ARDUINOJSON_NAMESPACE::isnan; TEST_CASE("deserializeJson(DynamicJsonDocument&)") { DynamicJsonDocument doc(4096); - SECTION("null char*") { - DeserializationError err = deserializeJson(doc, static_cast(0)); + SECTION("Edge cases") { + SECTION("null char*") { + DeserializationError err = deserializeJson(doc, static_cast(0)); - REQUIRE(err != DeserializationError::Ok); + REQUIRE(err != DeserializationError::Ok); + } + + SECTION("null const char*") { + DeserializationError err = + 
deserializeJson(doc, static_cast(0)); + + REQUIRE(err != DeserializationError::Ok); + } + + SECTION("Empty input") { + DeserializationError err = deserializeJson(doc, ""); + + REQUIRE(err == DeserializationError::IncompleteInput); + } + + SECTION("issue #628") { + DeserializationError err = deserializeJson(doc, "null"); + REQUIRE(err == DeserializationError::Ok); + REQUIRE(doc.is() == false); + } + + SECTION("Garbage") { + DeserializationError err = deserializeJson(doc, "%*$£¤"); + + REQUIRE(err == DeserializationError::InvalidInput); + } } - SECTION("null const char*") { - DeserializationError err = - deserializeJson(doc, static_cast(0)); + SECTION("Integers") { + SECTION("0") { + DeserializationError err = deserializeJson(doc, "0"); + REQUIRE(err == DeserializationError::Ok); + REQUIRE(doc.is() == true); + REQUIRE(doc.as() == 0); + REQUIRE(doc.as() == "0"); // issue #808 + } - REQUIRE(err != DeserializationError::Ok); + SECTION("Negative") { + DeserializationError err = deserializeJson(doc, "-42"); + + REQUIRE(err == DeserializationError::Ok); + REQUIRE(doc.is()); + REQUIRE_FALSE(doc.is()); + REQUIRE(doc.as() == -42); + } } - SECTION("Integer") { - DeserializationError err = deserializeJson(doc, "-42"); + SECTION("Floats") { + SECTION("Double") { + DeserializationError err = deserializeJson(doc, "-1.23e+4"); - REQUIRE(err == DeserializationError::Ok); - REQUIRE(doc.is()); - REQUIRE_FALSE(doc.is()); - REQUIRE(doc.as() == -42); + REQUIRE(err == DeserializationError::Ok); + REQUIRE_FALSE(doc.is()); + REQUIRE(doc.is()); + REQUIRE(doc.as() == Approx(-1.23e+4)); + } + + SECTION("NaN") { + DeserializationError err = deserializeJson(doc, "NaN"); + REQUIRE(err == DeserializationError::Ok); + REQUIRE(doc.is() == true); + REQUIRE(my::isnan(doc.as())); + } + + SECTION("Infinity") { + DeserializationError err = deserializeJson(doc, "Infinity"); + REQUIRE(err == DeserializationError::Ok); + REQUIRE(doc.is() == true); + REQUIRE(my::isinf(doc.as())); + } + + SECTION("+Infinity") 
{ + DeserializationError err = deserializeJson(doc, "+Infinity"); + REQUIRE(err == DeserializationError::Ok); + REQUIRE(doc.is() == true); + REQUIRE(my::isinf(doc.as())); + } + + SECTION("-Infinity") { + DeserializationError err = deserializeJson(doc, "-Infinity"); + REQUIRE(err == DeserializationError::Ok); + REQUIRE(doc.is() == true); + REQUIRE(my::isinf(doc.as())); + } } - SECTION("Double") { - DeserializationError err = deserializeJson(doc, "-1.23e+4"); + SECTION("Booleans") { + SECTION("True") { + DeserializationError err = deserializeJson(doc, "true"); - REQUIRE(err == DeserializationError::Ok); - REQUIRE_FALSE(doc.is()); - REQUIRE(doc.is()); - REQUIRE(doc.as() == Approx(-1.23e+4)); + REQUIRE(err == DeserializationError::Ok); + REQUIRE(doc.is()); + REQUIRE(doc.as() == true); + } + + SECTION("False") { + DeserializationError err = deserializeJson(doc, "false"); + + REQUIRE(err == DeserializationError::Ok); + REQUIRE(doc.is()); + REQUIRE(doc.as() == false); + } } - SECTION("Double quoted string") { - DeserializationError err = deserializeJson(doc, "\"hello world\""); + SECTION("Comments") { + SECTION("Just a trailing comment") { + DeserializationError err = deserializeJson(doc, "// comment"); - REQUIRE(err == DeserializationError::Ok); - REQUIRE(doc.is()); - REQUIRE_THAT(doc.as(), Equals("hello world")); - } + REQUIRE(err == DeserializationError::IncompleteInput); + } - SECTION("Single quoted string") { - DeserializationError err = deserializeJson(doc, "\'hello world\'"); + SECTION("Just a block comment") { + DeserializationError err = deserializeJson(doc, "/*comment*/"); - REQUIRE(err == DeserializationError::Ok); - REQUIRE(doc.is()); - REQUIRE_THAT(doc.as(), Equals("hello world")); - } + REQUIRE(err == DeserializationError::IncompleteInput); + } - SECTION("Escape sequences") { - DeserializationError err = - deserializeJson(doc, "\"1\\\"2\\\\3\\/4\\b5\\f6\\n7\\r8\\t9\""); + SECTION("Just a slash") { + DeserializationError err = deserializeJson(doc, "/"); - 
REQUIRE(err == DeserializationError::Ok); - REQUIRE(doc.as() == "1\"2\\3/4\b5\f6\n7\r8\t9"); - } - - SECTION("UTF-16 surrogate") { - DeserializationError err = deserializeJson(doc, "\"\\uD834\\uDD1E\""); - - REQUIRE(err == DeserializationError::NotSupported); - } - - SECTION("True") { - DeserializationError err = deserializeJson(doc, "true"); - - REQUIRE(err == DeserializationError::Ok); - REQUIRE(doc.is()); - REQUIRE(doc.as() == true); - } - - SECTION("False") { - DeserializationError err = deserializeJson(doc, "false"); - - REQUIRE(err == DeserializationError::Ok); - REQUIRE(doc.is()); - REQUIRE(doc.as() == false); - } - - SECTION("0") { - DeserializationError err = deserializeJson(doc, "0"); - REQUIRE(err == DeserializationError::Ok); - REQUIRE(doc.is() == true); - REQUIRE(doc.as() == 0); - REQUIRE(doc.as() == "0"); // issue #808 - } - - SECTION("NaN") { - DeserializationError err = deserializeJson(doc, "NaN"); - REQUIRE(err == DeserializationError::Ok); - REQUIRE(doc.is() == true); - REQUIRE(my::isnan(doc.as())); - } - - SECTION("Infinity") { - DeserializationError err = deserializeJson(doc, "Infinity"); - REQUIRE(err == DeserializationError::Ok); - REQUIRE(doc.is() == true); - REQUIRE(my::isinf(doc.as())); - } - - SECTION("+Infinity") { - DeserializationError err = deserializeJson(doc, "+Infinity"); - REQUIRE(err == DeserializationError::Ok); - REQUIRE(doc.is() == true); - REQUIRE(my::isinf(doc.as())); - } - - SECTION("-Infinity") { - DeserializationError err = deserializeJson(doc, "-Infinity"); - REQUIRE(err == DeserializationError::Ok); - REQUIRE(doc.is() == true); - REQUIRE(my::isinf(doc.as())); - } - - SECTION("issue #628") { - DeserializationError err = deserializeJson(doc, "null"); - REQUIRE(err == DeserializationError::Ok); - REQUIRE(doc.is() == false); - } - - SECTION("Should clear the JsonVariant") { - deserializeJson(doc, "[1,2,3]"); - deserializeJson(doc, "{}"); - - REQUIRE(doc.is()); - REQUIRE(doc.memoryUsage() == JSON_OBJECT_SIZE(0)); - } - - 
SECTION("Empty input") { - DeserializationError err = deserializeJson(doc, ""); - - REQUIRE(err == DeserializationError::IncompleteInput); - } - - SECTION("Just a trailing comment") { - DeserializationError err = deserializeJson(doc, "// comment"); - - REQUIRE(err == DeserializationError::IncompleteInput); - } - - SECTION("Just a block comment") { - DeserializationError err = deserializeJson(doc, "/*comment*/"); - - REQUIRE(err == DeserializationError::IncompleteInput); - } - - SECTION("Just a slash") { - DeserializationError err = deserializeJson(doc, "/"); - - REQUIRE(err == DeserializationError::InvalidInput); - } - - SECTION("Garbage") { - DeserializationError err = deserializeJson(doc, "%*$£¤"); - - REQUIRE(err == DeserializationError::InvalidInput); + REQUIRE(err == DeserializationError::InvalidInput); + } } SECTION("Premature null-terminator") { @@ -224,9 +196,11 @@ TEST_CASE("deserializeJson(DynamicJsonDocument&)") { } } - SECTION("Repeated object key") { - DeserializationError err = deserializeJson(doc, "{a:{b:{c:1}},a:2}"); + SECTION("Should clear the JsonVariant") { + deserializeJson(doc, "[1,2,3]"); + deserializeJson(doc, "{}"); - REQUIRE(err == DeserializationError::Ok); + REQUIRE(doc.is()); + REQUIRE(doc.memoryUsage() == JSON_OBJECT_SIZE(0)); } } diff --git a/test/MixedConfiguration/CMakeLists.txt b/test/MixedConfiguration/CMakeLists.txt index 4dc1a113..eb715c7a 100644 --- a/test/MixedConfiguration/CMakeLists.txt +++ b/test/MixedConfiguration/CMakeLists.txt @@ -6,6 +6,8 @@ set(CMAKE_CXX_STANDARD 11) add_executable(MixedConfigurationTests + decode_unicode_0.cpp + decode_unicode_1.cpp use_double_0.cpp use_double_1.cpp use_long_long_0.cpp diff --git a/test/MixedConfiguration/decode_unicode_0.cpp b/test/MixedConfiguration/decode_unicode_0.cpp new file mode 100644 index 00000000..8b02aec3 --- /dev/null +++ b/test/MixedConfiguration/decode_unicode_0.cpp @@ -0,0 +1,11 @@ +#define ARDUINOJSON_DECODE_UNICODE 0 +#include + +#include + 
+TEST_CASE("ARDUINOJSON_DECODE_UNICODE == 0") { + DynamicJsonDocument doc(2048); + DeserializationError err = deserializeJson(doc, "\"\\uD834\\uDD1E\""); + + REQUIRE(err == DeserializationError::NotSupported); +} diff --git a/test/MixedConfiguration/decode_unicode_1.cpp b/test/MixedConfiguration/decode_unicode_1.cpp new file mode 100644 index 00000000..2b5b6523 --- /dev/null +++ b/test/MixedConfiguration/decode_unicode_1.cpp @@ -0,0 +1,11 @@ +#define ARDUINOJSON_DECODE_UNICODE 1 +#include + +#include + +TEST_CASE("ARDUINOJSON_DECODE_UNICODE == 1") { + DynamicJsonDocument doc(2048); + DeserializationError err = deserializeJson(doc, "\"\\uD834\\uDD1E\""); + + REQUIRE(err == DeserializationError::Ok); +}