Decode escaped Unicode characters like \u00DE (issue #304, PR #791)

This commit is contained in:
Benoit Blanchon
2019-02-15 13:29:30 +01:00
parent 070cd5b6c0
commit 7050ef675d
12 changed files with 284 additions and 143 deletions

View File

@ -1,6 +1,13 @@
ArduinoJson: change log ArduinoJson: change log
======================= =======================
HEAD
----
* Decode escaped Unicode characters like \u00DE (issue #304, PR #791)
Many thanks to Daniel Schulte (aka @trilader) who implemented this feature.
* Add option ARDUINOJSON_DECODE_UNICODE to enable it
v6.8.0-beta (2019-01-30) v6.8.0-beta (2019-01-30)
----------- -----------

View File

@ -120,6 +120,11 @@
#endif #endif
#endif #endif
// Convert unicode escape sequence (\u0123) to UTF-8
// Defaults to 0 (disabled): the deserializer then returns
// DeserializationError::NotSupported when a string contains such a sequence.
// Define it to 1 before including the library to enable the decoding.
// The value is also part of the ARDUINOJSON_NAMESPACE name.
#ifndef ARDUINOJSON_DECODE_UNICODE
#define ARDUINOJSON_DECODE_UNICODE 0
#endif
// Control the exponentiation threshold for big numbers // Control the exponentiation threshold for big numbers
// CAUTION: cannot be more than 1e9 !!!! // CAUTION: cannot be more than 1e9 !!!!
#ifndef ARDUINOJSON_POSITIVE_EXPONENTIATION_THRESHOLD #ifndef ARDUINOJSON_POSITIVE_EXPONENTIATION_THRESHOLD

View File

@ -11,6 +11,7 @@
#include "../Polyfills/type_traits.hpp" #include "../Polyfills/type_traits.hpp"
#include "../Variant/VariantData.hpp" #include "../Variant/VariantData.hpp"
#include "EscapeSequence.hpp" #include "EscapeSequence.hpp"
#include "Utf8.hpp"
namespace ARDUINOJSON_NAMESPACE { namespace ARDUINOJSON_NAMESPACE {
@ -192,7 +193,18 @@ class JsonDeserializer {
if (c == '\\') { if (c == '\\') {
c = current(); c = current();
if (c == '\0') return DeserializationError::IncompleteInput; if (c == '\0') return DeserializationError::IncompleteInput;
if (c == 'u') return DeserializationError::NotSupported; if (c == 'u') {
#if ARDUINOJSON_DECODE_UNICODE
uint16_t codepoint;
move();
DeserializationError err = parseCodepoint(codepoint);
if (err) return err;
Utf8::encodeCodepoint(codepoint, builder);
continue;
#else
return DeserializationError::NotSupported;
#endif
}
// replace char // replace char
c = EscapeSequence::unescapeChar(c); c = EscapeSequence::unescapeChar(c);
if (c == '\0') return DeserializationError::InvalidInput; if (c == '\0') return DeserializationError::InvalidInput;
@ -256,6 +268,19 @@ class JsonDeserializer {
return DeserializationError::Ok; return DeserializationError::Ok;
} }
// Reads exactly four hexadecimal digits from the input and assembles them,
// most significant digit first, into `codepoint`.
//
// The caller is expected to have already consumed the 'u' of the escape
// sequence, so current() yields the first digit on entry.
//
// Returns:
//   IncompleteInput - the input ended (current() returned '\0') before four
//                     digits were read
//   InvalidInput    - decodeHex() reported a value above 0x0F, i.e. the
//                     character is not a hexadecimal digit
//   Ok              - four digits were consumed
// On failure `codepoint` holds whatever was accumulated so far.
DeserializationError parseCodepoint(uint16_t &codepoint) {
  codepoint = 0;

  for (uint8_t remaining = 4; remaining != 0; --remaining) {
    const char digit = current();
    if (digit == '\0') return DeserializationError::IncompleteInput;

    const uint8_t nibble = decodeHex(digit);
    if (nibble > 0x0F) return DeserializationError::InvalidInput;

    // Make room for the next 4 bits, then advance past the digit.
    codepoint = uint16_t((codepoint << 4) | nibble);
    move();
  }

  return DeserializationError::Ok;
}
static inline bool isBetween(char c, char min, char max) { static inline bool isBetween(char c, char min, char max) {
return min <= c && c <= max; return min <= c && c <= max;
} }
@ -269,6 +294,12 @@ class JsonDeserializer {
return c == '\'' || c == '\"'; return c == '\'' || c == '\"';
} }
// Converts one hexadecimal digit ('0'-'9', 'a'-'f' or 'A'-'F') to its
// numeric value (0-15).
//
// Any other character yields 0xFF; callers detect invalid input by checking
// for a result greater than 0x0F.
//
// The ranges are tested explicitly on purpose: pure arithmetic such as
// `c - '0'` for everything below 'A' would map ':' .. '?' onto 10 .. 15, and
// folding the case with a bit mask would map '`' onto 9, so those characters
// would be accepted as hexadecimal digits.
static inline uint8_t decodeHex(char c) {
  if (c >= '0' && c <= '9') return uint8_t(c - '0');
  if (c >= 'a' && c <= 'f') return uint8_t(c - 'a' + 10);
  if (c >= 'A' && c <= 'F') return uint8_t(c - 'A' + 10);
  return 0xFF;  // not a hexadecimal digit
}
DeserializationError skipSpacesAndComments() { DeserializationError skipSpacesAndComments() {
for (;;) { for (;;) {
switch (current()) { switch (current()) {

View File

@ -0,0 +1,26 @@
// ArduinoJson - arduinojson.org
// Copyright Benoit Blanchon 2014-2018
// MIT License
#pragma once
namespace ARDUINOJSON_NAMESPACE {
namespace Utf8 {
// Appends the UTF-8 encoding of a 16-bit code point to `str`, one byte at a
// time, through str.append(char).
//
//   codepoint <  0x80   -> 1 byte:  0xxxxxxx
//   codepoint <  0x800  -> 2 bytes: 110xxxxx 10xxxxxx
//   otherwise           -> 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
//
// NOTE(review): values in the surrogate range (0xD800-0xDFFF) get no special
// treatment here; each one is emitted as an ordinary 3-byte sequence.
template <typename TStringBuilder>
inline void encodeCodepoint(uint16_t codepoint, TStringBuilder &str) {
  // Last byte of every multi-byte form: the low 6 bits, tagged as continuation.
  const char trailingByte = char((codepoint & 0x3f) | 0x80);

  if (codepoint < 0x80) {
    str.append(char(codepoint));
  } else if (codepoint < 0x800) {
    str.append(char(0xc0 | (codepoint >> 6)));
    str.append(trailingByte);
  } else {
    str.append(char(0xe0 | (codepoint >> 12)));
    str.append(char(((codepoint >> 6) & 0x3f) | 0x80));
    str.append(trailingByte);
  }
}
} // namespace Utf8
} // namespace ARDUINOJSON_NAMESPACE

View File

@ -19,4 +19,5 @@
#define ARDUINOJSON_NAMESPACE \ #define ARDUINOJSON_NAMESPACE \
ARDUINOJSON_CONCAT8(ArduinoJson, ARDUINOJSON_VERSION_MAJOR, \ ARDUINOJSON_CONCAT8(ArduinoJson, ARDUINOJSON_VERSION_MAJOR, \
ARDUINOJSON_VERSION_MINOR, ARDUINOJSON_VERSION_REVISION, \ ARDUINOJSON_VERSION_MINOR, ARDUINOJSON_VERSION_REVISION, \
_, ARDUINOJSON_USE_LONG_LONG, _, ARDUINOJSON_USE_DOUBLE) _, ARDUINOJSON_USE_LONG_LONG, ARDUINOJSON_USE_DOUBLE, \
ARDUINOJSON_DECODE_UNICODE)

View File

@ -9,6 +9,7 @@ add_executable(JsonDeserializerTests
deserializeJsonObject.cpp deserializeJsonObject.cpp
deserializeJsonObjectStatic.cpp deserializeJsonObjectStatic.cpp
deserializeJsonValue.cpp deserializeJsonValue.cpp
deserializeJsonString.cpp
input_types.cpp input_types.cpp
nestingLimit.cpp nestingLimit.cpp
) )

View File

@ -272,6 +272,12 @@ TEST_CASE("deserialize JSON object") {
REQUIRE(err == DeserializationError::Ok); REQUIRE(err == DeserializationError::Ok);
} }
SECTION("Repeated key") {
DeserializationError err = deserializeJson(doc, "{a:{b:{c:1}},a:2}");
REQUIRE(err == DeserializationError::Ok);
}
} }
SECTION("Block comments") { SECTION("Block comments") {

View File

@ -0,0 +1,66 @@
// ArduinoJson - arduinojson.org
// Copyright Benoit Blanchon 2014-2018
// MIT License
#define ARDUINOJSON_DECODE_UNICODE 1
#include <ArduinoJson.h>
#include <catch.hpp>
using namespace Catch::Matchers;
// Table-driven check that quoted inputs deserialize successfully and yield
// the expected bytes: both quote styles, the single-character escape
// sequences, and four-digit Unicode escapes (1-, 2- and 3-byte UTF-8 results).
TEST_CASE("Valid JSON strings value") {
  struct TestCase {
    const char* input;
    const char* expectedOutput;
  };

  TestCase testCases[] = {
      {"\"hello world\"", "hello world"},
      {"\'hello world\'", "hello world"},
      {"\"1\\\"2\\\\3\\/4\\b5\\f6\\n7\\r8\\t9\"", "1\"2\\3/4\b5\f6\n7\r8\t9"},
      {"'\\u0041'", "A"},
      {"'\\u00e4'", "\xc3\xa4"},      // ä
      {"'\\u00E4'", "\xc3\xa4"},      // ä
      {"'\\u3042'", "\xe3\x81\x82"},  // あ
  };
  const TestCase* const lastCase =
      testCases + sizeof(testCases) / sizeof(testCases[0]);

  // The same document is reused for every entry of the table.
  DynamicJsonDocument doc(4096);

  for (const TestCase* cursor = testCases; cursor != lastCase; ++cursor) {
    const TestCase& testCase = *cursor;
    CAPTURE(testCase.input);

    DeserializationError err = deserializeJson(doc, testCase.input);

    REQUIRE(err == DeserializationError::Ok);
    REQUIRE(doc.as<std::string>() == testCase.expectedOutput);
  }
}
// Inputs that end before the string is closed - including in the middle of a
// Unicode escape - must be reported as IncompleteInput.
TEST_CASE("Truncated JSON string") {
  const char* testCases[] = {"\"hello", "\'hello", "'\\u", "'\\u00", "'\\u000"};
  const char* const* const lastCase =
      testCases + sizeof(testCases) / sizeof(testCases[0]);

  DynamicJsonDocument doc(4096);

  for (const char* const* cursor = testCases; cursor != lastCase; ++cursor) {
    const char* input = *cursor;
    CAPTURE(input);
    REQUIRE(deserializeJson(doc, input) ==
            DeserializationError::IncompleteInput);
  }
}
// Malformed escapes (too few digits before the closing quote, or characters
// that are not hexadecimal digits) and an unquoted backslash sequence must be
// reported as InvalidInput.
TEST_CASE("Invalid JSON string") {
  const char* testCases[] = {"'\\u'",     "'\\u000g'", "'\\u000'",
                             "'\\u000G'", "'\\u000/'", "\\x1234"};
  const char* const* const lastCase =
      testCases + sizeof(testCases) / sizeof(testCases[0]);

  DynamicJsonDocument doc(4096);

  for (const char* const* cursor = testCases; cursor != lastCase; ++cursor) {
    const char* input = *cursor;
    CAPTURE(input);
    REQUIRE(deserializeJson(doc, input) == DeserializationError::InvalidInput);
  }
}

View File

@ -15,6 +15,7 @@ using ARDUINOJSON_NAMESPACE::isnan;
TEST_CASE("deserializeJson(DynamicJsonDocument&)") { TEST_CASE("deserializeJson(DynamicJsonDocument&)") {
DynamicJsonDocument doc(4096); DynamicJsonDocument doc(4096);
SECTION("Edge cases") {
SECTION("null char*") { SECTION("null char*") {
DeserializationError err = deserializeJson(doc, static_cast<char*>(0)); DeserializationError err = deserializeJson(doc, static_cast<char*>(0));
@ -28,7 +29,35 @@ TEST_CASE("deserializeJson(DynamicJsonDocument&)") {
REQUIRE(err != DeserializationError::Ok); REQUIRE(err != DeserializationError::Ok);
} }
SECTION("Integer") { SECTION("Empty input") {
DeserializationError err = deserializeJson(doc, "");
REQUIRE(err == DeserializationError::IncompleteInput);
}
SECTION("issue #628") {
DeserializationError err = deserializeJson(doc, "null");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<float>() == false);
}
SECTION("Garbage") {
DeserializationError err = deserializeJson(doc, "%*$£¤");
REQUIRE(err == DeserializationError::InvalidInput);
}
}
SECTION("Integers") {
SECTION("0") {
DeserializationError err = deserializeJson(doc, "0");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<int>() == true);
REQUIRE(doc.as<int>() == 0);
REQUIRE(doc.as<std::string>() == "0"); // issue #808
}
SECTION("Negative") {
DeserializationError err = deserializeJson(doc, "-42"); DeserializationError err = deserializeJson(doc, "-42");
REQUIRE(err == DeserializationError::Ok); REQUIRE(err == DeserializationError::Ok);
@ -36,7 +65,9 @@ TEST_CASE("deserializeJson(DynamicJsonDocument&)") {
REQUIRE_FALSE(doc.is<bool>()); REQUIRE_FALSE(doc.is<bool>());
REQUIRE(doc.as<int>() == -42); REQUIRE(doc.as<int>() == -42);
} }
}
SECTION("Floats") {
SECTION("Double") { SECTION("Double") {
DeserializationError err = deserializeJson(doc, "-1.23e+4"); DeserializationError err = deserializeJson(doc, "-1.23e+4");
@ -46,60 +77,6 @@ TEST_CASE("deserializeJson(DynamicJsonDocument&)") {
REQUIRE(doc.as<double>() == Approx(-1.23e+4)); REQUIRE(doc.as<double>() == Approx(-1.23e+4));
} }
SECTION("Double quoted string") {
DeserializationError err = deserializeJson(doc, "\"hello world\"");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<char*>());
REQUIRE_THAT(doc.as<char*>(), Equals("hello world"));
}
SECTION("Single quoted string") {
DeserializationError err = deserializeJson(doc, "\'hello world\'");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<char*>());
REQUIRE_THAT(doc.as<char*>(), Equals("hello world"));
}
SECTION("Escape sequences") {
DeserializationError err =
deserializeJson(doc, "\"1\\\"2\\\\3\\/4\\b5\\f6\\n7\\r8\\t9\"");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.as<std::string>() == "1\"2\\3/4\b5\f6\n7\r8\t9");
}
SECTION("UTF-16 surrogate") {
DeserializationError err = deserializeJson(doc, "\"\\uD834\\uDD1E\"");
REQUIRE(err == DeserializationError::NotSupported);
}
SECTION("True") {
DeserializationError err = deserializeJson(doc, "true");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<bool>());
REQUIRE(doc.as<bool>() == true);
}
SECTION("False") {
DeserializationError err = deserializeJson(doc, "false");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<bool>());
REQUIRE(doc.as<bool>() == false);
}
SECTION("0") {
DeserializationError err = deserializeJson(doc, "0");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<int>() == true);
REQUIRE(doc.as<int>() == 0);
REQUIRE(doc.as<std::string>() == "0"); // issue #808
}
SECTION("NaN") { SECTION("NaN") {
DeserializationError err = deserializeJson(doc, "NaN"); DeserializationError err = deserializeJson(doc, "NaN");
REQUIRE(err == DeserializationError::Ok); REQUIRE(err == DeserializationError::Ok);
@ -127,27 +104,27 @@ TEST_CASE("deserializeJson(DynamicJsonDocument&)") {
REQUIRE(doc.is<float>() == true); REQUIRE(doc.is<float>() == true);
REQUIRE(my::isinf(doc.as<float>())); REQUIRE(my::isinf(doc.as<float>()));
} }
}
SECTION("Booleans") {
SECTION("True") {
DeserializationError err = deserializeJson(doc, "true");
SECTION("issue #628") {
DeserializationError err = deserializeJson(doc, "null");
REQUIRE(err == DeserializationError::Ok); REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<float>() == false); REQUIRE(doc.is<bool>());
REQUIRE(doc.as<bool>() == true);
} }
SECTION("Should clear the JsonVariant") { SECTION("False") {
deserializeJson(doc, "[1,2,3]"); DeserializationError err = deserializeJson(doc, "false");
deserializeJson(doc, "{}");
REQUIRE(doc.is<JsonObject>()); REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.memoryUsage() == JSON_OBJECT_SIZE(0)); REQUIRE(doc.is<bool>());
} REQUIRE(doc.as<bool>() == false);
}
SECTION("Empty input") {
DeserializationError err = deserializeJson(doc, "");
REQUIRE(err == DeserializationError::IncompleteInput);
} }
SECTION("Comments") {
SECTION("Just a trailing comment") { SECTION("Just a trailing comment") {
DeserializationError err = deserializeJson(doc, "// comment"); DeserializationError err = deserializeJson(doc, "// comment");
@ -165,11 +142,6 @@ TEST_CASE("deserializeJson(DynamicJsonDocument&)") {
REQUIRE(err == DeserializationError::InvalidInput); REQUIRE(err == DeserializationError::InvalidInput);
} }
SECTION("Garbage") {
DeserializationError err = deserializeJson(doc, "%*$£¤");
REQUIRE(err == DeserializationError::InvalidInput);
} }
SECTION("Premature null-terminator") { SECTION("Premature null-terminator") {
@ -224,9 +196,11 @@ TEST_CASE("deserializeJson(DynamicJsonDocument&)") {
} }
} }
SECTION("Repeated object key") { SECTION("Should clear the JsonVariant") {
DeserializationError err = deserializeJson(doc, "{a:{b:{c:1}},a:2}"); deserializeJson(doc, "[1,2,3]");
deserializeJson(doc, "{}");
REQUIRE(err == DeserializationError::Ok); REQUIRE(doc.is<JsonObject>());
REQUIRE(doc.memoryUsage() == JSON_OBJECT_SIZE(0));
} }
} }

View File

@ -6,6 +6,8 @@
set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD 11)
add_executable(MixedConfigurationTests add_executable(MixedConfigurationTests
decode_unicode_0.cpp
decode_unicode_1.cpp
use_double_0.cpp use_double_0.cpp
use_double_1.cpp use_double_1.cpp
use_long_long_0.cpp use_long_long_0.cpp

View File

@ -0,0 +1,11 @@
#define ARDUINOJSON_DECODE_UNICODE 0
#include <ArduinoJson.h>
#include <catch.hpp>
// With decoding disabled in this translation unit, a string containing
// Unicode escape sequences must be rejected with NotSupported.
TEST_CASE("ARDUINOJSON_DECODE_UNICODE == 0") {
  const char* input = "\"\\uD834\\uDD1E\"";
  DynamicJsonDocument doc(2048);

  DeserializationError err = deserializeJson(doc, input);

  REQUIRE(err == DeserializationError::NotSupported);
}

View File

@ -0,0 +1,11 @@
#define ARDUINOJSON_DECODE_UNICODE 1
#include <ArduinoJson.h>
#include <catch.hpp>
// With decoding enabled in this translation unit, the same input is accepted.
// Only the returned status is checked here, not the decoded bytes.
TEST_CASE("ARDUINOJSON_DECODE_UNICODE == 1") {
  const char* input = "\"\\uD834\\uDD1E\"";
  DynamicJsonDocument doc(2048);

  DeserializationError err = deserializeJson(doc, input);

  REQUIRE(err == DeserializationError::Ok);
}