Decode escaped Unicode characters like \u00DE (issue #304, PR #791)

This commit is contained in:
Benoit Blanchon
2019-02-15 13:29:30 +01:00
parent 070cd5b6c0
commit 7050ef675d
12 changed files with 284 additions and 143 deletions

View File

@ -1,6 +1,13 @@
ArduinoJson: change log ArduinoJson: change log
======================= =======================
HEAD
----
* Decode escaped Unicode characters like \u00DE (issue #304, PR #791)
Many thanks to Daniel Schulte (aka @trilader) who implemented this feature.
* Add option ARDUINOJSON_DECODE_UNICODE to enable it
v6.8.0-beta (2019-01-30) v6.8.0-beta (2019-01-30)
----------- -----------

View File

@ -120,6 +120,11 @@
#endif #endif
#endif #endif
// Convert unicode escape sequence (\u0123) to UTF-8
#ifndef ARDUINOJSON_DECODE_UNICODE
#define ARDUINOJSON_DECODE_UNICODE 0
#endif
// Control the exponentiation threshold for big numbers // Control the exponentiation threshold for big numbers
// CAUTION: cannot be more that 1e9 !!!! // CAUTION: cannot be more that 1e9 !!!!
#ifndef ARDUINOJSON_POSITIVE_EXPONENTIATION_THRESHOLD #ifndef ARDUINOJSON_POSITIVE_EXPONENTIATION_THRESHOLD

View File

@ -11,6 +11,7 @@
#include "../Polyfills/type_traits.hpp" #include "../Polyfills/type_traits.hpp"
#include "../Variant/VariantData.hpp" #include "../Variant/VariantData.hpp"
#include "EscapeSequence.hpp" #include "EscapeSequence.hpp"
#include "Utf8.hpp"
namespace ARDUINOJSON_NAMESPACE { namespace ARDUINOJSON_NAMESPACE {
@ -192,7 +193,18 @@ class JsonDeserializer {
if (c == '\\') { if (c == '\\') {
c = current(); c = current();
if (c == '\0') return DeserializationError::IncompleteInput; if (c == '\0') return DeserializationError::IncompleteInput;
if (c == 'u') return DeserializationError::NotSupported; if (c == 'u') {
#if ARDUINOJSON_DECODE_UNICODE
uint16_t codepoint;
move();
DeserializationError err = parseCodepoint(codepoint);
if (err) return err;
Utf8::encodeCodepoint(codepoint, builder);
continue;
#else
return DeserializationError::NotSupported;
#endif
}
// replace char // replace char
c = EscapeSequence::unescapeChar(c); c = EscapeSequence::unescapeChar(c);
if (c == '\0') return DeserializationError::InvalidInput; if (c == '\0') return DeserializationError::InvalidInput;
@ -256,6 +268,19 @@ class JsonDeserializer {
return DeserializationError::Ok; return DeserializationError::Ok;
} }
DeserializationError parseCodepoint(uint16_t &codepoint) {
codepoint = 0;
for (uint8_t i = 0; i < 4; ++i) {
char digit = current();
if (!digit) return DeserializationError::IncompleteInput;
uint8_t value = decodeHex(digit);
if (value > 0x0F) return DeserializationError::InvalidInput;
codepoint = uint16_t((codepoint << 4) | value);
move();
}
return DeserializationError::Ok;
}
static inline bool isBetween(char c, char min, char max) { static inline bool isBetween(char c, char min, char max) {
return min <= c && c <= max; return min <= c && c <= max;
} }
@ -269,6 +294,12 @@ class JsonDeserializer {
return c == '\'' || c == '\"'; return c == '\'' || c == '\"';
} }
static inline uint8_t decodeHex(char c) {
if (c < 'A') return uint8_t(c - '0');
c &= ~0x20; // uppercase
return uint8_t(c - 'A' + 10);
}
DeserializationError skipSpacesAndComments() { DeserializationError skipSpacesAndComments() {
for (;;) { for (;;) {
switch (current()) { switch (current()) {

View File

@ -0,0 +1,26 @@
// ArduinoJson - arduinojson.org
// Copyright Benoit Blanchon 2014-2018
// MIT License
#pragma once
namespace ARDUINOJSON_NAMESPACE {
namespace Utf8 {
template <typename TStringBuilder>
inline void encodeCodepoint(uint16_t codepoint, TStringBuilder &str) {
if (codepoint < 0x80) {
str.append(char(codepoint));
return;
}
if (codepoint >= 0x00000800) {
str.append(char(0xe0 /*0b11100000*/ | (codepoint >> 12)));
str.append(char(((codepoint >> 6) & 0x3f /*0b00111111*/) | 0x80));
} else {
str.append(char(0xc0 /*0b11000000*/ | (codepoint >> 6)));
}
str.append(char((codepoint & 0x3f /*0b00111111*/) | 0x80));
}
} // namespace Utf8
} // namespace ARDUINOJSON_NAMESPACE

View File

@ -19,4 +19,5 @@
#define ARDUINOJSON_NAMESPACE \ #define ARDUINOJSON_NAMESPACE \
ARDUINOJSON_CONCAT8(ArduinoJson, ARDUINOJSON_VERSION_MAJOR, \ ARDUINOJSON_CONCAT8(ArduinoJson, ARDUINOJSON_VERSION_MAJOR, \
ARDUINOJSON_VERSION_MINOR, ARDUINOJSON_VERSION_REVISION, \ ARDUINOJSON_VERSION_MINOR, ARDUINOJSON_VERSION_REVISION, \
_, ARDUINOJSON_USE_LONG_LONG, _, ARDUINOJSON_USE_DOUBLE) _, ARDUINOJSON_USE_LONG_LONG, ARDUINOJSON_USE_DOUBLE, \
ARDUINOJSON_DECODE_UNICODE)

View File

@ -9,6 +9,7 @@ add_executable(JsonDeserializerTests
deserializeJsonObject.cpp deserializeJsonObject.cpp
deserializeJsonObjectStatic.cpp deserializeJsonObjectStatic.cpp
deserializeJsonValue.cpp deserializeJsonValue.cpp
deserializeJsonString.cpp
input_types.cpp input_types.cpp
nestingLimit.cpp nestingLimit.cpp
) )

View File

@ -272,6 +272,12 @@ TEST_CASE("deserialize JSON object") {
REQUIRE(err == DeserializationError::Ok); REQUIRE(err == DeserializationError::Ok);
} }
SECTION("Repeated key") {
DeserializationError err = deserializeJson(doc, "{a:{b:{c:1}},a:2}");
REQUIRE(err == DeserializationError::Ok);
}
} }
SECTION("Block comments") { SECTION("Block comments") {

View File

@ -0,0 +1,66 @@
// ArduinoJson - arduinojson.org
// Copyright Benoit Blanchon 2014-2018
// MIT License
#define ARDUINOJSON_DECODE_UNICODE 1
#include <ArduinoJson.h>
#include <catch.hpp>
using namespace Catch::Matchers;
TEST_CASE("Valid JSON strings value") {
struct TestCase {
const char* input;
const char* expectedOutput;
};
TestCase testCases[] = {
{"\"hello world\"", "hello world"},
{"\'hello world\'", "hello world"},
{"\"1\\\"2\\\\3\\/4\\b5\\f6\\n7\\r8\\t9\"", "1\"2\\3/4\b5\f6\n7\r8\t9"},
{"'\\u0041'", "A"},
{"'\\u00e4'", "\xc3\xa4"}, // ä
{"'\\u00E4'", "\xc3\xa4"}, // ä
{"'\\u3042'", "\xe3\x81\x82"}, // あ
};
const size_t testCount = sizeof(testCases) / sizeof(testCases[0]);
DynamicJsonDocument doc(4096);
for (size_t i = 0; i < testCount; i++) {
const TestCase& testCase = testCases[i];
CAPTURE(testCase.input);
DeserializationError err = deserializeJson(doc, testCase.input);
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.as<std::string>() == testCase.expectedOutput);
}
}
TEST_CASE("Truncated JSON string") {
const char* testCases[] = {"\"hello", "\'hello", "'\\u", "'\\u00", "'\\u000"};
const size_t testCount = sizeof(testCases) / sizeof(testCases[0]);
DynamicJsonDocument doc(4096);
for (size_t i = 0; i < testCount; i++) {
const char* input = testCases[i];
CAPTURE(input);
REQUIRE(deserializeJson(doc, input) ==
DeserializationError::IncompleteInput);
}
}
TEST_CASE("Invalid JSON string") {
const char* testCases[] = {"'\\u'", "'\\u000g'", "'\\u000'",
"'\\u000G'", "'\\u000/'", "\\x1234"};
const size_t testCount = sizeof(testCases) / sizeof(testCases[0]);
DynamicJsonDocument doc(4096);
for (size_t i = 0; i < testCount; i++) {
const char* input = testCases[i];
CAPTURE(input);
REQUIRE(deserializeJson(doc, input) == DeserializationError::InvalidInput);
}
}

View File

@ -15,161 +15,133 @@ using ARDUINOJSON_NAMESPACE::isnan;
TEST_CASE("deserializeJson(DynamicJsonDocument&)") { TEST_CASE("deserializeJson(DynamicJsonDocument&)") {
DynamicJsonDocument doc(4096); DynamicJsonDocument doc(4096);
SECTION("null char*") { SECTION("Edge cases") {
DeserializationError err = deserializeJson(doc, static_cast<char*>(0)); SECTION("null char*") {
DeserializationError err = deserializeJson(doc, static_cast<char*>(0));
REQUIRE(err != DeserializationError::Ok); REQUIRE(err != DeserializationError::Ok);
}
SECTION("null const char*") {
DeserializationError err =
deserializeJson(doc, static_cast<const char*>(0));
REQUIRE(err != DeserializationError::Ok);
}
SECTION("Empty input") {
DeserializationError err = deserializeJson(doc, "");
REQUIRE(err == DeserializationError::IncompleteInput);
}
SECTION("issue #628") {
DeserializationError err = deserializeJson(doc, "null");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<float>() == false);
}
SECTION("Garbage") {
DeserializationError err = deserializeJson(doc, "%*$£¤");
REQUIRE(err == DeserializationError::InvalidInput);
}
} }
SECTION("null const char*") { SECTION("Integers") {
DeserializationError err = SECTION("0") {
deserializeJson(doc, static_cast<const char*>(0)); DeserializationError err = deserializeJson(doc, "0");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<int>() == true);
REQUIRE(doc.as<int>() == 0);
REQUIRE(doc.as<std::string>() == "0"); // issue #808
}
REQUIRE(err != DeserializationError::Ok); SECTION("Negative") {
DeserializationError err = deserializeJson(doc, "-42");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<int>());
REQUIRE_FALSE(doc.is<bool>());
REQUIRE(doc.as<int>() == -42);
}
} }
SECTION("Integer") { SECTION("Floats") {
DeserializationError err = deserializeJson(doc, "-42"); SECTION("Double") {
DeserializationError err = deserializeJson(doc, "-1.23e+4");
REQUIRE(err == DeserializationError::Ok); REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<int>()); REQUIRE_FALSE(doc.is<int>());
REQUIRE_FALSE(doc.is<bool>()); REQUIRE(doc.is<double>());
REQUIRE(doc.as<int>() == -42); REQUIRE(doc.as<double>() == Approx(-1.23e+4));
}
SECTION("NaN") {
DeserializationError err = deserializeJson(doc, "NaN");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<float>() == true);
REQUIRE(my::isnan(doc.as<float>()));
}
SECTION("Infinity") {
DeserializationError err = deserializeJson(doc, "Infinity");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<float>() == true);
REQUIRE(my::isinf(doc.as<float>()));
}
SECTION("+Infinity") {
DeserializationError err = deserializeJson(doc, "+Infinity");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<float>() == true);
REQUIRE(my::isinf(doc.as<float>()));
}
SECTION("-Infinity") {
DeserializationError err = deserializeJson(doc, "-Infinity");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<float>() == true);
REQUIRE(my::isinf(doc.as<float>()));
}
} }
SECTION("Double") { SECTION("Booleans") {
DeserializationError err = deserializeJson(doc, "-1.23e+4"); SECTION("True") {
DeserializationError err = deserializeJson(doc, "true");
REQUIRE(err == DeserializationError::Ok); REQUIRE(err == DeserializationError::Ok);
REQUIRE_FALSE(doc.is<int>()); REQUIRE(doc.is<bool>());
REQUIRE(doc.is<double>()); REQUIRE(doc.as<bool>() == true);
REQUIRE(doc.as<double>() == Approx(-1.23e+4)); }
SECTION("False") {
DeserializationError err = deserializeJson(doc, "false");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<bool>());
REQUIRE(doc.as<bool>() == false);
}
} }
SECTION("Double quoted string") { SECTION("Comments") {
DeserializationError err = deserializeJson(doc, "\"hello world\""); SECTION("Just a trailing comment") {
DeserializationError err = deserializeJson(doc, "// comment");
REQUIRE(err == DeserializationError::Ok); REQUIRE(err == DeserializationError::IncompleteInput);
REQUIRE(doc.is<char*>()); }
REQUIRE_THAT(doc.as<char*>(), Equals("hello world"));
}
SECTION("Single quoted string") { SECTION("Just a block comment") {
DeserializationError err = deserializeJson(doc, "\'hello world\'"); DeserializationError err = deserializeJson(doc, "/*comment*/");
REQUIRE(err == DeserializationError::Ok); REQUIRE(err == DeserializationError::IncompleteInput);
REQUIRE(doc.is<char*>()); }
REQUIRE_THAT(doc.as<char*>(), Equals("hello world"));
}
SECTION("Escape sequences") { SECTION("Just a slash") {
DeserializationError err = DeserializationError err = deserializeJson(doc, "/");
deserializeJson(doc, "\"1\\\"2\\\\3\\/4\\b5\\f6\\n7\\r8\\t9\"");
REQUIRE(err == DeserializationError::Ok); REQUIRE(err == DeserializationError::InvalidInput);
REQUIRE(doc.as<std::string>() == "1\"2\\3/4\b5\f6\n7\r8\t9"); }
}
SECTION("UTF-16 surrogate") {
DeserializationError err = deserializeJson(doc, "\"\\uD834\\uDD1E\"");
REQUIRE(err == DeserializationError::NotSupported);
}
SECTION("True") {
DeserializationError err = deserializeJson(doc, "true");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<bool>());
REQUIRE(doc.as<bool>() == true);
}
SECTION("False") {
DeserializationError err = deserializeJson(doc, "false");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<bool>());
REQUIRE(doc.as<bool>() == false);
}
SECTION("0") {
DeserializationError err = deserializeJson(doc, "0");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<int>() == true);
REQUIRE(doc.as<int>() == 0);
REQUIRE(doc.as<std::string>() == "0"); // issue #808
}
SECTION("NaN") {
DeserializationError err = deserializeJson(doc, "NaN");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<float>() == true);
REQUIRE(my::isnan(doc.as<float>()));
}
SECTION("Infinity") {
DeserializationError err = deserializeJson(doc, "Infinity");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<float>() == true);
REQUIRE(my::isinf(doc.as<float>()));
}
SECTION("+Infinity") {
DeserializationError err = deserializeJson(doc, "+Infinity");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<float>() == true);
REQUIRE(my::isinf(doc.as<float>()));
}
SECTION("-Infinity") {
DeserializationError err = deserializeJson(doc, "-Infinity");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<float>() == true);
REQUIRE(my::isinf(doc.as<float>()));
}
SECTION("issue #628") {
DeserializationError err = deserializeJson(doc, "null");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.is<float>() == false);
}
SECTION("Should clear the JsonVariant") {
deserializeJson(doc, "[1,2,3]");
deserializeJson(doc, "{}");
REQUIRE(doc.is<JsonObject>());
REQUIRE(doc.memoryUsage() == JSON_OBJECT_SIZE(0));
}
SECTION("Empty input") {
DeserializationError err = deserializeJson(doc, "");
REQUIRE(err == DeserializationError::IncompleteInput);
}
SECTION("Just a trailing comment") {
DeserializationError err = deserializeJson(doc, "// comment");
REQUIRE(err == DeserializationError::IncompleteInput);
}
SECTION("Just a block comment") {
DeserializationError err = deserializeJson(doc, "/*comment*/");
REQUIRE(err == DeserializationError::IncompleteInput);
}
SECTION("Just a slash") {
DeserializationError err = deserializeJson(doc, "/");
REQUIRE(err == DeserializationError::InvalidInput);
}
SECTION("Garbage") {
DeserializationError err = deserializeJson(doc, "%*$£¤");
REQUIRE(err == DeserializationError::InvalidInput);
} }
SECTION("Premature null-terminator") { SECTION("Premature null-terminator") {
@ -224,9 +196,11 @@ TEST_CASE("deserializeJson(DynamicJsonDocument&)") {
} }
} }
SECTION("Repeated object key") { SECTION("Should clear the JsonVariant") {
DeserializationError err = deserializeJson(doc, "{a:{b:{c:1}},a:2}"); deserializeJson(doc, "[1,2,3]");
deserializeJson(doc, "{}");
REQUIRE(err == DeserializationError::Ok); REQUIRE(doc.is<JsonObject>());
REQUIRE(doc.memoryUsage() == JSON_OBJECT_SIZE(0));
} }
} }

View File

@ -6,6 +6,8 @@
set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD 11)
add_executable(MixedConfigurationTests add_executable(MixedConfigurationTests
decode_unicode_0.cpp
decode_unicode_1.cpp
use_double_0.cpp use_double_0.cpp
use_double_1.cpp use_double_1.cpp
use_long_long_0.cpp use_long_long_0.cpp

View File

@ -0,0 +1,11 @@
#define ARDUINOJSON_DECODE_UNICODE 0
#include <ArduinoJson.h>
#include <catch.hpp>
TEST_CASE("ARDUINOJSON_DECODE_UNICODE == 0") {
DynamicJsonDocument doc(2048);
DeserializationError err = deserializeJson(doc, "\"\\uD834\\uDD1E\"");
REQUIRE(err == DeserializationError::NotSupported);
}

View File

@ -0,0 +1,11 @@
#define ARDUINOJSON_DECODE_UNICODE 1
#include <ArduinoJson.h>
#include <catch.hpp>
TEST_CASE("ARDUINOJSON_DECODE_UNICODE == 1") {
DynamicJsonDocument doc(2048);
DeserializationError err = deserializeJson(doc, "\"\\uD834\\uDD1E\"");
REQUIRE(err == DeserializationError::Ok);
}