Decode escaped Unicode characters like \u00DE (issue #304, PR #791)

This commit is contained in:
Benoit Blanchon
2019-02-15 13:29:30 +01:00
parent 070cd5b6c0
commit 7050ef675d
12 changed files with 284 additions and 143 deletions

View File

@ -120,6 +120,11 @@
#endif
#endif
// Set to 1 to convert Unicode escape sequences (\u0123) to UTF-8 while parsing
// strings. Defaults to 0 unless defined before this point; the JSON
// deserializer tests this flag with #if and, when it is 0, reports
// DeserializationError::NotSupported on such an escape instead of decoding it.
#ifndef ARDUINOJSON_DECODE_UNICODE
#define ARDUINOJSON_DECODE_UNICODE 0
#endif
// Control the exponentiation threshold for big numbers
// CAUTION: cannot be more than 1e9 !!!!
#ifndef ARDUINOJSON_POSITIVE_EXPONENTIATION_THRESHOLD

View File

@ -11,6 +11,7 @@
#include "../Polyfills/type_traits.hpp"
#include "../Variant/VariantData.hpp"
#include "EscapeSequence.hpp"
#include "Utf8.hpp"
namespace ARDUINOJSON_NAMESPACE {
@ -192,7 +193,18 @@ class JsonDeserializer {
if (c == '\\') {
c = current();
if (c == '\0') return DeserializationError::IncompleteInput;
if (c == 'u') return DeserializationError::NotSupported;
if (c == 'u') {
#if ARDUINOJSON_DECODE_UNICODE
uint16_t codepoint;
move();
DeserializationError err = parseCodepoint(codepoint);
if (err) return err;
Utf8::encodeCodepoint(codepoint, builder);
continue;
#else
return DeserializationError::NotSupported;
#endif
}
// replace char
c = EscapeSequence::unescapeChar(c);
if (c == '\0') return DeserializationError::InvalidInput;
@ -256,6 +268,19 @@ class JsonDeserializer {
return DeserializationError::Ok;
}
// Reads the four hexadecimal digits of a Unicode escape from the input and
// accumulates them, most significant digit first, into `codepoint`.
//
// `codepoint` is reset to 0 on entry and updated after every accepted digit,
// so on failure it holds the value built from the digits consumed so far.
//
// Returns IncompleteInput if a NUL character is met before four digits were
// read, InvalidInput if decodeHex() yields a value above 0x0F, Ok otherwise.
DeserializationError parseCodepoint(uint16_t &codepoint) {
  codepoint = 0;
  for (uint8_t pending = 4; pending != 0; --pending) {
    const char ch = current();
    if (ch == '\0') return DeserializationError::IncompleteInput;

    const uint8_t nibble = decodeHex(ch);
    if (nibble > 0x0F) return DeserializationError::InvalidInput;

    // Make room for the new digit in the low four bits.
    codepoint = uint16_t((codepoint << 4) | nibble);
    move();
  }
  return DeserializationError::Ok;
}
// Reports whether `c` lies in the closed interval [min, max], comparing the
// three arguments as plain char values.
static inline bool isBetween(char c, char min, char max) {
  if (c < min) return false;  // below the lower bound
  return !(max < c);          // in range unless above the upper bound
}
@ -269,6 +294,12 @@ class JsonDeserializer {
return c == '\'' || c == '\"';
}
// Converts one hexadecimal digit ('0'-'9', 'a'-'f' or 'A'-'F') to its value.
//
// Returns a number in [0, 15] for a valid digit and 0xFF for any other
// character, so callers can reject bad input with `value > 0x0F`.
//
// Each range is checked explicitly: characters that merely sit next to the
// digits or letters in ASCII (':' to '?', and the backtick, which turns into
// '@' once the case bit is cleared) must not be mistaken for hex digits.
static inline uint8_t decodeHex(char c) {
  if ('0' <= c && c <= '9') return uint8_t(c - '0');

  // Clearing bit 0x20 folds ASCII lowercase letters onto uppercase.
  const char upper = char(c & ~0x20);
  if ('A' <= upper && upper <= 'F') return uint8_t(upper - 'A' + 10);

  return 0xFF;  // not a hexadecimal digit
}
DeserializationError skipSpacesAndComments() {
for (;;) {
switch (current()) {

View File

@ -0,0 +1,26 @@
// ArduinoJson - arduinojson.org
// Copyright Benoit Blanchon 2014-2018
// MIT License
#pragma once
namespace ARDUINOJSON_NAMESPACE {
namespace Utf8 {
// Appends the UTF-8 encoding of `codepoint` to `str`, one byte at a time,
// through str.append(char).
//
//   below 0x80      -> 1 byte : 0xxxxxxx
//   below 0x800     -> 2 bytes: 110xxxxx 10xxxxxx
//   0x800 and above -> 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
//
// The value is encoded as given; no check is made on the range it falls in.
template <typename TStringBuilder>
inline void encodeCodepoint(uint16_t codepoint, TStringBuilder &str) {
  // Low six bits: payload of the final continuation byte.
  const uint16_t tail = codepoint & 0x3f;

  if (codepoint < 0x80) {
    str.append(char(codepoint));
  } else if (codepoint < 0x00000800) {
    str.append(char(0xc0 | (codepoint >> 6)));
    str.append(char(tail | 0x80));
  } else {
    str.append(char(0xe0 | (codepoint >> 12)));
    str.append(char(((codepoint >> 6) & 0x3f) | 0x80));
    str.append(char(tail | 0x80));
  }
}
} // namespace Utf8
} // namespace ARDUINOJSON_NAMESPACE

View File

@ -19,4 +19,5 @@
#define ARDUINOJSON_NAMESPACE \
ARDUINOJSON_CONCAT8(ArduinoJson, ARDUINOJSON_VERSION_MAJOR, \
ARDUINOJSON_VERSION_MINOR, ARDUINOJSON_VERSION_REVISION, \
_, ARDUINOJSON_USE_LONG_LONG, _, ARDUINOJSON_USE_DOUBLE)
_, ARDUINOJSON_USE_LONG_LONG, ARDUINOJSON_USE_DOUBLE, \
ARDUINOJSON_DECODE_UNICODE)