forked from bblanchon/ArduinoJson
Improved decoding of UTF-16 surrogate pairs (closes #1157)
This commit is contained in:
committed by
Benoit Blanchon
parent
8550418875
commit
91b808381e
@@ -7,7 +7,10 @@ HEAD
|
|||||||
* Added `BasicJsonDocument::shrinkToFit()`
|
* Added `BasicJsonDocument::shrinkToFit()`
|
||||||
* Added support of `uint8_t` for `serializeJson()`, `serializeJsonPretty()`, and `serializeMsgPack()` (issue #1142)
|
* Added support of `uint8_t` for `serializeJson()`, `serializeJsonPretty()`, and `serializeMsgPack()` (issue #1142)
|
||||||
* Auto enable support for `std::string` and `std::stream` on modern compilers (issue #1156)
|
* Auto enable support for `std::string` and `std::stream` on modern compilers (issue #1156)
|
||||||
No need to define `ARDUINOJSON_ENABLE_STD_STRING` and `ARDUINOJSON_ENABLE_STD_STREAM`.
|
(No need to define `ARDUINOJSON_ENABLE_STD_STRING` and `ARDUINOJSON_ENABLE_STD_STREAM` anymore)
|
||||||
|
* Improved decoding of UTF-16 surrogate pairs (PR #1157 by @kaysievers)
|
||||||
|
(ArduinoJson now produces standard UTF-8 instead of CESU-8)
|
||||||
|
|
||||||
|
|
||||||
v6.13.0 (2019-11-01)
|
v6.13.0 (2019-11-01)
|
||||||
-------
|
-------
|
||||||
|
@@ -7,9 +7,10 @@
|
|||||||
#include <catch.hpp>
|
#include <catch.hpp>
|
||||||
|
|
||||||
TEST_CASE("Invalid JSON input") {
|
TEST_CASE("Invalid JSON input") {
|
||||||
const char* testCases[] = {"'\\u'", "'\\u000g'", "'\\u000'", "'\\u000G'",
|
const char* testCases[] = {
|
||||||
"'\\u000/'", "\\x1234", "6a9", "1,",
|
"'\\u'", "'\\u000g'", "'\\u000'", "'\\u000G'", "'\\ud83d\\ud83d'",
|
||||||
"2]", "3}"};
|
"'\\udda4'", "'\\ud83d_'", "'\\u000/'", "\\x1234", "6a9",
|
||||||
|
"1,", "2]", "3}"};
|
||||||
const size_t testCount = sizeof(testCases) / sizeof(testCases[0]);
|
const size_t testCount = sizeof(testCases) / sizeof(testCases[0]);
|
||||||
|
|
||||||
DynamicJsonDocument doc(4096);
|
DynamicJsonDocument doc(4096);
|
||||||
|
@@ -17,10 +17,10 @@ TEST_CASE("Valid JSON strings value") {
|
|||||||
{"\'hello world\'", "hello world"},
|
{"\'hello world\'", "hello world"},
|
||||||
{"\"1\\\"2\\\\3\\/4\\b5\\f6\\n7\\r8\\t9\"", "1\"2\\3/4\b5\f6\n7\r8\t9"},
|
{"\"1\\\"2\\\\3\\/4\\b5\\f6\\n7\\r8\\t9\"", "1\"2\\3/4\b5\f6\n7\r8\t9"},
|
||||||
{"'\\u0041'", "A"},
|
{"'\\u0041'", "A"},
|
||||||
{"'\\u00e4'", "\xc3\xa4"}, // ä
|
{"'\\u00e4'", "\xc3\xa4"}, // ä
|
||||||
{"'\\u00E4'", "\xc3\xa4"}, // ä
|
{"'\\u00E4'", "\xc3\xa4"}, // ä
|
||||||
{"'\\u3042'", "\xe3\x81\x82"}, // あ
|
{"'\\u3042'", "\xe3\x81\x82"}, // あ
|
||||||
|
{"'\\ud83d\\udda4'", "\xf0\x9f\x96\xa4"}, // 🖤
|
||||||
};
|
};
|
||||||
const size_t testCount = sizeof(testCases) / sizeof(testCases[0]);
|
const size_t testCount = sizeof(testCases) / sizeof(testCases[0]);
|
||||||
|
|
||||||
|
@@ -189,6 +189,7 @@ class JsonDeserializer {
|
|||||||
|
|
||||||
DeserializationError parseQuotedString(const char *&result) {
|
DeserializationError parseQuotedString(const char *&result) {
|
||||||
StringBuilder builder = _stringStorage.startString();
|
StringBuilder builder = _stringStorage.startString();
|
||||||
|
uint16_t surrogate1 = 0;
|
||||||
const char stopChar = current();
|
const char stopChar = current();
|
||||||
|
|
||||||
move();
|
move();
|
||||||
@@ -208,7 +209,19 @@ class JsonDeserializer {
|
|||||||
move();
|
move();
|
||||||
DeserializationError err = parseCodepoint(codepoint);
|
DeserializationError err = parseCodepoint(codepoint);
|
||||||
if (err) return err;
|
if (err) return err;
|
||||||
Utf8::encodeCodepoint(codepoint, builder);
|
if (codepoint >= 0xd800 && codepoint <= 0xdbff) {
|
||||||
|
if (surrogate1 > 0) return DeserializationError::InvalidInput;
|
||||||
|
surrogate1 = codepoint;
|
||||||
|
} else if (codepoint >= 0xdc00 && codepoint <= 0xdfff) {
|
||||||
|
if (surrogate1 == 0) return DeserializationError::InvalidInput;
|
||||||
|
uint32_t codepoint32 = 0x10000;
|
||||||
|
codepoint32 += static_cast<uint32_t>(surrogate1 - 0xd800) << 10;
|
||||||
|
codepoint32 += codepoint - 0xdc00;
|
||||||
|
Utf8::encodeCodepoint(codepoint32, builder);
|
||||||
|
surrogate1 = 0;
|
||||||
|
} else {
|
||||||
|
Utf8::encodeCodepoint(codepoint, builder);
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
#else
|
#else
|
||||||
return DeserializationError::NotSupported;
|
return DeserializationError::NotSupported;
|
||||||
@@ -220,6 +233,8 @@ class JsonDeserializer {
|
|||||||
move();
|
move();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (surrogate1 > 0) return DeserializationError::InvalidInput;
|
||||||
|
|
||||||
builder.append(c);
|
builder.append(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -10,17 +10,21 @@ namespace ARDUINOJSON_NAMESPACE {
|
|||||||
|
|
||||||
namespace Utf8 {
|
namespace Utf8 {
|
||||||
template <typename TStringBuilder>
|
template <typename TStringBuilder>
|
||||||
inline void encodeCodepoint(uint16_t codepoint, TStringBuilder &str) {
|
inline void encodeCodepoint(uint32_t codepoint, TStringBuilder &str) {
|
||||||
if (codepoint < 0x80) {
|
if (codepoint < 0x80) {
|
||||||
str.append(char(codepoint));
|
str.append(char(codepoint));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (codepoint >= 0x00000800) {
|
if (codepoint < 0x00000800) {
|
||||||
|
str.append(char(0xc0 /*0b11000000*/ | (codepoint >> 6)));
|
||||||
|
} else if (codepoint < 0x00010000) {
|
||||||
str.append(char(0xe0 /*0b11100000*/ | (codepoint >> 12)));
|
str.append(char(0xe0 /*0b11100000*/ | (codepoint >> 12)));
|
||||||
str.append(char(((codepoint >> 6) & 0x3f /*0b00111111*/) | 0x80));
|
str.append(char(((codepoint >> 6) & 0x3f /*0b00111111*/) | 0x80));
|
||||||
} else {
|
} else if (codepoint < 0x00110000) {
|
||||||
str.append(char(0xc0 /*0b11000000*/ | (codepoint >> 6)));
|
str.append(char(0xf0 /*0b11110000*/ | (codepoint >> 18)));
|
||||||
|
str.append(char(((codepoint >> 12) & 0x3f /*0b00111111*/) | 0x80));
|
||||||
|
str.append(char(((codepoint >> 6) & 0x3f /*0b00111111*/) | 0x80));
|
||||||
}
|
}
|
||||||
str.append(char((codepoint & 0x3f /*0b00111111*/) | 0x80));
|
str.append(char((codepoint & 0x3f /*0b00111111*/) | 0x80));
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user