Optimize storage of tiny strings (up to 3 characters)

This commit is contained in:
Benoit Blanchon
2025-04-09 08:55:08 +02:00
parent 7f75985e47
commit 91397f9f06
16 changed files with 229 additions and 57 deletions

View File

@ -1,6 +1,11 @@
ArduinoJson: change log
=======================
HEAD
----
* Optimize storage of tiny strings (up to 3 characters)
v7.3.1 (2025-02-27)
------

View File

@ -292,22 +292,23 @@ TEST_CASE("deserialize JSON object") {
}
SECTION("Repeated key") {
DeserializationError err = deserializeJson(doc, "{a:{b:{c:1}},a:2}");
DeserializationError err =
deserializeJson(doc, "{alfa:{bravo:{charlie:1}},alfa:2}");
REQUIRE(err == DeserializationError::Ok);
REQUIRE(doc.as<std::string>() == "{\"a\":2}");
REQUIRE(doc.as<std::string>() == "{\"alfa\":2}");
REQUIRE(spy.log() ==
AllocatorLog{
Allocate(sizeofStringBuffer()),
Allocate(sizeofPool()),
Reallocate(sizeofStringBuffer(), sizeofString("a")),
Reallocate(sizeofStringBuffer(), sizeofString("alfa")),
Allocate(sizeofStringBuffer()),
Reallocate(sizeofStringBuffer(), sizeofString("b")),
Reallocate(sizeofStringBuffer(), sizeofString("bravo")),
Allocate(sizeofStringBuffer()),
Reallocate(sizeofStringBuffer(), sizeofString("c")),
Reallocate(sizeofStringBuffer(), sizeofString("charlie")),
Allocate(sizeofStringBuffer()),
Deallocate(sizeofString("b")),
Deallocate(sizeofString("c")),
Deallocate(sizeofString("bravo")),
Deallocate(sizeofString("charlie")),
Deallocate(sizeofStringBuffer()),
Reallocate(sizeofPool(), sizeofObject(2) + sizeofObject(1)),
});
@ -389,11 +390,11 @@ TEST_CASE("deserialize JSON object under memory constraints") {
SECTION("string allocation fails") {
timebomb.setCountdown(3);
char input[] = "{\"a\":\"b\"}";
char input[] = "{\"alfa\":\"bravo\"}";
DeserializationError err = deserializeJson(doc, input);
REQUIRE(err == DeserializationError::NoMemory);
REQUIRE(doc.as<std::string>() == "{\"a\":null}");
REQUIRE(doc.as<std::string>() == "{\"alfa\":null}");
}
}

View File

@ -100,13 +100,13 @@ TEST_CASE("JsonObject::set()") {
JsonDocument doc3(&timebomb);
JsonObject obj3 = doc3.to<JsonObject>();
obj1["a"_s] = 1;
obj1["b"_s] = 2;
obj1["alpha"_s] = 1;
obj1["beta"_s] = 2;
bool success = obj3.set(obj1);
REQUIRE(success == false);
REQUIRE(doc3.as<std::string>() == "{\"a\":1}");
REQUIRE(doc3.as<std::string>() == "{\"alpha\":1}");
}
SECTION("copy fails in the middle of an array") {

View File

@ -199,7 +199,7 @@ TEST_CASE("JsonVariant::as()") {
REQUIRE(variant.as<JsonString>() == "hello");
}
SECTION("set(std::string(\"4.2\"))") {
SECTION("set(std::string(\"4.2\")) (tiny string optimization)") {
variant.set("4.2"_s);
REQUIRE(variant.as<bool>() == true);
@ -211,6 +211,18 @@ TEST_CASE("JsonVariant::as()") {
REQUIRE(variant.as<JsonString>().isStatic() == false);
}
SECTION("set(std::string(\"123.45\"))") {
variant.set("123.45"_s);
REQUIRE(variant.as<bool>() == true);
REQUIRE(variant.as<long>() == 123L);
REQUIRE(variant.as<double>() == Approx(123.45));
REQUIRE(variant.as<const char*>() == "123.45"_s);
REQUIRE(variant.as<std::string>() == "123.45"_s);
REQUIRE(variant.as<JsonString>() == "123.45");
REQUIRE(variant.as<JsonString>().isStatic() == false);
}
SECTION("set(\"true\")") {
variant.set("true");

View File

@ -63,6 +63,18 @@ TEST_CASE("JsonVariant::set() when there is enough memory") {
});
}
SECTION("char* (tiny string optimization)") {
char str[16];
strcpy(str, "abc");
bool result = variant.set(str);
strcpy(str, "def");
REQUIRE(result == true);
REQUIRE(variant == "abc"); // stores by copy
REQUIRE(spy.log() == AllocatorLog{});
}
SECTION("(char*)0") {
bool result = variant.set(static_cast<char*>(0));

View File

@ -5,8 +5,11 @@
#include <ArduinoJson.h>
#include <catch.hpp>
#include "Allocators.hpp"
TEST_CASE("deserialize MsgPack array") {
JsonDocument doc;
SpyingAllocator spy;
JsonDocument doc(&spy);
SECTION("fixarray") {
SECTION("empty") {
@ -30,6 +33,24 @@ TEST_CASE("deserialize MsgPack array") {
REQUIRE(array[0] == 1);
REQUIRE(array[1] == 2);
}
SECTION("tiny strings") {
DeserializationError error =
deserializeMsgPack(doc, "\x92\xA3xxx\xA3yyy");
REQUIRE(error == DeserializationError::Ok);
REQUIRE(doc.is<JsonArray>());
REQUIRE(doc.size() == 2);
REQUIRE(doc[0] == "xxx");
REQUIRE(doc[1] == "yyy");
REQUIRE(spy.log() == AllocatorLog{
Allocate(sizeofPool()),
Allocate(sizeofString("xxx")),
// Buffer is reused for the next string
Deallocate(sizeofString("xxx")),
Reallocate(sizeofPool(), sizeofPool(2)),
});
}
}
SECTION("array 16") {

View File

@ -348,13 +348,14 @@ TEST_CASE("deserializeMsgPack() under memory constaints") {
SECTION("{}") {
checkError(0, "\x80", DeserializationError::Ok);
}
SECTION("{H:1}") {
checkError(1, "\x81\xA1H\x01", DeserializationError::NoMemory);
checkError(2, "\x81\xA1H\x01", DeserializationError::Ok);
SECTION("{Hello:1}") {
checkError(1, "\x81\xA5Hello\x01", DeserializationError::NoMemory);
checkError(2, "\x81\xA5Hello\x01", DeserializationError::Ok);
}
SECTION("{H:1,W:2}") {
checkError(2, "\x82\xA1H\x01\xA1W\x02", DeserializationError::NoMemory);
checkError(3, "\x82\xA1H\x01\xA1W\x02", DeserializationError::Ok);
SECTION("{Hello:1,World:2}") {
checkError(2, "\x82\xA5Hello\x01\xA5World\x02",
DeserializationError::NoMemory);
checkError(3, "\x82\xA5Hello\x01\xA5World\x02", DeserializationError::Ok);
}
}
@ -362,14 +363,16 @@ TEST_CASE("deserializeMsgPack() under memory constaints") {
SECTION("{}") {
checkError(0, "\xDE\x00\x00", DeserializationError::Ok);
}
SECTION("{H:1}") {
checkError(1, "\xDE\x00\x01\xA1H\x01", DeserializationError::NoMemory);
checkError(2, "\xDE\x00\x01\xA1H\x01", DeserializationError::Ok);
}
SECTION("{H:1,W:2}") {
checkError(2, "\xDE\x00\x02\xA1H\x01\xA1W\x02",
SECTION("{Hello:1}") {
checkError(1, "\xDE\x00\x01\xA5Hello\x01",
DeserializationError::NoMemory);
checkError(3, "\xDE\x00\x02\xA1H\x01\xA1W\x02", DeserializationError::Ok);
checkError(2, "\xDE\x00\x01\xA5Hello\x01", DeserializationError::Ok);
}
SECTION("{Hello:1,World:2}") {
checkError(2, "\xDE\x00\x02\xA5Hello\x01\xA5World\x02",
DeserializationError::NoMemory);
checkError(3, "\xDE\x00\x02\xA5Hello\x01\xA5World\x02",
DeserializationError::Ok);
}
}
@ -382,8 +385,8 @@ TEST_CASE("deserializeMsgPack() under memory constaints") {
DeserializationError::NoMemory);
checkError(2, "\xDF\x00\x00\x00\x01\xA1H\x01", DeserializationError::Ok);
}
SECTION("{H:1,W:2}") {
checkError(2, "\xDF\x00\x00\x00\x02\xA1H\x01\xA1W\x02",
SECTION("{Hello:1,World:2}") {
checkError(2, "\xDF\x00\x00\x00\x02\xA5Hello\x01\xA5World\x02",
DeserializationError::NoMemory);
checkError(3, "\xDF\x00\x00\x00\x02\xA1H\x01\xA1W\x02",
DeserializationError::Ok);

View File

@ -8,6 +8,7 @@ add_executable(ResourceManagerTests
saveString.cpp
shrinkToFit.cpp
size.cpp
StringBuffer.cpp
StringBuilder.cpp
swap.cpp
)

View File

@ -0,0 +1,50 @@
// ArduinoJson - https://arduinojson.org
// Copyright © 2014-2025, Benoit BLANCHON
// MIT License
#include <ArduinoJson/Memory/StringBuffer.hpp>
#include <catch.hpp>
#include "Allocators.hpp"
#include "Literals.hpp"
using namespace ArduinoJson::detail;
// Checks which VariantType StringBuffer::save() selects depending on the
// length and content of the buffered string.
TEST_CASE("StringBuffer") {
  SpyingAllocator spy;
  ResourceManager resources(&spy);
  StringBuffer stringBuffer(&resources);
  VariantData variant;

  SECTION("Tiny string") {
    // NOTE(review): strcpy() also writes the terminator, i.e. one byte more
    // than the 3 reserved -- presumably reserve() leaves room for it; confirm.
    auto buffer = stringBuffer.reserve(3);
    strcpy(buffer, "hi!");

    stringBuffer.save(&variant);

    REQUIRE(variant.type() == VariantType::TinyString);
    REQUIRE(variant.asString() == "hi!");
  }

  SECTION("Tiny string can't contain NUL") {
    // Three bytes with an embedded NUL: short enough, but not NUL-free.
    auto buffer = stringBuffer.reserve(3);
    memcpy(buffer, "a\0b", 3);

    stringBuffer.save(&variant);

    REQUIRE(variant.type() == VariantType::OwnedString);

    // The embedded NUL must survive, so inspect the bytes one by one.
    auto saved = variant.asString();
    REQUIRE(saved.size() == 3);
    const char* bytes = saved.c_str();
    REQUIRE(bytes[0] == 'a');
    REQUIRE(bytes[1] == 0);
    REQUIRE(bytes[2] == 'b');
  }

  SECTION("Tiny string can't have 4 characters") {
    // One character more than the tiny-string test above.
    auto buffer = stringBuffer.reserve(4);
    strcpy(buffer, "alfa");

    stringBuffer.save(&variant);

    REQUIRE(variant.type() == VariantType::OwnedString);
    REQUIRE(variant.asString() == "alfa");
  }
}

View File

@ -6,8 +6,8 @@
#include <catch.hpp>
#include "Allocators.hpp"
#include "Literals.hpp"
using namespace ArduinoJson;
using namespace ArduinoJson::detail;
TEST_CASE("StringBuilder") {
@ -22,13 +22,31 @@ TEST_CASE("StringBuilder") {
str.startString();
str.save(&data);
REQUIRE(resources.size() == sizeofString(""));
REQUIRE(resources.overflowed() == false);
REQUIRE(spyingAllocator.log() ==
AllocatorLog{
Allocate(sizeofStringBuffer()),
Reallocate(sizeofStringBuffer(), sizeofString("")),
});
REQUIRE(spyingAllocator.log() == AllocatorLog{
Allocate(sizeofStringBuffer()),
});
REQUIRE(data.type() == VariantType::TinyString);
}
SECTION("Tiny string") {
StringBuilder str(&resources);
str.startString();
str.append("url");
REQUIRE(str.isValid() == true);
REQUIRE(str.str() == "url");
REQUIRE(spyingAllocator.log() == AllocatorLog{
Allocate(sizeofStringBuffer()),
});
VariantData data;
str.save(&data);
REQUIRE(resources.overflowed() == false);
REQUIRE(data.type() == VariantType::TinyString);
REQUIRE(data.asString() == "url");
}
SECTION("Short string fits in first allocation") {
@ -98,12 +116,12 @@ TEST_CASE("StringBuilder") {
}
}
static const char* saveString(StringBuilder& builder, const char* s) {
static JsonString saveString(StringBuilder& builder, const char* s) {
VariantData data;
builder.startString();
builder.append(s);
builder.save(&data);
return data.asString().c_str();
return data.asString();
}
TEST_CASE("StringBuilder::save() deduplicates strings") {
@ -116,9 +134,9 @@ TEST_CASE("StringBuilder::save() deduplicates strings") {
auto s2 = saveString(builder, "world");
auto s3 = saveString(builder, "hello");
REQUIRE(s1 == "hello"_s);
REQUIRE(s2 == "world"_s);
REQUIRE(+s1 == +s3); // same address
REQUIRE(s1 == "hello");
REQUIRE(s2 == "world");
REQUIRE(+s1.c_str() == +s3.c_str()); // same address
REQUIRE(spy.log() ==
AllocatorLog{
@ -134,9 +152,9 @@ TEST_CASE("StringBuilder::save() deduplicates strings") {
auto s1 = saveString(builder, "hello world");
auto s2 = saveString(builder, "hello");
REQUIRE(s1 == "hello world"_s);
REQUIRE(s2 == "hello"_s);
REQUIRE(+s2 != +s1); // different address
REQUIRE(s1 == "hello world");
REQUIRE(s2 == "hello");
REQUIRE(+s2.c_str() != +s1.c_str()); // different address
REQUIRE(spy.log() ==
AllocatorLog{
@ -149,18 +167,18 @@ TEST_CASE("StringBuilder::save() deduplicates strings") {
SECTION("Don't overrun") {
auto s1 = saveString(builder, "hello world");
auto s2 = saveString(builder, "wor");
auto s2 = saveString(builder, "worl");
REQUIRE(s1 == "hello world"_s);
REQUIRE(s2 == "wor"_s);
REQUIRE(s2 != s1);
REQUIRE(s1 == "hello world");
REQUIRE(s2 == "worl");
REQUIRE(s2.c_str() != s1.c_str()); // different address
REQUIRE(spy.log() ==
AllocatorLog{
Allocate(sizeofStringBuffer()),
Reallocate(sizeofStringBuffer(), sizeofString("hello world")),
Allocate(sizeofStringBuffer()),
Reallocate(sizeofStringBuffer(), sizeofString("wor")),
Reallocate(sizeofStringBuffer(), sizeofString("worl")),
});
}
}

View File

@ -34,12 +34,16 @@ class StringBuffer {
JsonString str() const {
ARDUINOJSON_ASSERT(node_ != nullptr);
return JsonString(node_->data, node_->length);
}
void save(VariantData* data) {
data->setOwnedString(commitStringNode());
ARDUINOJSON_ASSERT(node_ != nullptr);
const char* s = node_->data;
if (isTinyString(s, size_))
data->setTinyString(s, static_cast<uint8_t>(size_));
else
data->setOwnedString(commitStringNode());
}
void saveRaw(VariantData* data) {

View File

@ -28,8 +28,15 @@ class StringBuilder {
void save(VariantData* variant) {
ARDUINOJSON_ASSERT(variant != nullptr);
ARDUINOJSON_ASSERT(node_ != nullptr);
node_->data[size_] = 0;
StringNode* node = resources_->getString(adaptString(node_->data, size_));
char* p = node_->data;
if (isTinyString(p, size_)) {
variant->setTinyString(p, static_cast<uint8_t>(size_));
return;
}
p[size_] = 0;
StringNode* node = resources_->getString(adaptString(p, size_));
if (!node) {
node = resources_->resizeString(node_, size_);
ARDUINOJSON_ASSERT(node != nullptr); // realloc to smaller can't fail

View File

@ -403,7 +403,7 @@ class MsgPackDeserializer {
JsonString key = stringBuffer_.str();
TFilter memberFilter = filter[key.c_str()];
VariantData* member;
VariantData* member = 0;
if (memberFilter.allow()) {
ARDUINOJSON_ASSERT(object != 0);
@ -413,8 +413,6 @@ class MsgPackDeserializer {
return DeserializationError::NoMemory;
stringBuffer_.save(keyVariant);
} else {
member = 0;
}
err = parseVariant(member, memberFilter, nestingLimit.decrement());

View File

@ -24,6 +24,7 @@ enum class VariantTypeBits : uint8_t {
enum class VariantType : uint8_t {
Null = 0, // 0000 0000
TinyString = 0x02, // 0000 0010
RawString = 0x03, // 0000 0011
LinkedString = 0x04, // 0000 0100
OwnedString = 0x05, // 0000 0101
@ -46,6 +47,8 @@ inline bool operator&(VariantType type, VariantTypeBits bit) {
return (uint8_t(type) & uint8_t(bit)) != 0;
}
// Maximum number of characters stored inline as a TinyString.
// VariantContent::asTinyString is declared with one element more than this.
const size_t tinyStringMaxLength = 3;
union VariantContent {
VariantContent() {}
@ -61,6 +64,7 @@ union VariantContent {
CollectionData asCollection;
const char* asLinkedString;
struct StringNode* asOwnedString;
char asTinyString[tinyStringMaxLength + 1];
};
#if ARDUINOJSON_USE_EXTENSIONS

View File

@ -17,6 +17,16 @@ ARDUINOJSON_BEGIN_PRIVATE_NAMESPACE
template <typename T>
T parseNumber(const char* s);
// Tells whether the first `n` characters of `s` qualify as a tiny string:
// there must be at most tinyStringMaxLength of them and none may be NUL.
// `s` only has to support operator[] (raw pointer or string-like object).
template <typename T>
static bool isTinyString(const T& s, size_t n) {
  if (n > tinyStringMaxLength)
    return false;

  // n fits in a byte here, so the narrowing below cannot lose information.
  const uint8_t length = uint8_t(n);

  // All `length` characters are read; there is no early exit on the first NUL.
  bool everyCharIsNonZero = true;
  uint8_t index = 0;
  while (index < length) {
    everyCharIsNonZero &= (s[index] != 0);
    ++index;
  }
  return everyCharIsNonZero;
}
class VariantData {
VariantContent content_; // must be first to allow cast from array to variant
VariantType type_;
@ -63,6 +73,9 @@ class VariantData {
case VariantType::Object:
return visit.visit(content_.asObject);
case VariantType::TinyString:
return visit.visit(JsonString(content_.asTinyString));
case VariantType::LinkedString:
return visit.visit(JsonString(content_.asLinkedString, true));
@ -199,6 +212,9 @@ class VariantData {
case VariantType::Int64:
return static_cast<T>(extension->asInt64);
#endif
case VariantType::TinyString:
str = content_.asTinyString;
break;
case VariantType::LinkedString:
str = content_.asLinkedString;
break;
@ -241,6 +257,9 @@ class VariantData {
case VariantType::Int64:
return convertNumber<T>(extension->asInt64);
#endif
case VariantType::TinyString:
str = content_.asTinyString;
break;
case VariantType::LinkedString:
str = content_.asLinkedString;
break;
@ -281,6 +300,8 @@ class VariantData {
JsonString asString() const {
switch (type_) {
case VariantType::TinyString:
return JsonString(content_.asTinyString);
case VariantType::LinkedString:
return JsonString(content_.asLinkedString, true);
case VariantType::OwnedString:
@ -395,7 +416,8 @@ class VariantData {
bool isString() const {
return type_ == VariantType::LinkedString ||
type_ == VariantType::OwnedString;
type_ == VariantType::OwnedString ||
type_ == VariantType::TinyString;
}
size_t nesting(const ResourceManager* resources) const {
@ -504,6 +526,15 @@ class VariantData {
content_.asLinkedString = s;
}
void setTinyString(const char* s, uint8_t n) {
ARDUINOJSON_ASSERT(type_ == VariantType::Null); // must call clear() first
ARDUINOJSON_ASSERT(s);
type_ = VariantType::TinyString;
for (uint8_t i = 0; i < n; i++)
content_.asTinyString[i] = s[i];
content_.asTinyString[n] = 0;
}
void setOwnedString(StringNode* s) {
ARDUINOJSON_ASSERT(type_ == VariantType::Null); // must call clear() first
ARDUINOJSON_ASSERT(s);

View File

@ -31,6 +31,11 @@ inline bool VariantData::setString(TAdaptedString value,
return true;
}
if (isTinyString(value, value.size())) {
setTinyString(value.data(), uint8_t(value.size()));
return true;
}
auto dup = resources->saveString(value);
if (dup) {
setOwnedString(dup);