Added string deduplication (closes #1303)

This commit is contained in:
Benoit Blanchon
2020-07-21 20:15:31 +02:00
parent 8ef226bcb8
commit 764ff2cd53
31 changed files with 574 additions and 156 deletions

View File

@ -215,6 +215,10 @@
#define ARDUINOJSON_TAB " "
#endif
// Compile-time switch for string deduplication in the memory pool.
// Defaults to 1 (enabled) unless the user defined it beforehand; code that
// looks up an existing copy of a string is guarded by
// `#if ARDUINOJSON_ENABLE_STRING_DEDUPLICATION`.
#ifndef ARDUINOJSON_ENABLE_STRING_DEDUPLICATION
#define ARDUINOJSON_ENABLE_STRING_DEDUPLICATION 1
#endif
#ifndef ARDUINOJSON_STRING_BUFFER_SIZE
#define ARDUINOJSON_STRING_BUFFER_SIZE 32
#endif

View File

@ -212,8 +212,6 @@ class JsonDeserializer {
// Read each key value pair
for (;;) {
_stringStorage.startString(_pool);
// Parse key
err = parseKey();
if (err)
@ -233,7 +231,9 @@ class JsonDeserializer {
if (memberFilter.allow()) {
VariantData *variant = object.getMember(adaptString(key));
if (!variant) {
_stringStorage.commit(_pool);
// Save key in memory pool.
// This MUST be done before adding the slot.
key = _stringStorage.save(_pool);
// Allocate slot in object
VariantSlot *slot = object.addSlot(_pool);
@ -325,6 +325,7 @@ class JsonDeserializer {
}
DeserializationError parseKey() {
_stringStorage.startString(_pool);
if (isQuote(current())) {
return parseQuotedString();
} else {
@ -337,8 +338,8 @@ class JsonDeserializer {
DeserializationError err = parseQuotedString();
if (err)
return err;
_stringStorage.commit(_pool);
variant.setOwnedString(make_not_null(_stringStorage.c_str()));
const char *value = _stringStorage.save(_pool);
variant.setOwnedString(make_not_null(value));
return DeserializationError::Ok;
}

View File

@ -51,40 +51,43 @@ class MemoryPool {
return allocRight<VariantSlot>();
}
char* allocFrozenString(size_t n) {
if (!canAlloc(n))
return 0;
char* s = _left;
_left += n;
checkInvariants();
return s;
}
template <typename TAdaptedString>
char* saveString(const TAdaptedString& str) {
const char* saveString(const TAdaptedString& str) {
if (str.isNull())
return 0;
#if ARDUINOJSON_ENABLE_STRING_DEDUPLICATION
const char* existingCopy = findString(str.begin());
if (existingCopy)
return existingCopy;
#endif
size_t n = str.size();
char* dup = allocFrozenString(n + 1);
if (dup) {
str.copyTo(dup, n);
dup[n] = 0; // force null-terminator
char* newCopy = allocString(n + 1);
if (newCopy) {
str.copyTo(newCopy, n);
newCopy[n] = 0; // force null-terminator
}
return dup;
return newCopy;
}
StringSlot allocExpandableString() {
StringSlot s;
s.value = _left;
s.size = size_t(_right - _left);
checkInvariants();
return s;
// Reports the currently unused region of the pool.
// *zoneStart receives _left and *zoneSize receives the number of bytes between
// _left and _right. Nothing is reserved: the pool's pointers are not modified.
void getFreeZone(char** zoneStart, size_t* zoneSize) const {
*zoneStart = _left;
*zoneSize = size_t(_right - _left);
}
void freezeString(StringSlot& s, size_t newSize) {
_left = (s.value + newSize);
s.size = newSize;
// Commits a string that has already been written at the start of the free zone
// (i.e. at _left) and returns the address where it is stored.
// `len` is the number of bytes to reserve for it.
// When deduplication is enabled and findString() locates a matching string
// earlier in the pool, that address is returned instead and _left is not moved.
const char* saveStringFromFreeZone(size_t len) {
#if ARDUINOJSON_ENABLE_STRING_DEDUPLICATION
  if (const char* existing = findString(_left))
    return existing;
#endif
  const char* const saved = _left;
  _left = _left + len;
  checkInvariants();
  return saved;
}
void clear() {
@ -100,18 +103,6 @@ class MemoryPool {
return _begin <= p && p < _end;
}
template <typename T>
T* allocRight() {
return reinterpret_cast<T*>(allocRight(sizeof(T)));
}
void* allocRight(size_t bytes) {
if (!canAlloc(bytes))
return 0;
_right -= bytes;
return _right;
}
// Workaround for missing placement new
void* operator new(size_t, void* p) {
return p;
@ -163,6 +154,46 @@ class MemoryPool {
ARDUINOJSON_ASSERT(isAligned(_right));
}
#if ARDUINOJSON_ENABLE_STRING_DEDUPLICATION
// Looks in [_begin, _left) for a stored string equal to the NUL-terminated
// sequence that starts at `str`. Returns the address of the stored copy, or 0
// when there is none.
// Entries are visited one after another: each candidate is compared from its
// first character, and after a mismatch the scan resumes just past the
// candidate's terminator, so comparisons only start at NUL-delimited
// boundaries.
// No length is passed in: the comparison stops only at a mismatch or at a NUL
// present on both sides, so `str` is dereferenced up to its terminator.
template <typename TIterator>
const char* findString(TIterator str) {
  char* cursor = _begin;
  while (cursor < _left) {
    char* const candidate = cursor;
    TIterator it = str;
    // Compare character by character; a shared NUL means the strings are equal.
    while (*it == *cursor) {
      if (*cursor++ == 0)
        return candidate;
      ++it;
    }
    // Mismatch: advance to this entry's terminator, then step over it.
    while (*cursor) ++cursor;
    ++cursor;
  }
  return 0;
}
#endif
// Reserves `n` bytes at the left end of the free zone and returns their
// address. Returns 0, leaving the pool untouched, when canAlloc(n) is false.
char* allocString(size_t n) {
  if (canAlloc(n)) {
    char* const start = _left;
    _left = start + n;
    checkInvariants();
    return start;
  }
  return 0;
}
// Typed convenience wrapper around allocRight(size_t): reserves sizeof(T) bytes
// and returns them as a T* (0 when the reservation fails).
// Only storage is reserved; no T object is constructed here.
template <typename T>
T* allocRight() {
  void* const raw = allocRight(sizeof(T));
  return reinterpret_cast<T*>(raw);
}
// Reserves `bytes` bytes at the right end of the free zone by moving _right
// down, and returns the new value of _right.
// Returns 0, leaving the pool untouched, when canAlloc(bytes) is false.
void* allocRight(size_t bytes) {
  if (canAlloc(bytes)) {
    _right -= bytes;
    return _right;
  }
  return 0;
}
char *_begin, *_left, *_right, *_end;
};

View File

@ -248,8 +248,8 @@ class MsgPackDeserializer {
_stringStorage.append('\0');
if (!_stringStorage.isValid())
return DeserializationError::NoMemory;
_stringStorage.commit(_pool);
result = _stringStorage.c_str();
result = _stringStorage.save(_pool);
return DeserializationError::Ok;
}

View File

@ -16,16 +16,17 @@
#define ARDUINOJSON_CONCAT8(A, B, C, D, E, F, G, H) \
ARDUINOJSON_CONCAT2(ARDUINOJSON_CONCAT4(A, B, C, D), \
ARDUINOJSON_CONCAT4(E, F, G, H))
#define ARDUINOJSON_CONCAT12(A, B, C, D, E, F, G, H, I, J, K, L) \
ARDUINOJSON_CONCAT8(A, B, C, D, E, F, G, \
ARDUINOJSON_CONCAT4(H, I, J, ARDUINOJSON_CONCAT2(K, L)))
// Pastes 13 tokens together by folding them into the 8 arguments accepted by
// ARDUINOJSON_CONCAT8: A..E are passed through unchanged, F..I are pre-joined
// with ARDUINOJSON_CONCAT4, and the pairs (J, K) and (L, M) are pre-joined with
// ARDUINOJSON_CONCAT2.
#define ARDUINOJSON_CONCAT13(A, B, C, D, E, F, G, H, I, J, K, L, M) \
ARDUINOJSON_CONCAT8(A, B, C, D, E, ARDUINOJSON_CONCAT4(F, G, H, I), \
ARDUINOJSON_CONCAT2(J, K), ARDUINOJSON_CONCAT2(L, M))
#define ARDUINOJSON_NAMESPACE \
ARDUINOJSON_CONCAT12( \
ARDUINOJSON_CONCAT13( \
ArduinoJson, ARDUINOJSON_VERSION_MAJOR, ARDUINOJSON_VERSION_MINOR, \
ARDUINOJSON_VERSION_REVISION, _, ARDUINOJSON_USE_LONG_LONG, \
ARDUINOJSON_USE_DOUBLE, ARDUINOJSON_DECODE_UNICODE, \
ARDUINOJSON_ENABLE_NAN, ARDUINOJSON_ENABLE_INFINITY, \
ARDUINOJSON_ENABLE_PROGMEM, ARDUINOJSON_ENABLE_COMMENTS)
ARDUINOJSON_ENABLE_PROGMEM, ARDUINOJSON_ENABLE_COMMENTS, \
ARDUINOJSON_ENABLE_STRING_DEDUPLICATION)
#endif

View File

@ -11,13 +11,13 @@ namespace ARDUINOJSON_NAMESPACE {
class StringCopier {
public:
void startString(MemoryPool* pool) {
_slot = pool->allocExpandableString();
pool->getFreeZone(&_ptr, &_capacity);
_size = 0;
}
void commit(MemoryPool* pool) {
ARDUINOJSON_ASSERT(_slot.value);
pool->freezeString(_slot, _size);
const char* save(MemoryPool* pool) {
ARDUINOJSON_ASSERT(_ptr);
return pool->saveStringFromFreeZone(_size);
}
void append(const char* s) {
@ -29,27 +29,28 @@ class StringCopier {
}
void append(char c) {
if (!_slot.value)
if (!_ptr)
return;
if (_size >= _slot.size) {
_slot.value = 0;
if (_size >= _capacity) {
_ptr = 0;
return;
}
_slot.value[_size++] = c;
_ptr[_size++] = c;
}
bool isValid() {
return _slot.value != 0;
return _ptr != 0;
}
const char* c_str() {
return _slot.value;
return _ptr;
}
private:
char* _ptr;
size_t _size;
StringSlot _slot;
size_t _capacity;
};
} // namespace ARDUINOJSON_NAMESPACE

View File

@ -16,7 +16,9 @@ class StringMover {
_startPtr = _writePtr;
}
void commit(MemoryPool*) const {}
// Returns the address of the current string, i.e. _startPtr.
// The MemoryPool argument is ignored and nothing is modified; the parameter
// presumably exists so this class offers the same save() call as the copying
// string storage — confirm against the callers.
const char* save(MemoryPool*) const {
return _startPtr;
}
void append(char c) {
*_writePtr++ = c;

View File

@ -39,6 +39,10 @@ class ArduinoStringAdapter {
return _str->length();
}
// Returns a pointer to the first character of the wrapped string, as given by
// its c_str(). MemoryPool::saveString() calls begin() on adapted strings to
// look for an existing copy.
const char* begin() const {
return _str->c_str();
}
typedef storage_policies::store_by_copy storage_policy;
private:

View File

@ -39,6 +39,10 @@ class ConstRamStringAdapter {
return _str;
}
// Returns the wrapped character pointer as-is (no copy, no null check).
const char* begin() const {
return _str;
}
typedef storage_policies::store_by_address storage_policy;
protected:

View File

@ -5,6 +5,7 @@
#pragma once
#include <ArduinoJson/Polyfills/pgmspace.hpp>
#include <ArduinoJson/Strings/FlashStringIterator.hpp>
#include <ArduinoJson/Strings/IsString.hpp>
#include <ArduinoJson/Strings/StoragePolicy.hpp>
@ -42,6 +43,10 @@ class FlashStringAdapter {
return strlen_P(reinterpret_cast<const char*>(_str));
}
// Returns a FlashStringIterator positioned on the first character of the
// wrapped string, so characters are read through the iterator rather than by
// dereferencing the pointer directly.
FlashStringIterator begin() const {
return FlashStringIterator(_str);
}
typedef storage_policies::store_by_copy storage_policy;
private:

View File

@ -0,0 +1,44 @@
// ArduinoJson - arduinojson.org
// Copyright Benoit Blanchon 2014-2020
// MIT License
#pragma once
namespace ARDUINOJSON_NAMESPACE {
// Minimal pointer-like cursor over a character sequence whose bytes are read
// through pgm_read_byte(). It wraps a plain `const char*` and offers only the
// operations listed below (+, -, ++, != and unary *).
class FlashStringIterator {
 public:
  // Wraps a flash-string handle; the handle is reinterpreted as `const char*`.
  explicit FlashStringIterator(const __FlashStringHelper* ptr)
      : _ptr(reinterpret_cast<const char*>(ptr)) {}

  // Wraps a raw character pointer.
  explicit FlashStringIterator(const char* ptr) : _ptr(ptr) {}

  // Returns a new iterator located `d` characters away from this one.
  FlashStringIterator operator+(ptrdiff_t d) const {
    const char* const shifted = _ptr + d;
    return FlashStringIterator(shifted);
  }

  // Returns the distance, in characters, from `other` to this iterator.
  ptrdiff_t operator-(FlashStringIterator other) const {
    return _ptr - other._ptr;
  }

  // Post-increment: advances and returns the position held before the call.
  FlashStringIterator operator++(int) {
    FlashStringIterator previous(_ptr);
    ++_ptr;
    return previous;
  }

  // Pre-increment: advances and returns a copy (not a reference) at the new
  // position.
  FlashStringIterator operator++() {
    _ptr += 1;
    return FlashStringIterator(_ptr);
  }

  // True when the two iterators point at different addresses.
  bool operator!=(FlashStringIterator other) const {
    return !(_ptr == other._ptr);
  }

  // Reads the character at the current position with pgm_read_byte().
  char operator*() const {
    return char(pgm_read_byte(_ptr));
  }

 private:
  const char* _ptr;
};
} // namespace ARDUINOJSON_NAMESPACE

View File

@ -5,6 +5,7 @@
#pragma once
#include <ArduinoJson/Namespace.hpp>
#include <ArduinoJson/Strings/FlashStringIterator.hpp>
#include <ArduinoJson/Strings/IsString.hpp>
#include <ArduinoJson/Strings/StoragePolicy.hpp>
@ -41,6 +42,10 @@ class SizedFlashStringAdapter {
return _size;
}
// Returns a FlashStringIterator positioned on the first character of the
// wrapped string. The iterator carries no length.
// NOTE(review): a consumer that walks from begin() until it meets a NUL,
// instead of stopping after size() characters, would read past _size if the
// data is not NUL-terminated — confirm how callers bound the scan.
FlashStringIterator begin() const {
return FlashStringIterator(_str);
}
typedef storage_policies::store_by_copy storage_policy;
private:

View File

@ -36,6 +36,10 @@ class SizedRamStringAdapter {
return _size;
}
// Returns the raw start pointer of the wrapped buffer. No length accompanies
// it.
// NOTE(review): a consumer that walks from begin() until it meets a NUL,
// instead of stopping after size() characters, would read past _size if the
// buffer is not NUL-terminated — confirm how callers bound the scan.
const char* begin() const {
return _str;
}
typedef storage_policies::store_by_copy storage_policy;
private:

View File

@ -41,6 +41,10 @@ class StlStringAdapter {
return _str->size();
}
// Returns a pointer to the first character of the wrapped string, as given by
// its c_str(). MemoryPool::saveString() calls begin() on adapted strings to
// look for an existing copy.
const char* begin() const {
return _str->c_str();
}
typedef storage_policies::store_by_copy storage_policy;
private:

View File

@ -192,7 +192,7 @@ class VariantData {
template <typename T>
bool setOwnedRaw(SerializedValue<T> value, MemoryPool *pool) {
char *dup = pool->saveString(adaptString(value.data(), value.size()));
const char *dup = pool->saveString(adaptString(value.data(), value.size()));
if (dup) {
setType(VALUE_IS_OWNED_RAW);
_content.asRaw.data = dup;