mirror of
https://github.com/boostorg/mqtt5.git
synced 2025-07-31 21:14:49 +02:00
Align utf8 manipulation code style.
This commit is contained in:
@@ -6,44 +6,32 @@
|
||||
|
||||
namespace async_mqtt5::detail {
|
||||
|
||||
struct code_point {
|
||||
int32_t val;
|
||||
uint32_t size;
|
||||
inline int pop_front_unichar(std::string_view& s) {
|
||||
// assuming that s.length() is > 0
|
||||
|
||||
auto operator<=>(const code_point&) const = default;
|
||||
int n = s[0] & 0xF0;
|
||||
int ch = -1;
|
||||
|
||||
static code_point from(std::string_view s) {
|
||||
auto hnibble = s[0] & 0xF0;
|
||||
return
|
||||
(hnibble & 0x80) == 0 ?
|
||||
code_point { s[0], 1 }
|
||||
:
|
||||
(hnibble == 0xC0 || hnibble == 0xD0) && s.size() > 1 ?
|
||||
code_point {
|
||||
(int32_t(s[0] & 0x1F) << 6) | int32_t(s[1] & 0x3F),
|
||||
2
|
||||
}
|
||||
:
|
||||
(hnibble == 0xE0) && s.size() > 2 ?
|
||||
code_point {
|
||||
(int32_t(s[0] & 0x1F) << 12) |
|
||||
(int32_t(s[1] & 0x3F) << 6) |
|
||||
int32_t(s[2] & 0x3F),
|
||||
3
|
||||
}
|
||||
:
|
||||
(hnibble == 0xF0) && s.size() > 3 ?
|
||||
code_point {
|
||||
(int32_t(s[0] & 0x1F) << 18) |
|
||||
(int32_t(s[1] & 0x3F) << 12) |
|
||||
(int32_t(s[2] & 0x3F) << 6) |
|
||||
int32_t(s[3] & 0x3F),
|
||||
4
|
||||
}
|
||||
:
|
||||
code_point { -1, 0 };
|
||||
if ((n & 0x80) == 0) {
|
||||
ch = s[0];
|
||||
s.remove_prefix(1);
|
||||
}
|
||||
};
|
||||
else if ((n == 0xC0 || n == 0xD0) && s.size() > 1) {
|
||||
ch = ((s[0] & 0x1F) << 6) | (s[1] & 0x3F);
|
||||
s.remove_prefix(2);
|
||||
}
|
||||
else if ((n == 0xE0) && s.size() > 2) {
|
||||
ch = ((s[0] & 0x1F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
|
||||
s.remove_prefix(3);
|
||||
}
|
||||
else if ((n == 0xF0) && s.size() > 3) {
|
||||
ch = ((s[0] & 0x1F) << 18) | ((s[1] & 0x3F) << 12) |
|
||||
((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
|
||||
s.remove_prefix(4);
|
||||
}
|
||||
|
||||
return ch;
|
||||
}
|
||||
|
||||
inline bool is_valid_mqtt_utf8(std::string_view str) {
|
||||
constexpr size_t max_sz = 65535;
|
||||
@@ -51,23 +39,21 @@ inline bool is_valid_mqtt_utf8(std::string_view str) {
|
||||
if (str.size() > max_sz)
|
||||
return false;
|
||||
|
||||
auto is_valid_cp = [](int32_t c) -> bool {
|
||||
constexpr int32_t fe_flag = 0xFE;
|
||||
constexpr int32_t ff_flag = 0xFF;
|
||||
|
||||
return c >= 32 && // U+0000...U+001F control characters
|
||||
(c < 127 || c > 159) && // U+007F...U+009F control characters
|
||||
(c < 55296 || c > 57343) && // U+D800...U+DFFF surrogates
|
||||
(c < 64976 || c > 65007) &&// U+FDD0...U+FDEF non-characters
|
||||
(c & fe_flag) != fe_flag && // non-characters
|
||||
(c & ff_flag) != ff_flag;
|
||||
};
|
||||
constexpr int fe_flag = 0xFE;
|
||||
constexpr int ff_flag = 0xFF;
|
||||
|
||||
while (!str.empty()) {
|
||||
auto cp = code_point::from(str.data());
|
||||
if (!is_valid_cp(cp.val))
|
||||
int c = pop_front_unichar(str);
|
||||
|
||||
auto is_valid = c > 0x001F && // U+0000...U+001F control characters
|
||||
(c < 0x007F || c > 0x009F) && // U+007F...U+009F control characters
|
||||
(c < 0xD800 || c > 0xDFFF) && // U+D800...U+DFFF surrogates
|
||||
(c < 0xFDD0 || c > 0xFDEF) && // U+FDD0...U+FDEF non-characters
|
||||
(c & fe_flag) != fe_flag && // non-characters
|
||||
(c & ff_flag) != ff_flag;
|
||||
|
||||
if (!is_valid)
|
||||
return false;
|
||||
str.remove_prefix(cp.size);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@@ -4,45 +4,46 @@
|
||||
|
||||
BOOST_AUTO_TEST_SUITE(utf8_mqtt/*, *boost::unit_test::disabled()*/)
|
||||
|
||||
std::string to_str(async_mqtt5::detail::code_point cp) {
|
||||
return cp.size == 1 ? std::string { char(cp.val) }
|
||||
: cp.size == 2 ? std::string { char((cp.val >> 6) | 0xC0), char((cp.val & 0x3F) | 0x80) }
|
||||
: cp.size == 3 ? std::string {
|
||||
char((cp.val >> 12) | 0xE0),
|
||||
char(((cp.val >> 6) & 0x3F) | 0x80),
|
||||
char((cp.val & 0x3F) | 0x80)
|
||||
}
|
||||
: std::string { // cp.size == 4
|
||||
char((cp.val >> 18) | 0xF0),
|
||||
char(((cp.val >> 12) & 0x3F) | 0x80),
|
||||
char(((cp.val >> 6) & 0x3F) | 0x80),
|
||||
char((cp.val & 0x3F) | 0x80)
|
||||
|
||||
std::string to_str(int utf8ch) {
|
||||
if (utf8ch < 0x80)
|
||||
return { char(utf8ch) };
|
||||
if (utf8ch < 0x800)
|
||||
return {
|
||||
char((utf8ch >> 6) | 0xC0),
|
||||
char((utf8ch & 0x3F) | 0x80)
|
||||
};
|
||||
}
|
||||
|
||||
async_mqtt5::detail::code_point cp(int32_t val) {
|
||||
return { val, uint32_t(val < 0x80 ? 1 : val < 0x800 ? 2 : val < 0xFFFF ? 3 : /* val < 0x10FFFF */ 4) };
|
||||
if (utf8ch < 0xFFFF)
|
||||
return {
|
||||
char((utf8ch >> 12) | 0xE0),
|
||||
char(((utf8ch >> 6) & 0x3F) | 0x80),
|
||||
char((utf8ch & 0x3F) | 0x80)
|
||||
};
|
||||
return {
|
||||
char((utf8ch >> 18) | 0xF0),
|
||||
char(((utf8ch >> 12) & 0x3F) | 0x80),
|
||||
char(((utf8ch >> 6) & 0x3F) | 0x80),
|
||||
char((utf8ch & 0x3F) | 0x80)
|
||||
};
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(utf8_string_validation) {
|
||||
using namespace async_mqtt5::detail;
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8("stringy"), true);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(""), true);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(cp(1))), false);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(cp(31))), false);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(cp(32))), true);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(cp(126))), true);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(cp(127))), false);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(cp(159))), false);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(cp(160))), true);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(cp(55296))), false);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(cp(57343))), false);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(cp(64976))), false);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(cp(65007))), false);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(cp(65008))), true);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(cp(131070))), false);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(cp(131071))), false);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(1)), false);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(31)), false);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(32)), true);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(126)), true);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(127)), false);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(159)), false);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(160)), true);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(55296)), false);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(57343)), false);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(64976)), false);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(65007)), false);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(65008)), true);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(131070)), false);
|
||||
BOOST_CHECK_EQUAL(is_valid_mqtt_utf8(to_str(131071)), false);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(utf8_topic_validation) {
|
||||
|
Reference in New Issue
Block a user