Clang: Extract Utils::utf8AdvanceCodePoint

Change-Id: I922c7b0f2f0e0d50f34035e9affef4504df59892
Reviewed-by: David Schulz <david.schulz@qt.io>
This commit is contained in:
Nikolai Kosjar
2019-07-24 09:34:50 +02:00
parent 75a065d3d1
commit c4889e9904
3 changed files with 29 additions and 19 deletions

View File

@@ -190,5 +190,30 @@ QString utf16LineTextInUtf8Buffer(const QByteArray &utf8Buffer, int currentUtf8O
utf8Buffer.mid(lineStartUtf8Offset, lineEndUtf8Offset - lineStartUtf8Offset));
}
// Tells whether 'byte' has its top bit set. In UTF-8 that is the case for every
// byte (lead or continuation) of a code point encoded with more than one byte.
static bool isByteOfMultiByteCodePoint(unsigned char byte)
{
    const unsigned highBitMask = 0x80;
    return (byte & highBitMask) != 0;
}
// Moves 'current' forward by one UTF-8 code point in a '\0'-terminated buffer.
//
// Returns false, leaving 'current' untouched, when it already points at the
// terminating '\0'. Otherwise 'current' is advanced and true is returned.
//
// The number of trailing bytes is derived from the lead byte, but only bytes
// that really are continuation bytes (10xxxxxx) are consumed. A truncated or
// otherwise malformed sequence therefore can never carry 'current' past the
// terminating '\0'.
bool utf8AdvanceCodePoint(const char *&current)
{
    if (Q_UNLIKELY(*current == '\0'))
        return false;

    const unsigned char leadByte = static_cast<unsigned char>(*current);
    ++current; // The lead byte is always consumed.

    // Single-byte code point (ASCII): nothing more to skip.
    if (!Q_UNLIKELY(isByteOfMultiByteCodePoint(leadByte)))
        return true;

    // Multi-byte code point: every set bit following the two top bits of the
    // lead byte announces one more trailing byte (110xxxxx -> 1, 1110xxxx -> 2, ...).
    unsigned expectedTrailingBytes = 1;
    for (unsigned char c = static_cast<unsigned char>(leadByte << 2);
         isByteOfMultiByteCodePoint(c);
         c = static_cast<unsigned char>(c << 1)) {
        ++expectedTrailingBytes;
    }

    // Skip the announced trailing bytes, but stop early at anything that is not
    // a continuation byte. '\0' is not one, so the terminator is never crossed.
    while (expectedTrailingBytes > 0
           && (static_cast<unsigned char>(*current) & 0xC0) == 0x80) {
        ++current;
        --expectedTrailingBytes;
    }

    return true;
}
} // Text
} // Utils

View File

@@ -55,6 +55,7 @@ QTCREATOR_UTILS_EXPORT QTextCursor flippedCursor(const QTextCursor &cursor);
QTCREATOR_UTILS_EXPORT QTextCursor wordStartCursor(const QTextCursor &cursor);
QTCREATOR_UTILS_EXPORT QString wordUnderCursor(const QTextCursor &cursor);
// Advances 'current' past the UTF-8 code point it points at and returns true.
// Returns false and leaves 'current' unchanged when it points at '\0'.
QTCREATOR_UTILS_EXPORT bool utf8AdvanceCodePoint(const char *&current);
QTCREATOR_UTILS_EXPORT int utf8NthLineOffset(const QTextDocument *textDocument,
const QByteArray &buffer,
int line);

View File

@@ -25,6 +25,8 @@
#include "utf8positionfromlinecolumn.h"
#include <utils/textutils.h>
#include <QtGlobal>
namespace ClangBackEnd {
@@ -77,31 +79,13 @@ bool Utf8PositionFromLineColumn::advanceToColumn(int column)
return column == 0;
}
// Steps m_currentByte over one UTF-8 code point and records the position it
// started from in m_previousByte.
//
// Returns false without moving when the current byte is '\0', or when it is
// '\n' and stopOnNewLine is set. Otherwise returns the result of
// Utils::Text::utf8AdvanceCodePoint().
bool Utf8PositionFromLineColumn::advanceCodePoint(bool stopOnNewLine)
{
    if (Q_UNLIKELY(*m_currentByte == '\0') || (stopOnNewLine && *m_currentByte == '\n'))
        return false;

    m_previousByte = m_currentByte;

    // The byte-level decoding is shared with other users via Utils::Text, so it
    // is not duplicated here.
    return Utils::Text::utf8AdvanceCodePoint(m_currentByte);
}
} // namespace ClangBackEnd