forked from qt-creator/qt-creator
C++: Add utf16 indices to Macro and Document::MacroUse
In most cases we need to work with the utf16 indices. Only in cppfindreferences the byte interface is still needed since there we read in files and work on a QByteArray to save memory. Change-Id: I6ef6a93fc1875a8c9a305c075d51a9ca034c41bb Reviewed-by: Erik Verbruggen <erik.verbruggen@digia.com>
This commit is contained in:
15
src/libs/3rdparty/cplusplus/Lexer.cpp
vendored
15
src/libs/3rdparty/cplusplus/Lexer.cpp
vendored
@@ -36,6 +36,21 @@ using namespace CPlusPlus;
|
||||
\sa Token
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn static void Lexer::yyinp_utf8(const char *&currentSourceChar, unsigned char &yychar, unsigned &utf16charCounter)
|
||||
|
||||
Process a single unicode code point in an UTF-8 encoded source.
|
||||
|
||||
\a currentSourceChar points to the UTF-8 encoded source.
|
||||
\a yychar must be the byte pointed to by \a currentSourceChar.
|
||||
|
||||
Points \a currentSourceChar to the byte of the next code point
|
||||
and modifies \a yychar to the value pointed by the updated
|
||||
\a currentSourceChar. \a utf16charCounter will be incremented by
|
||||
the number of UTF-16 code units that were needed for that code
|
||||
point.
|
||||
*/
|
||||
|
||||
Lexer::Lexer(TranslationUnit *unit)
|
||||
: _translationUnit(unit),
|
||||
_control(unit->control()),
|
||||
|
||||
40
src/libs/3rdparty/cplusplus/Lexer.h
vendored
40
src/libs/3rdparty/cplusplus/Lexer.h
vendored
@@ -61,6 +61,28 @@ public:
|
||||
LanguageFeatures languageFeatures() const { return _languageFeatures; }
|
||||
void setLanguageFeatures(LanguageFeatures features) { _languageFeatures = features; }
|
||||
|
||||
public:
|
||||
static void yyinp_utf8(const char *&currentSourceChar, unsigned char &yychar,
|
||||
unsigned &utf16charCounter)
|
||||
{
|
||||
++utf16charCounter;
|
||||
|
||||
// Process multi-byte UTF-8 code point (non-latin1)
|
||||
if (CPLUSPLUS_UNLIKELY(isByteOfMultiByteCodePoint(yychar))) {
|
||||
unsigned trailingBytesCurrentCodePoint = 1;
|
||||
for (unsigned char c = yychar << 2; isByteOfMultiByteCodePoint(c); c <<= 1)
|
||||
++trailingBytesCurrentCodePoint;
|
||||
// Code points >= 0x00010000 are represented by two UTF-16 code units
|
||||
if (trailingBytesCurrentCodePoint >= 3)
|
||||
++utf16charCounter;
|
||||
yychar = *(currentSourceChar += trailingBytesCurrentCodePoint + 1);
|
||||
|
||||
// Process single-byte UTF-8 code point (latin1)
|
||||
} else {
|
||||
yychar = *++currentSourceChar;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
void pushLineStartOffset();
|
||||
void scan_helper(Token *tok);
|
||||
@@ -83,23 +105,7 @@ private:
|
||||
|
||||
void yyinp()
|
||||
{
|
||||
++_currentCharUtf16;
|
||||
|
||||
// Process multi-byte UTF-8 code point (non-latin1)
|
||||
if (CPLUSPLUS_UNLIKELY(isByteOfMultiByteCodePoint(_yychar))) {
|
||||
unsigned trailingBytesCurrentCodePoint = 1;
|
||||
for (unsigned char c = _yychar << 2; isByteOfMultiByteCodePoint(c); c <<= 1)
|
||||
++trailingBytesCurrentCodePoint;
|
||||
// Code points >= 0x00010000 are represented by two UTF16 code units
|
||||
if (trailingBytesCurrentCodePoint >= 3)
|
||||
++_currentCharUtf16;
|
||||
_yychar = *(_currentChar += trailingBytesCurrentCodePoint + 1);
|
||||
|
||||
// Process single-byte UTF-8 code point (latin1)
|
||||
} else {
|
||||
_yychar = *++_currentChar;
|
||||
}
|
||||
|
||||
yyinp_utf8(_currentChar, _yychar, _currentCharUtf16);
|
||||
if (CPLUSPLUS_UNLIKELY(_yychar == '\n'))
|
||||
pushLineStartOffset();
|
||||
}
|
||||
|
||||
@@ -264,7 +264,7 @@ void TranslationUnit::tokenize()
|
||||
currentExpanded = true;
|
||||
const std::pair<unsigned, unsigned> &p = lineColumn[lineColumnIdx];
|
||||
if (p.first)
|
||||
_expandedLineColumn.insert(std::make_pair(tk.bytesBegin(), p));
|
||||
_expandedLineColumn.insert(std::make_pair(tk.utf16charsBegin(), p));
|
||||
else
|
||||
currentGenerated = true;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user