C++: Add utf16 indices to Macro and Document::MacroUse

In most cases we need to work with the UTF-16 indices. Only in cppfindreferences is the byte interface still needed, since there we read in files and work on a QByteArray to save memory. Change-Id: I6ef6a93fc1875a8c9a305c075d51a9ca034c41bb Reviewed-by: Erik Verbruggen <erik.verbruggen@digia.com>
2014-05-09 10:04:13 -04:00
parent bb7da966b8
commit c6358e5d38
24 changed files with 345 additions and 215 deletions
--- a/src/libs/3rdparty/cplusplus/Lexer.cpp
+++ b/src/libs/3rdparty/cplusplus/Lexer.cpp
@@ -36,6 +36,21 @@ using namespace CPlusPlus;
    \sa Token
 */

+/*!
+    \fn static void Lexer::yyinp_utf8(const char *&currentSourceChar, unsigned char &yychar, unsigned &utf16charCounter)
+
+    Processes a single Unicode code point in a UTF-8 encoded source.
+
+    \a currentSourceChar points to the UTF-8 encoded source.
+    \a yychar must be the byte pointed to by \a currentSourceChar.
+
+    Advances \a currentSourceChar to the first byte of the next code
+    point and sets \a yychar to the byte at that updated position.
+    \a utf16charCounter is incremented by the number of UTF-16 code
+    units needed to represent the processed code point (two for code
+    points >= 0x00010000, otherwise one).
+*/
+
 Lexer::Lexer(TranslationUnit *unit)
    : _translationUnit(unit),
      _control(unit->control()),
--- a/src/libs/3rdparty/cplusplus/Lexer.h
+++ b/src/libs/3rdparty/cplusplus/Lexer.h
@@ -61,6 +61,28 @@ public:
    LanguageFeatures languageFeatures() const { return _languageFeatures; }
    void setLanguageFeatures(LanguageFeatures features) { _languageFeatures = features; }

+public:
+    static void yyinp_utf8(const char *&currentSourceChar, unsigned char &yychar,
+                           unsigned &utf16charCounter)
+    {
+        ++utf16charCounter;
+
+        // Process multi-byte UTF-8 code point (non-ASCII)
+        if (CPLUSPLUS_UNLIKELY(isByteOfMultiByteCodePoint(yychar))) {
+            unsigned trailingBytesCurrentCodePoint = 1;
+            for (unsigned char c = yychar << 2; isByteOfMultiByteCodePoint(c); c <<= 1)
+                ++trailingBytesCurrentCodePoint;
+            // 3 trailing bytes means a code point >= 0x00010000, which takes two UTF-16 code units
+            if (trailingBytesCurrentCodePoint >= 3)
+                ++utf16charCounter;
+            yychar = *(currentSourceChar += trailingBytesCurrentCodePoint + 1);
+
+            // Process single-byte UTF-8 code point (ASCII)
+        } else {
+            yychar = *++currentSourceChar;
+        }
+    }
+
 private:
    void pushLineStartOffset();
    void scan_helper(Token *tok);
@@ -83,23 +105,7 @@ private:

    void yyinp()
    {
-        ++_currentCharUtf16;
-
-        // Process multi-byte UTF-8 code point (non-latin1)
-        if (CPLUSPLUS_UNLIKELY(isByteOfMultiByteCodePoint(_yychar))) {
-            unsigned trailingBytesCurrentCodePoint = 1;
-            for (unsigned char c = _yychar << 2; isByteOfMultiByteCodePoint(c); c <<= 1)
-                ++trailingBytesCurrentCodePoint;
-            // Code points >= 0x00010000 are represented by two UTF16 code units
-            if (trailingBytesCurrentCodePoint >= 3)
-                ++_currentCharUtf16;
-            _yychar = *(_currentChar += trailingBytesCurrentCodePoint + 1);
-
-        // Process single-byte UTF-8 code point (latin1)
-        } else {
-            _yychar = *++_currentChar;
-        }
-
+        yyinp_utf8(_currentChar, _yychar, _currentCharUtf16);
        if (CPLUSPLUS_UNLIKELY(_yychar == '\n'))
            pushLineStartOffset();
    }
--- a/src/libs/3rdparty/cplusplus/TranslationUnit.cpp
+++ b/src/libs/3rdparty/cplusplus/TranslationUnit.cpp
@@ -264,7 +264,7 @@ void TranslationUnit::tokenize()
            currentExpanded = true;
            const std::pair<unsigned, unsigned> &p = lineColumn[lineColumnIdx];
            if (p.first)
-                _expandedLineColumn.insert(std::make_pair(tk.bytesBegin(), p));
+                _expandedLineColumn.insert(std::make_pair(tk.utf16charsBegin(), p));
            else
                currentGenerated = true;