From e600424648eaebe5cf3ea1fa3ae8a41a22ce7b95 Mon Sep 17 00:00:00 2001
From: Orgad Shaneh <orgad.shaneh@audiocodes.com>
Date: Thu, 23 Jan 2014 22:16:43 +0200
Subject: [PATCH] C++: Fix support for incremental input with \n

Also fix false-positive line continuation on a blank line that follows a backslash-terminated line

e.g.
"foo \

bar"

Change-Id: Ic6d345a4b578c955411d119b8438c8dc5065c072
Reviewed-by: Erik Verbruggen <erik.verbruggen@digia.com>
---
 src/libs/3rdparty/cplusplus/Lexer.cpp         | 55 +++++++++++--------
 src/libs/3rdparty/cplusplus/Lexer.h           | 10 +++-
 src/plugins/cppeditor/cpphighlighter.cpp      |  5 +-
 src/plugins/cpptools/cppcodeformatter.cpp     |  5 +-
 .../texteditor/basetextdocumentlayout.h       |  2 +-
 tests/auto/cplusplus/lexer/tst_lexer.cpp      | 30 +++++++++-
 6 files changed, 76 insertions(+), 31 deletions(-)

diff --git a/src/libs/3rdparty/cplusplus/Lexer.cpp b/src/libs/3rdparty/cplusplus/Lexer.cpp
index 9c2f925369c..a796499fe05 100644
--- a/src/libs/3rdparty/cplusplus/Lexer.cpp
+++ b/src/libs/3rdparty/cplusplus/Lexer.cpp
@@ -32,7 +32,7 @@ using namespace CPlusPlus;
 Lexer::Lexer(TranslationUnit *unit)
     : _translationUnit(unit),
       _control(unit->control()),
-      _state(T_EOF_SYMBOL),
+      _state(0),
       _flags(0),
       _currentLine(1)
 {
@@ -44,7 +44,7 @@ Lexer::Lexer(TranslationUnit *unit)
 Lexer::Lexer(const char *firstChar, const char *lastChar)
     : _translationUnit(0),
       _control(0),
-      _state(T_EOF_SYMBOL),
+      _state(0),
       _flags(0),
       _currentLine(1)
 {
@@ -131,8 +131,21 @@ void Lexer::scan_helper(Token *tok)
   _Lagain:
     while (_yychar && std::isspace(_yychar)) {
         if (_yychar == '\n') {
-            tok->f.joined = false;
-            tok->f.newline = true;
+            tok->f.joined = s._newlineExpected;
+            tok->f.newline = !s._newlineExpected;
+
+            if (s._newlineExpected) {
+                s._newlineExpected = false;
+            } else {
+                switch (s._tokenKind) {
+                case T_EOF_SYMBOL:
+                case T_COMMENT:
+                case T_DOXY_COMMENT:
+                    break; // multiline tokens, don't break on newline
+                default: // Strings and C++ comments
+                    _state = 0;
+                }
+            }
         } else {
             tok->f.whitespace = true;
         }
@@ -145,12 +158,14 @@ void Lexer::scan_helper(Token *tok)
     _tokenStart = _currentChar;
     tok->offset = _currentChar - _firstChar;
 
-    if (_state != T_EOF_SYMBOL && !_yychar) {
+    if (_yychar) {
+        s._newlineExpected = false;
+    } else if (s._tokenKind) {
         tok->f.kind = T_EOF_SYMBOL;
         return;
     }
 
-    switch (_state) {
+    switch (s._tokenKind) {
     case T_EOF_SYMBOL:
         break;
     case T_COMMENT:
@@ -164,7 +179,7 @@ void Lexer::scan_helper(Token *tok)
                 yyinp();
                 if (_yychar == '/') {
                     yyinp();
-                    _state = T_EOF_SYMBOL;
+                    _state = 0;
                     break;
                 }
             }
@@ -178,13 +193,15 @@ void Lexer::scan_helper(Token *tok)
     }
     case T_CPP_COMMENT:
     case T_CPP_DOXY_COMMENT:
-        tok->f.kind = _state;
-        _state = T_EOF_SYMBOL;
+        tok->f.joined = true;
+        tok->f.kind = s._tokenKind;
+        _state = 0;
         scanCppComment((Kind)tok->f.kind);
         return;
     default: // Strings
-        tok->f.kind = _state;
-        _state = T_EOF_SYMBOL;
+        tok->f.joined = true;
+        tok->f.kind = s._tokenKind;
+        _state = 0;
         scanUntilQuote(tok, '"');
         return;
     }
@@ -199,14 +216,7 @@ void Lexer::scan_helper(Token *tok)
 
     switch (ch) {
     case '\\':
-        while (_yychar != '\n' && std::isspace(_yychar))
-            yyinp();
-        // ### CPP_CHECK(! _yychar || _yychar == '\n');
-        if (_yychar == '\n') {
-            tok->f.joined = true;
-            tok->f.newline = false;
-            yyinp();
-        }
+        s._newlineExpected = true;
         goto _Lagain;
 
     case '"':
@@ -417,7 +427,7 @@ void Lexer::scan_helper(Token *tok)
             if (_yychar)
                 yyinp();
             else
-                _state = commentKind;
+                s._tokenKind = commentKind;
 
             if (! f._scanCommentTokens)
                 goto _Lagain;
@@ -804,7 +814,8 @@ void Lexer::scanBackslash(Kind type)
     while (_yychar != '\n' && std::isspace(_yychar))
         yyinp();
     if (!_yychar) {
-        _state = type;
+        s._tokenKind = type;
+        s._newlineExpected = true;
         return;
     }
     if (_yychar == '\n') {
@@ -812,7 +823,7 @@ void Lexer::scanBackslash(Kind type)
         while (_yychar != '\n' && std::isspace(_yychar))
             yyinp();
         if (!_yychar)
-            _state = type;
+            s._tokenKind = type;
     }
 }
 
diff --git a/src/libs/3rdparty/cplusplus/Lexer.h b/src/libs/3rdparty/cplusplus/Lexer.h
index 30f9ff8e890..12cef91b4de 100644
--- a/src/libs/3rdparty/cplusplus/Lexer.h
+++ b/src/libs/3rdparty/cplusplus/Lexer.h
@@ -99,6 +99,11 @@ private:
         unsigned _scanAngleStringLiteralTokens: 1;
     };
 
+    struct State { // Lexer state kept between scans; overlaid on _state in the union below
+        unsigned char _tokenKind : 7; // kind of the token left unfinished when input ran out; 0 when no token is pending
+        unsigned char _newlineExpected : 1; // set after a backslash: the next '\n' marks the token joined instead of newline
+    };
+
     TranslationUnit *_translationUnit;
     Control *_control;
     const char *_firstChar;
@@ -106,7 +111,10 @@ private:
     const char *_lastChar;
     const char *_tokenStart;
     unsigned char _yychar;
-    int _state;
+    union { // two views of the lexer state
+        unsigned char _state; // raw value; assigning 0 resets the whole state
+        State s; // bit-field view; NOTE(review): callers mask 0x80 to drop _newlineExpected, which assumes it is the top bit - confirm, layout is implementation-defined
+    };
     union {
         unsigned _flags;
         Flags f;
diff --git a/src/plugins/cppeditor/cpphighlighter.cpp b/src/plugins/cppeditor/cpphighlighter.cpp
index dec2b2f082c..d039b9e1477 100644
--- a/src/plugins/cppeditor/cpphighlighter.cpp
+++ b/src/plugins/cppeditor/cpphighlighter.cpp
@@ -66,7 +66,7 @@ CppHighlighter::CppHighlighter(QTextDocument *document) :
 void CppHighlighter::highlightBlock(const QString &text)
 {
     const int previousState = previousBlockState();
-    int state = T_EOF_SYMBOL, initialBraceDepth = 0;
+    int state = 0, initialBraceDepth = 0;
     if (previousState != -1) {
         state = previousState & 0xff;
         initialBraceDepth = previousState >> 8;
@@ -85,6 +85,7 @@ void CppHighlighter::highlightBlock(const QString &text)
     const QList<Token> tokens = tokenize(text, initialState);
     state = tokenize.state(); // refresh the state
 
+    initialState &= ~0x80; // discard newline expected bit
     int foldingIndent = initialBraceDepth;
     if (TextBlockUserData *userData = BaseTextDocumentLayout::testUserData(currentBlock())) {
         userData->setFoldingIndent(0);
@@ -93,7 +94,7 @@ void CppHighlighter::highlightBlock(const QString &text)
     }
 
     if (tokens.isEmpty()) {
-        setCurrentBlockState(previousState);
+        setCurrentBlockState(state);
         BaseTextDocumentLayout::clearParentheses(currentBlock());
         if (text.length())  {// the empty line can still contain whitespace
             if (initialState == T_COMMENT)
diff --git a/src/plugins/cpptools/cppcodeformatter.cpp b/src/plugins/cpptools/cppcodeformatter.cpp
index db557959be5..08d51889459 100644
--- a/src/plugins/cpptools/cppcodeformatter.cpp
+++ b/src/plugins/cpptools/cppcodeformatter.cpp
@@ -70,7 +70,8 @@ void CodeFormatter::recalculateStateAfter(const QTextBlock &block)
     restoreCurrentState(block.previous());
 
     bool endedJoined = false;
-    const int lexerState = tokenizeBlock(block, &endedJoined);
+    // Discard newline expected bit from state
+    const int lexerState = tokenizeBlock(block, &endedJoined) & ~0x80;
     m_tokenIndex = 0;
     m_newStates.clear();
 
@@ -504,7 +505,7 @@ void CodeFormatter::recalculateStateAfter(const QTextBlock &block)
                 leave();
                 continue;
             } else if (m_tokenIndex == m_tokens.size() - 1
-                    && lexerState == T_EOF_SYMBOL) {
+                    && lexerState == 0) {
                 leave();
             } else if (m_tokenIndex == 0 && m_currentToken.isComment()) {
                 // to allow enter/leave to update the indentDepth
diff --git a/src/plugins/texteditor/basetextdocumentlayout.h b/src/plugins/texteditor/basetextdocumentlayout.h
index 2eab6e919a9..ffeda3b0457 100644
--- a/src/plugins/texteditor/basetextdocumentlayout.h
+++ b/src/plugins/texteditor/basetextdocumentlayout.h
@@ -139,7 +139,7 @@ private:
     uint m_folded : 1;
     uint m_ifdefedOut : 1;
     uint m_foldingIndent : 16;
-    uint m_lexerState : 4;
+    uint m_lexerState : 8;
     uint m_foldingStartIncluded : 1;
     uint m_foldingEndIncluded : 1;
     Parentheses m_parentheses;
diff --git a/tests/auto/cplusplus/lexer/tst_lexer.cpp b/tests/auto/cplusplus/lexer/tst_lexer.cpp
index a0de560b895..359e5a88e6c 100644
--- a/tests/auto/cplusplus/lexer/tst_lexer.cpp
+++ b/tests/auto/cplusplus/lexer/tst_lexer.cpp
@@ -254,6 +254,18 @@ void tst_SimpleLexer::incremental_data()
             << _("\"foo \\\n\nbar\"")
             << (List() << T_STRING_LITERAL << T_IDENTIFIER << T_STRING_LITERAL);
 
+    QTest::newRow("escaped_string_literal_with_newline_1")
+            << _("\"foo \\")
+            << (List() << T_STRING_LITERAL);
+
+    QTest::newRow("escaped_string_literal_with_newline_2")
+            << _("")
+            << List();
+
+    QTest::newRow("escaped_string_literal_with_newline_3")
+            << _("bar")
+            << (List() << T_IDENTIFIER);
+
     QTest::newRow("escaped_string_literal_with_space_and_newline_single")
             << _("\"foo \\   \n   bar\"")
             << (List() << T_STRING_LITERAL);
@@ -263,8 +275,8 @@ void tst_SimpleLexer::incremental_data()
             << (List() << T_STRING_LITERAL);
 
     QTest::newRow("escaped_string_literal_with_space_and_newline_2")
-            << _("bar\"")
-            << (List() << T_STRING_LITERAL);
+            << _("bar")
+            << (List() << T_IDENTIFIER);
 
     QTest::newRow("token_after_escaped_string_literal_1")
             << _("\"foo \\")
@@ -310,6 +322,18 @@ void tst_SimpleLexer::incremental_data()
             << _("//foo \\\n\nbar")
             << (List() << T_CPP_COMMENT << T_IDENTIFIER);
 
+    QTest::newRow("escaped_cpp_comment_with_newline_1")
+            << _("//foo \\")
+            << (List() << T_CPP_COMMENT);
+
+    QTest::newRow("escaped_cpp_comment_with_newline_2")
+            << _("")
+            << List();
+
+    QTest::newRow("escaped_cpp_comment_with_newline_3")
+            << _("bar")
+            << (List() << T_IDENTIFIER);
+
     QTest::newRow("escaped_cpp_comment_with_space_and_newline_single")
             << _("//foo \\   \n   bar")
             << (List() << T_CPP_COMMENT);
@@ -320,7 +344,7 @@ void tst_SimpleLexer::incremental_data()
 
     QTest::newRow("escaped_cpp_comment_with_space_and_newline_2")
             << _("bar")
-            << (List() << T_CPP_COMMENT);
+            << (List() << T_IDENTIFIER);
 }
 
 QTEST_APPLESS_MAIN(tst_SimpleLexer)