C++: Fix support for incremental input with \n

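Also fix a false-positive line continuation across a blank line, e.g. the line "foo \" followed by an empty line and then "bar": the backslash joins only the empty line, so "bar" starts a new token.

Change-Id: Ic6d345a4b578c955411d119b8438c8dc5065c072
Reviewed-by: Erik Verbruggen <erik.verbruggen@digia.com>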
2014-01-23 22:16:43 +02:00
parent fd31b4716c
commit e600424648
6 changed files with 76 additions and 31 deletions
--- a/src/libs/3rdparty/cplusplus/Lexer.cpp
+++ b/src/libs/3rdparty/cplusplus/Lexer.cpp
@@ -32,7 +32,7 @@ using namespace CPlusPlus;
 Lexer::Lexer(TranslationUnit *unit)
    : _translationUnit(unit),
      _control(unit->control()),
-      _state(T_EOF_SYMBOL),
+      _state(0),
      _flags(0),
      _currentLine(1)
 {
@@ -44,7 +44,7 @@ Lexer::Lexer(TranslationUnit *unit)
 Lexer::Lexer(const char *firstChar, const char *lastChar)
    : _translationUnit(0),
      _control(0),
-      _state(T_EOF_SYMBOL),
+      _state(0),
      _flags(0),
      _currentLine(1)
 {
@@ -131,8 +131,21 @@ void Lexer::scan_helper(Token *tok)
  _Lagain:
    while (_yychar && std::isspace(_yychar)) {
        if (_yychar == '\n') {
-            tok->f.joined = false;
+            tok->f.joined = s._newlineExpected;
-            tok->f.newline = true;
+            tok->f.newline = !s._newlineExpected;
            if (s._newlineExpected) {
                s._newlineExpected = false;
            } else {
                switch (s._tokenKind) {
                case T_EOF_SYMBOL:
                case T_COMMENT:
                case T_DOXY_COMMENT:
                    break; // multiline tokens, don't break on newline
                default: // Strings and C++ comments
                    _state = 0;
                }
            }
        } else {
            tok->f.whitespace = true;
        }
@@ -145,12 +158,14 @@ void Lexer::scan_helper(Token *tok)
    _tokenStart = _currentChar;
    tok->offset = _currentChar - _firstChar;
-    if (_state != T_EOF_SYMBOL && !_yychar) {
+    if (_yychar) {
        s._newlineExpected = false;
    } else if (s._tokenKind) {
        tok->f.kind = T_EOF_SYMBOL;
        return;
    }
-    switch (_state) {
+    switch (s._tokenKind) {
    case T_EOF_SYMBOL:
        break;
    case T_COMMENT:
@@ -164,7 +179,7 @@ void Lexer::scan_helper(Token *tok)
                yyinp();
                if (_yychar == '/') {
                    yyinp();
-                    _state = T_EOF_SYMBOL;
+                    _state = 0;
                    break;
                }
            }
@@ -178,13 +193,15 @@ void Lexer::scan_helper(Token *tok)
    }
    case T_CPP_COMMENT:
    case T_CPP_DOXY_COMMENT:
-        tok->f.kind = _state;
+        tok->f.joined = true;
-        _state = T_EOF_SYMBOL;
+        tok->f.kind = s._tokenKind;
        _state = 0;
        scanCppComment((Kind)tok->f.kind);
        return;
    default: // Strings
-        tok->f.kind = _state;
+        tok->f.joined = true;
-        _state = T_EOF_SYMBOL;
+        tok->f.kind = s._tokenKind;
        _state = 0;
        scanUntilQuote(tok, '"');
        return;
    }
@@ -199,14 +216,7 @@ void Lexer::scan_helper(Token *tok)
    switch (ch) {
    case '\\':
-        while (_yychar != '\n' && std::isspace(_yychar))
+        s._newlineExpected = true;
            yyinp();
        // ### CPP_CHECK(! _yychar || _yychar == '\n');
        if (_yychar == '\n') {
            tok->f.joined = true;
            tok->f.newline = false;
            yyinp();
        }
        goto _Lagain;
    case '"':
@@ -417,7 +427,7 @@ void Lexer::scan_helper(Token *tok)
            if (_yychar)
                yyinp();
            else
-                _state = commentKind;
+                s._tokenKind = commentKind;
            if (! f._scanCommentTokens)
                goto _Lagain;
@@ -804,7 +814,8 @@ void Lexer::scanBackslash(Kind type)
    while (_yychar != '\n' && std::isspace(_yychar))
        yyinp();
    if (!_yychar) {
-        _state = type;
+        s._tokenKind = type;
        s._newlineExpected = true;
        return;
    }
    if (_yychar == '\n') {
@@ -812,7 +823,7 @@ void Lexer::scanBackslash(Kind type)
        while (_yychar != '\n' && std::isspace(_yychar))
            yyinp();
        if (!_yychar)
-            _state = type;
+            s._tokenKind = type;
    }
 }
--- a/src/libs/3rdparty/cplusplus/Lexer.h
+++ b/src/libs/3rdparty/cplusplus/Lexer.h
@@ -99,6 +99,11 @@ private:
        unsigned _scanAngleStringLiteralTokens: 1;
    };
    // Lexer state that survives from one chunk of incrementally scanned input
    // to the next. It shares storage with the raw `_state` byte (see the union
    // below in this class), so the whole state can be saved and restored as a
    // single value.
    // NOTE(review): callers strip the continuation flag with `& ~0x80`, which
    // presumably relies on `_newlineExpected` landing in the top bit; bit-field
    // layout is implementation-defined -- confirm for all supported compilers.
    struct State {
        // Kind of the token that was still unfinished when the input ended
        // (set for an unterminated comment, or for a string / C++ comment
        // continued with a backslash). Zero means no token is pending.
        unsigned char _tokenKind : 7;
        // Set when a backslash was seen right before the end of the input, so
        // that the next newline is reported as a line join rather than as a
        // real newline; cleared once that newline has been consumed.
        unsigned char _newlineExpected : 1;
    };
    TranslationUnit *_translationUnit;
    Control *_control;
    const char *_firstChar;
@@ -106,7 +111,10 @@ private:
    const char *_lastChar;
    const char *_tokenStart;
    unsigned char _yychar;
-    int _state;
+    union {
        unsigned char _state;
        State s;
    };
    union {
        unsigned _flags;
        Flags f;
--- a/src/plugins/cppeditor/cpphighlighter.cpp
+++ b/src/plugins/cppeditor/cpphighlighter.cpp
@@ -66,7 +66,7 @@ CppHighlighter::CppHighlighter(QTextDocument *document) :
 void CppHighlighter::highlightBlock(const QString &text)
 {
    const int previousState = previousBlockState();
-    int state = T_EOF_SYMBOL, initialBraceDepth = 0;
+    int state = 0, initialBraceDepth = 0;
    if (previousState != -1) {
        state = previousState & 0xff;
        initialBraceDepth = previousState >> 8;
@@ -85,6 +85,7 @@ void CppHighlighter::highlightBlock(const QString &text)
    const QList<Token> tokens = tokenize(text, initialState);
    state = tokenize.state(); // refresh the state
    initialState &= ~0x80; // discard newline expected bit
    int foldingIndent = initialBraceDepth;
    if (TextBlockUserData *userData = BaseTextDocumentLayout::testUserData(currentBlock())) {
        userData->setFoldingIndent(0);
@@ -93,7 +94,7 @@ void CppHighlighter::highlightBlock(const QString &text)
    }
    if (tokens.isEmpty()) {
-        setCurrentBlockState(previousState);
+        setCurrentBlockState(state);
        BaseTextDocumentLayout::clearParentheses(currentBlock());
        if (text.length())  {// the empty line can still contain whitespace
            if (initialState == T_COMMENT)
--- a/src/plugins/cpptools/cppcodeformatter.cpp
+++ b/src/plugins/cpptools/cppcodeformatter.cpp
@@ -70,7 +70,8 @@ void CodeFormatter::recalculateStateAfter(const QTextBlock &block)
    restoreCurrentState(block.previous());
    bool endedJoined = false;
-    const int lexerState = tokenizeBlock(block, &endedJoined);
+    // Discard newline expected bit from state
    const int lexerState = tokenizeBlock(block, &endedJoined) & ~0x80;
    m_tokenIndex = 0;
    m_newStates.clear();
@@ -504,7 +505,7 @@ void CodeFormatter::recalculateStateAfter(const QTextBlock &block)
                leave();
                continue;
            } else if (m_tokenIndex == m_tokens.size() - 1
-                    && lexerState == T_EOF_SYMBOL) {
+                    && lexerState == 0) {
                leave();
            } else if (m_tokenIndex == 0 && m_currentToken.isComment()) {
                // to allow enter/leave to update the indentDepth
--- a/src/plugins/texteditor/basetextdocumentlayout.h
+++ b/src/plugins/texteditor/basetextdocumentlayout.h
@@ -139,7 +139,7 @@ private:
    uint m_folded : 1;
    uint m_ifdefedOut : 1;
    uint m_foldingIndent : 16;
-    uint m_lexerState : 4;
+    uint m_lexerState : 8;
    uint m_foldingStartIncluded : 1;
    uint m_foldingEndIncluded : 1;
    Parentheses m_parentheses;
--- a/tests/auto/cplusplus/lexer/tst_lexer.cpp
+++ b/tests/auto/cplusplus/lexer/tst_lexer.cpp
@@ -254,6 +254,18 @@ void tst_SimpleLexer::incremental_data()
            << _("\"foo \\\n\nbar\"")
            << (List() << T_STRING_LITERAL << T_IDENTIFIER << T_STRING_LITERAL);
    QTest::newRow("escaped_string_literal_with_newline_1")
            << _("\"foo \\")
            << (List() << T_STRING_LITERAL);
    QTest::newRow("escaped_string_literal_with_newline_2")
            << _("")
            << List();
    QTest::newRow("escaped_string_literal_with_newline_3")
            << _("bar")
            << (List() << T_IDENTIFIER);
    QTest::newRow("escaped_string_literal_with_space_and_newline_single")
            << _("\"foo \\   \n   bar\"")
            << (List() << T_STRING_LITERAL);
@@ -263,8 +275,8 @@ void tst_SimpleLexer::incremental_data()
            << (List() << T_STRING_LITERAL);
    QTest::newRow("escaped_string_literal_with_space_and_newline_2")
-            << _("bar\"")
+            << _("bar")
-            << (List() << T_STRING_LITERAL);
+            << (List() << T_IDENTIFIER);
    QTest::newRow("token_after_escaped_string_literal_1")
            << _("\"foo \\")
@@ -310,6 +322,18 @@ void tst_SimpleLexer::incremental_data()
            << _("//foo \\\n\nbar")
            << (List() << T_CPP_COMMENT << T_IDENTIFIER);
    QTest::newRow("escaped_cpp_comment_with_newline_1")
            << _("//foo \\")
            << (List() << T_CPP_COMMENT);
    QTest::newRow("escaped_cpp_comment_with_newline_2")
            << _("")
            << List();
    QTest::newRow("escaped_cpp_comment_with_newline_3")
            << _("bar")
            << (List() << T_IDENTIFIER);
    QTest::newRow("escaped_cpp_comment_with_space_and_newline_single")
            << _("//foo \\   \n   bar")
            << (List() << T_CPP_COMMENT);
@@ -320,7 +344,7 @@ void tst_SimpleLexer::incremental_data()
    QTest::newRow("escaped_cpp_comment_with_space_and_newline_2")
            << _("bar")
-            << (List() << T_CPP_COMMENT);
+            << (List() << T_IDENTIFIER);
 }
 QTEST_APPLESS_MAIN(tst_SimpleLexer)