From e600424648eaebe5cf3ea1fa3ae8a41a22ce7b95 Mon Sep 17 00:00:00 2001 From: Orgad Shaneh Date: Thu, 23 Jan 2014 22:16:43 +0200 Subject: [PATCH] C++: Fix support for incremental input with \n Also fix false positive line continuation on blank line e.g. "foo \ bar" Change-Id: Ic6d345a4b578c955411d119b8438c8dc5065c072 Reviewed-by: Erik Verbruggen --- src/libs/3rdparty/cplusplus/Lexer.cpp | 55 +++++++++++-------- src/libs/3rdparty/cplusplus/Lexer.h | 10 +++- src/plugins/cppeditor/cpphighlighter.cpp | 5 +- src/plugins/cpptools/cppcodeformatter.cpp | 5 +- .../texteditor/basetextdocumentlayout.h | 2 +- tests/auto/cplusplus/lexer/tst_lexer.cpp | 30 +++++++++- 6 files changed, 76 insertions(+), 31 deletions(-) diff --git a/src/libs/3rdparty/cplusplus/Lexer.cpp b/src/libs/3rdparty/cplusplus/Lexer.cpp index 9c2f925369c..a796499fe05 100644 --- a/src/libs/3rdparty/cplusplus/Lexer.cpp +++ b/src/libs/3rdparty/cplusplus/Lexer.cpp @@ -32,7 +32,7 @@ using namespace CPlusPlus; Lexer::Lexer(TranslationUnit *unit) : _translationUnit(unit), _control(unit->control()), - _state(T_EOF_SYMBOL), + _state(0), _flags(0), _currentLine(1) { @@ -44,7 +44,7 @@ Lexer::Lexer(TranslationUnit *unit) Lexer::Lexer(const char *firstChar, const char *lastChar) : _translationUnit(0), _control(0), - _state(T_EOF_SYMBOL), + _state(0), _flags(0), _currentLine(1) { @@ -131,8 +131,21 @@ void Lexer::scan_helper(Token *tok) _Lagain: while (_yychar && std::isspace(_yychar)) { if (_yychar == '\n') { - tok->f.joined = false; - tok->f.newline = true; + tok->f.joined = s._newlineExpected; + tok->f.newline = !s._newlineExpected; + + if (s._newlineExpected) { + s._newlineExpected = false; + } else { + switch (s._tokenKind) { + case T_EOF_SYMBOL: + case T_COMMENT: + case T_DOXY_COMMENT: + break; // multiline tokens, don't break on newline + default: // Strings and C++ comments + _state = 0; + } + } } else { tok->f.whitespace = true; } @@ -145,12 +158,14 @@ void Lexer::scan_helper(Token *tok) _tokenStart = _currentChar; 
tok->offset = _currentChar - _firstChar; - if (_state != T_EOF_SYMBOL && !_yychar) { + if (_yychar) { + s._newlineExpected = false; + } else if (s._tokenKind) { tok->f.kind = T_EOF_SYMBOL; return; } - switch (_state) { + switch (s._tokenKind) { case T_EOF_SYMBOL: break; case T_COMMENT: @@ -164,7 +179,7 @@ void Lexer::scan_helper(Token *tok) yyinp(); if (_yychar == '/') { yyinp(); - _state = T_EOF_SYMBOL; + _state = 0; break; } } @@ -178,13 +193,15 @@ void Lexer::scan_helper(Token *tok) } case T_CPP_COMMENT: case T_CPP_DOXY_COMMENT: - tok->f.kind = _state; - _state = T_EOF_SYMBOL; + tok->f.joined = true; + tok->f.kind = s._tokenKind; + _state = 0; scanCppComment((Kind)tok->f.kind); return; default: // Strings - tok->f.kind = _state; - _state = T_EOF_SYMBOL; + tok->f.joined = true; + tok->f.kind = s._tokenKind; + _state = 0; scanUntilQuote(tok, '"'); return; } @@ -199,14 +216,7 @@ void Lexer::scan_helper(Token *tok) switch (ch) { case '\\': - while (_yychar != '\n' && std::isspace(_yychar)) - yyinp(); - // ### CPP_CHECK(! _yychar || _yychar == '\n'); - if (_yychar == '\n') { - tok->f.joined = true; - tok->f.newline = false; - yyinp(); - } + s._newlineExpected = true; goto _Lagain; case '"': @@ -417,7 +427,7 @@ void Lexer::scan_helper(Token *tok) if (_yychar) yyinp(); else - _state = commentKind; + s._tokenKind = commentKind; if (! 
f._scanCommentTokens) goto _Lagain; @@ -804,7 +814,8 @@ void Lexer::scanBackslash(Kind type) while (_yychar != '\n' && std::isspace(_yychar)) yyinp(); if (!_yychar) { - _state = type; + s._tokenKind = type; + s._newlineExpected = true; return; } if (_yychar == '\n') { @@ -812,7 +823,7 @@ void Lexer::scanBackslash(Kind type) while (_yychar != '\n' && std::isspace(_yychar)) yyinp(); if (!_yychar) - _state = type; + s._tokenKind = type; } } diff --git a/src/libs/3rdparty/cplusplus/Lexer.h b/src/libs/3rdparty/cplusplus/Lexer.h index 30f9ff8e890..12cef91b4de 100644 --- a/src/libs/3rdparty/cplusplus/Lexer.h +++ b/src/libs/3rdparty/cplusplus/Lexer.h @@ -99,6 +99,11 @@ private: unsigned _scanAngleStringLiteralTokens: 1; }; + struct State { + unsigned char _tokenKind : 7; + unsigned char _newlineExpected : 1; + }; + TranslationUnit *_translationUnit; Control *_control; const char *_firstChar; @@ -106,7 +111,10 @@ private: const char *_lastChar; const char *_tokenStart; unsigned char _yychar; - int _state; + union { + unsigned char _state; + State s; + }; union { unsigned _flags; Flags f; diff --git a/src/plugins/cppeditor/cpphighlighter.cpp b/src/plugins/cppeditor/cpphighlighter.cpp index dec2b2f082c..d039b9e1477 100644 --- a/src/plugins/cppeditor/cpphighlighter.cpp +++ b/src/plugins/cppeditor/cpphighlighter.cpp @@ -66,7 +66,7 @@ CppHighlighter::CppHighlighter(QTextDocument *document) : void CppHighlighter::highlightBlock(const QString &text) { const int previousState = previousBlockState(); - int state = T_EOF_SYMBOL, initialBraceDepth = 0; + int state = 0, initialBraceDepth = 0; if (previousState != -1) { state = previousState & 0xff; initialBraceDepth = previousState >> 8; @@ -85,6 +85,7 @@ void CppHighlighter::highlightBlock(const QString &text) const QList<Token> tokens = tokenize(text, initialState); state = tokenize.state(); // refresh the state + initialState &= ~0x80; // discard newline expected bit int foldingIndent = initialBraceDepth; if (TextBlockUserData *userData = 
BaseTextDocumentLayout::testUserData(currentBlock())) { userData->setFoldingIndent(0); @@ -93,7 +94,7 @@ void CppHighlighter::highlightBlock(const QString &text) } if (tokens.isEmpty()) { - setCurrentBlockState(previousState); + setCurrentBlockState(state); BaseTextDocumentLayout::clearParentheses(currentBlock()); if (text.length()) {// the empty line can still contain whitespace if (initialState == T_COMMENT) diff --git a/src/plugins/cpptools/cppcodeformatter.cpp b/src/plugins/cpptools/cppcodeformatter.cpp index db557959be5..08d51889459 100644 --- a/src/plugins/cpptools/cppcodeformatter.cpp +++ b/src/plugins/cpptools/cppcodeformatter.cpp @@ -70,7 +70,8 @@ void CodeFormatter::recalculateStateAfter(const QTextBlock &block) restoreCurrentState(block.previous()); bool endedJoined = false; - const int lexerState = tokenizeBlock(block, &endedJoined); + // Discard newline expected bit from state + const int lexerState = tokenizeBlock(block, &endedJoined) & ~0x80; m_tokenIndex = 0; m_newStates.clear(); @@ -504,7 +505,7 @@ void CodeFormatter::recalculateStateAfter(const QTextBlock &block) leave(); continue; } else if (m_tokenIndex == m_tokens.size() - 1 - && lexerState == T_EOF_SYMBOL) { + && lexerState == 0) { leave(); } else if (m_tokenIndex == 0 && m_currentToken.isComment()) { // to allow enter/leave to update the indentDepth diff --git a/src/plugins/texteditor/basetextdocumentlayout.h b/src/plugins/texteditor/basetextdocumentlayout.h index 2eab6e919a9..ffeda3b0457 100644 --- a/src/plugins/texteditor/basetextdocumentlayout.h +++ b/src/plugins/texteditor/basetextdocumentlayout.h @@ -139,7 +139,7 @@ private: uint m_folded : 1; uint m_ifdefedOut : 1; uint m_foldingIndent : 16; - uint m_lexerState : 4; + uint m_lexerState : 8; uint m_foldingStartIncluded : 1; uint m_foldingEndIncluded : 1; Parentheses m_parentheses; diff --git a/tests/auto/cplusplus/lexer/tst_lexer.cpp b/tests/auto/cplusplus/lexer/tst_lexer.cpp index a0de560b895..359e5a88e6c 100644 --- 
a/tests/auto/cplusplus/lexer/tst_lexer.cpp +++ b/tests/auto/cplusplus/lexer/tst_lexer.cpp @@ -254,6 +254,18 @@ void tst_SimpleLexer::incremental_data() << _("\"foo \\\n\nbar\"") << (List() << T_STRING_LITERAL << T_IDENTIFIER << T_STRING_LITERAL); + QTest::newRow("escaped_string_literal_with_newline_1") + << _("\"foo \\") + << (List() << T_STRING_LITERAL); + + QTest::newRow("escaped_string_literal_with_newline_2") + << _("") + << List(); + + QTest::newRow("escaped_string_literal_with_newline_3") + << _("bar") + << (List() << T_IDENTIFIER); + QTest::newRow("escaped_string_literal_with_space_and_newline_single") << _("\"foo \\ \n bar\"") << (List() << T_STRING_LITERAL); @@ -263,8 +275,8 @@ void tst_SimpleLexer::incremental_data() << (List() << T_STRING_LITERAL); QTest::newRow("escaped_string_literal_with_space_and_newline_2") - << _("bar\"") - << (List() << T_STRING_LITERAL); + << _("bar") + << (List() << T_IDENTIFIER); QTest::newRow("token_after_escaped_string_literal_1") << _("\"foo \\") @@ -310,6 +322,18 @@ void tst_SimpleLexer::incremental_data() << _("//foo \\\n\nbar") << (List() << T_CPP_COMMENT << T_IDENTIFIER); + QTest::newRow("escaped_cpp_comment_with_newline_1") + << _("//foo \\") + << (List() << T_CPP_COMMENT); + + QTest::newRow("escaped_cpp_comment_with_newline_2") + << _("") + << List(); + + QTest::newRow("escaped_cpp_comment_with_newline_3") + << _("bar") + << (List() << T_IDENTIFIER); + QTest::newRow("escaped_cpp_comment_with_space_and_newline_single") << _("//foo \\ \n bar") << (List() << T_CPP_COMMENT); @@ -320,7 +344,7 @@ void tst_SimpleLexer::incremental_data() QTest::newRow("escaped_cpp_comment_with_space_and_newline_2") << _("bar") - << (List() << T_CPP_COMMENT); + << (List() << T_IDENTIFIER); } QTEST_APPLESS_MAIN(tst_SimpleLexer)