C++: Fix support for incremental input with \n

Also fix a false-positive line continuation when the line following a backslash-newline is blank

e.g.
"foo \

bar"

Change-Id: Ic6d345a4b578c955411d119b8438c8dc5065c072
Reviewed-by: Erik Verbruggen <erik.verbruggen@digia.com>
This commit is contained in:
Orgad Shaneh
2014-01-23 22:16:43 +02:00
committed by Orgad Shaneh
parent fd31b4716c
commit e600424648
6 changed files with 76 additions and 31 deletions

View File

@@ -32,7 +32,7 @@ using namespace CPlusPlus;
Lexer::Lexer(TranslationUnit *unit) Lexer::Lexer(TranslationUnit *unit)
: _translationUnit(unit), : _translationUnit(unit),
_control(unit->control()), _control(unit->control()),
_state(T_EOF_SYMBOL), _state(0),
_flags(0), _flags(0),
_currentLine(1) _currentLine(1)
{ {
@@ -44,7 +44,7 @@ Lexer::Lexer(TranslationUnit *unit)
Lexer::Lexer(const char *firstChar, const char *lastChar) Lexer::Lexer(const char *firstChar, const char *lastChar)
: _translationUnit(0), : _translationUnit(0),
_control(0), _control(0),
_state(T_EOF_SYMBOL), _state(0),
_flags(0), _flags(0),
_currentLine(1) _currentLine(1)
{ {
@@ -131,8 +131,21 @@ void Lexer::scan_helper(Token *tok)
_Lagain: _Lagain:
while (_yychar && std::isspace(_yychar)) { while (_yychar && std::isspace(_yychar)) {
if (_yychar == '\n') { if (_yychar == '\n') {
tok->f.joined = false; tok->f.joined = s._newlineExpected;
tok->f.newline = true; tok->f.newline = !s._newlineExpected;
if (s._newlineExpected) {
s._newlineExpected = false;
} else {
switch (s._tokenKind) {
case T_EOF_SYMBOL:
case T_COMMENT:
case T_DOXY_COMMENT:
break; // multiline tokens, don't break on newline
default: // Strings and C++ comments
_state = 0;
}
}
} else { } else {
tok->f.whitespace = true; tok->f.whitespace = true;
} }
@@ -145,12 +158,14 @@ void Lexer::scan_helper(Token *tok)
_tokenStart = _currentChar; _tokenStart = _currentChar;
tok->offset = _currentChar - _firstChar; tok->offset = _currentChar - _firstChar;
if (_state != T_EOF_SYMBOL && !_yychar) { if (_yychar) {
s._newlineExpected = false;
} else if (s._tokenKind) {
tok->f.kind = T_EOF_SYMBOL; tok->f.kind = T_EOF_SYMBOL;
return; return;
} }
switch (_state) { switch (s._tokenKind) {
case T_EOF_SYMBOL: case T_EOF_SYMBOL:
break; break;
case T_COMMENT: case T_COMMENT:
@@ -164,7 +179,7 @@ void Lexer::scan_helper(Token *tok)
yyinp(); yyinp();
if (_yychar == '/') { if (_yychar == '/') {
yyinp(); yyinp();
_state = T_EOF_SYMBOL; _state = 0;
break; break;
} }
} }
@@ -178,13 +193,15 @@ void Lexer::scan_helper(Token *tok)
} }
case T_CPP_COMMENT: case T_CPP_COMMENT:
case T_CPP_DOXY_COMMENT: case T_CPP_DOXY_COMMENT:
tok->f.kind = _state; tok->f.joined = true;
_state = T_EOF_SYMBOL; tok->f.kind = s._tokenKind;
_state = 0;
scanCppComment((Kind)tok->f.kind); scanCppComment((Kind)tok->f.kind);
return; return;
default: // Strings default: // Strings
tok->f.kind = _state; tok->f.joined = true;
_state = T_EOF_SYMBOL; tok->f.kind = s._tokenKind;
_state = 0;
scanUntilQuote(tok, '"'); scanUntilQuote(tok, '"');
return; return;
} }
@@ -199,14 +216,7 @@ void Lexer::scan_helper(Token *tok)
switch (ch) { switch (ch) {
case '\\': case '\\':
while (_yychar != '\n' && std::isspace(_yychar)) s._newlineExpected = true;
yyinp();
// ### CPP_CHECK(! _yychar || _yychar == '\n');
if (_yychar == '\n') {
tok->f.joined = true;
tok->f.newline = false;
yyinp();
}
goto _Lagain; goto _Lagain;
case '"': case '"':
@@ -417,7 +427,7 @@ void Lexer::scan_helper(Token *tok)
if (_yychar) if (_yychar)
yyinp(); yyinp();
else else
_state = commentKind; s._tokenKind = commentKind;
if (! f._scanCommentTokens) if (! f._scanCommentTokens)
goto _Lagain; goto _Lagain;
@@ -804,7 +814,8 @@ void Lexer::scanBackslash(Kind type)
while (_yychar != '\n' && std::isspace(_yychar)) while (_yychar != '\n' && std::isspace(_yychar))
yyinp(); yyinp();
if (!_yychar) { if (!_yychar) {
_state = type; s._tokenKind = type;
s._newlineExpected = true;
return; return;
} }
if (_yychar == '\n') { if (_yychar == '\n') {
@@ -812,7 +823,7 @@ void Lexer::scanBackslash(Kind type)
while (_yychar != '\n' && std::isspace(_yychar)) while (_yychar != '\n' && std::isspace(_yychar))
yyinp(); yyinp();
if (!_yychar) if (!_yychar)
_state = type; s._tokenKind = type;
} }
} }

View File

@@ -99,6 +99,11 @@ private:
unsigned _scanAngleStringLiteralTokens: 1; unsigned _scanAngleStringLiteralTokens: 1;
}; };
// Lexer state that survives from one chunk of input to the next, so that
// scanning can resume in the middle of a multi-line token.
// It is overlaid with the single-byte `_state` member through a union, so
// assigning `_state = 0` clears both fields at once.
// NOTE(review): the exact bit positions of these fields are
// implementation-defined; callers mask the flag with 0x80 - confirm this
// matches the layout on all supported compilers.
struct State {
// Kind of the token that was still open when the input ended
// (comment, C++ comment or string kind); 0 when no token is pending.
unsigned char _tokenKind : 7;
// Set after a backslash has been seen; cleared as soon as the following
// newline is consumed or any other character is available to scan.
unsigned char _newlineExpected : 1;
};
TranslationUnit *_translationUnit; TranslationUnit *_translationUnit;
Control *_control; Control *_control;
const char *_firstChar; const char *_firstChar;
@@ -106,7 +111,10 @@ private:
const char *_lastChar; const char *_lastChar;
const char *_tokenStart; const char *_tokenStart;
unsigned char _yychar; unsigned char _yychar;
int _state; union {
unsigned char _state;
State s;
};
union { union {
unsigned _flags; unsigned _flags;
Flags f; Flags f;

View File

@@ -66,7 +66,7 @@ CppHighlighter::CppHighlighter(QTextDocument *document) :
void CppHighlighter::highlightBlock(const QString &text) void CppHighlighter::highlightBlock(const QString &text)
{ {
const int previousState = previousBlockState(); const int previousState = previousBlockState();
int state = T_EOF_SYMBOL, initialBraceDepth = 0; int state = 0, initialBraceDepth = 0;
if (previousState != -1) { if (previousState != -1) {
state = previousState & 0xff; state = previousState & 0xff;
initialBraceDepth = previousState >> 8; initialBraceDepth = previousState >> 8;
@@ -85,6 +85,7 @@ void CppHighlighter::highlightBlock(const QString &text)
const QList<Token> tokens = tokenize(text, initialState); const QList<Token> tokens = tokenize(text, initialState);
state = tokenize.state(); // refresh the state state = tokenize.state(); // refresh the state
initialState &= ~0x80; // discard newline expected bit
int foldingIndent = initialBraceDepth; int foldingIndent = initialBraceDepth;
if (TextBlockUserData *userData = BaseTextDocumentLayout::testUserData(currentBlock())) { if (TextBlockUserData *userData = BaseTextDocumentLayout::testUserData(currentBlock())) {
userData->setFoldingIndent(0); userData->setFoldingIndent(0);
@@ -93,7 +94,7 @@ void CppHighlighter::highlightBlock(const QString &text)
} }
if (tokens.isEmpty()) { if (tokens.isEmpty()) {
setCurrentBlockState(previousState); setCurrentBlockState(state);
BaseTextDocumentLayout::clearParentheses(currentBlock()); BaseTextDocumentLayout::clearParentheses(currentBlock());
if (text.length()) {// the empty line can still contain whitespace if (text.length()) {// the empty line can still contain whitespace
if (initialState == T_COMMENT) if (initialState == T_COMMENT)

View File

@@ -70,7 +70,8 @@ void CodeFormatter::recalculateStateAfter(const QTextBlock &block)
restoreCurrentState(block.previous()); restoreCurrentState(block.previous());
bool endedJoined = false; bool endedJoined = false;
const int lexerState = tokenizeBlock(block, &endedJoined); // Discard newline expected bit from state
const int lexerState = tokenizeBlock(block, &endedJoined) & ~0x80;
m_tokenIndex = 0; m_tokenIndex = 0;
m_newStates.clear(); m_newStates.clear();
@@ -504,7 +505,7 @@ void CodeFormatter::recalculateStateAfter(const QTextBlock &block)
leave(); leave();
continue; continue;
} else if (m_tokenIndex == m_tokens.size() - 1 } else if (m_tokenIndex == m_tokens.size() - 1
&& lexerState == T_EOF_SYMBOL) { && lexerState == 0) {
leave(); leave();
} else if (m_tokenIndex == 0 && m_currentToken.isComment()) { } else if (m_tokenIndex == 0 && m_currentToken.isComment()) {
// to allow enter/leave to update the indentDepth // to allow enter/leave to update the indentDepth

View File

@@ -139,7 +139,7 @@ private:
uint m_folded : 1; uint m_folded : 1;
uint m_ifdefedOut : 1; uint m_ifdefedOut : 1;
uint m_foldingIndent : 16; uint m_foldingIndent : 16;
uint m_lexerState : 4; uint m_lexerState : 8;
uint m_foldingStartIncluded : 1; uint m_foldingStartIncluded : 1;
uint m_foldingEndIncluded : 1; uint m_foldingEndIncluded : 1;
Parentheses m_parentheses; Parentheses m_parentheses;

View File

@@ -254,6 +254,18 @@ void tst_SimpleLexer::incremental_data()
<< _("\"foo \\\n\nbar\"") << _("\"foo \\\n\nbar\"")
<< (List() << T_STRING_LITERAL << T_IDENTIFIER << T_STRING_LITERAL); << (List() << T_STRING_LITERAL << T_IDENTIFIER << T_STRING_LITERAL);
QTest::newRow("escaped_string_literal_with_newline_1")
<< _("\"foo \\")
<< (List() << T_STRING_LITERAL);
QTest::newRow("escaped_string_literal_with_newline_2")
<< _("")
<< List();
QTest::newRow("escaped_string_literal_with_newline_3")
<< _("bar")
<< (List() << T_IDENTIFIER);
QTest::newRow("escaped_string_literal_with_space_and_newline_single") QTest::newRow("escaped_string_literal_with_space_and_newline_single")
<< _("\"foo \\ \n bar\"") << _("\"foo \\ \n bar\"")
<< (List() << T_STRING_LITERAL); << (List() << T_STRING_LITERAL);
@@ -263,8 +275,8 @@ void tst_SimpleLexer::incremental_data()
<< (List() << T_STRING_LITERAL); << (List() << T_STRING_LITERAL);
QTest::newRow("escaped_string_literal_with_space_and_newline_2") QTest::newRow("escaped_string_literal_with_space_and_newline_2")
<< _("bar\"") << _("bar")
<< (List() << T_STRING_LITERAL); << (List() << T_IDENTIFIER);
QTest::newRow("token_after_escaped_string_literal_1") QTest::newRow("token_after_escaped_string_literal_1")
<< _("\"foo \\") << _("\"foo \\")
@@ -310,6 +322,18 @@ void tst_SimpleLexer::incremental_data()
<< _("//foo \\\n\nbar") << _("//foo \\\n\nbar")
<< (List() << T_CPP_COMMENT << T_IDENTIFIER); << (List() << T_CPP_COMMENT << T_IDENTIFIER);
QTest::newRow("escaped_cpp_comment_with_newline_1")
<< _("//foo \\")
<< (List() << T_CPP_COMMENT);
QTest::newRow("escaped_cpp_comment_with_newline_2")
<< _("")
<< List();
QTest::newRow("escaped_cpp_comment_with_newline_3")
<< _("bar")
<< (List() << T_IDENTIFIER);
QTest::newRow("escaped_cpp_comment_with_space_and_newline_single") QTest::newRow("escaped_cpp_comment_with_space_and_newline_single")
<< _("//foo \\ \n bar") << _("//foo \\ \n bar")
<< (List() << T_CPP_COMMENT); << (List() << T_CPP_COMMENT);
@@ -320,7 +344,7 @@ void tst_SimpleLexer::incremental_data()
QTest::newRow("escaped_cpp_comment_with_space_and_newline_2") QTest::newRow("escaped_cpp_comment_with_space_and_newline_2")
<< _("bar") << _("bar")
<< (List() << T_CPP_COMMENT); << (List() << T_IDENTIFIER);
} }
QTEST_APPLESS_MAIN(tst_SimpleLexer) QTEST_APPLESS_MAIN(tst_SimpleLexer)