CppEditor: Fix highlighting of raw string literals

... with the built-in highlighter.
Pass the necessary context information in and out of the SimpleLexer.

Task-number: QTCREATORBUG-26211
Fixes: QTCREATORBUG-26425
Fixes: QTCREATORBUG-26615
Change-Id: Id72f743e07ae117ca51b0d5e527b208dda133b7e
Reviewed-by: <github-actions-qt-creator@cristianadam.eu>
Reviewed-by: David Schulz <david.schulz@qt.io>
This commit is contained in:
Christian Kandeler
2021-12-08 13:57:24 +01:00
parent 3eef024960
commit a3af941adf
7 changed files with 79 additions and 22 deletions

View File

@@ -25,6 +25,8 @@
#include "cppassert.h" #include "cppassert.h"
#include <utils/executeondestruction.h>
#include <cctype> #include <cctype>
using namespace CPlusPlus; using namespace CPlusPlus;
@@ -213,7 +215,9 @@ void Lexer::scan_helper(Token *tok)
return; return;
} else if (!control() && isRawStringLiteral(s._tokenKind)) { } else if (!control() && isRawStringLiteral(s._tokenKind)) {
tok->f.kind = s._tokenKind; tok->f.kind = s._tokenKind;
if (scanUntilRawStringLiteralEndSimple()) const bool found = _expectedRawStringSuffix.isEmpty()
? scanUntilRawStringLiteralEndSimple() : scanUntilRawStringLiteralEndPrecise();
if (found)
_state = 0; _state = 0;
return; return;
} else { // non-raw strings } else { // non-raw strings
@@ -744,6 +748,10 @@ void Lexer::scanStringLiteral(Token *tok, unsigned char hint)
void Lexer::scanRawStringLiteral(Token *tok, unsigned char hint) void Lexer::scanRawStringLiteral(Token *tok, unsigned char hint)
{ {
Utils::ExecuteOnDestruction suffixCleaner;
if (!control())
suffixCleaner.reset([this] { _expectedRawStringSuffix.clear(); });
const char *yytext = _currentChar; const char *yytext = _currentChar;
int delimLength = -1; int delimLength = -1;
@@ -766,6 +774,8 @@ void Lexer::scanRawStringLiteral(Token *tok, unsigned char hint)
tok->f.kind = T_ERROR; tok->f.kind = T_ERROR;
return; return;
} }
if (!control())
_expectedRawStringSuffix.append(_yychar);
yyinp(); yyinp();
} else { } else {
if (!closingDelimCandidate) { if (!closingDelimCandidate) {
@@ -808,12 +818,34 @@ void Lexer::scanRawStringLiteral(Token *tok, unsigned char hint)
else else
tok->f.kind = T_RAW_STRING_LITERAL; tok->f.kind = T_RAW_STRING_LITERAL;
if (!control() && !closed) if (!control() && !closed) {
suffixCleaner.reset([]{});
s._tokenKind = tok->f.kind; s._tokenKind = tok->f.kind;
_expectedRawStringSuffix.prepend(')');
_expectedRawStringSuffix.append('"');
}
} }
// In the highlighting case we don't have any further information bool Lexer::scanUntilRawStringLiteralEndPrecise()
// like the delimiter or its length, so just match for: ...)..." {
int matchLen = 0;
while (_yychar) {
if (_yychar == _expectedRawStringSuffix.at(matchLen)) {
if (++matchLen == _expectedRawStringSuffix.length()) {
_expectedRawStringSuffix.clear();
yyinp();
return true;
}
} else {
matchLen = 0;
}
yyinp();
}
return false;
}
// In case we don't have any further information
// like the delimiter or its length, just match for: ...)..."
bool Lexer::scanUntilRawStringLiteralEndSimple() bool Lexer::scanUntilRawStringLiteralEndSimple()
{ {
bool closingParenthesisPassed = false; bool closingParenthesisPassed = false;

View File

@@ -23,6 +23,8 @@
#include "CPlusPlusForwardDeclarations.h" #include "CPlusPlusForwardDeclarations.h"
#include "Token.h" #include "Token.h"
#include <QByteArray>
namespace CPlusPlus { namespace CPlusPlus {
class CPLUSPLUS_EXPORT Lexer class CPLUSPLUS_EXPORT Lexer
@@ -62,6 +64,10 @@ public:
void setPreprocessorMode(bool onoff) void setPreprocessorMode(bool onoff)
{ f._ppMode = onoff; } { f._ppMode = onoff; }
// Terminator of a raw string literal that is still open, as recorded by the
// lexer; empty if no such terminator is stored.
QByteArray expectedRawStringSuffix() const
{ return _expectedRawStringSuffix; }
// Stores the terminator to look for when lexing resumes inside a raw string
// literal. With an empty suffix the lexer uses its heuristic end-of-literal scan.
void setExpectedRawStringSuffix(const QByteArray &suffix)
{
    _expectedRawStringSuffix = suffix;
}
public: public:
static void yyinp_utf8(const char *&currentSourceChar, unsigned char &yychar, static void yyinp_utf8(const char *&currentSourceChar, unsigned char &yychar,
unsigned &utf16charCounter) unsigned &utf16charCounter)
@@ -94,6 +100,7 @@ private:
void scanStringLiteral(Token *tok, unsigned char hint = 0); void scanStringLiteral(Token *tok, unsigned char hint = 0);
void scanRawStringLiteral(Token *tok, unsigned char hint = 0); void scanRawStringLiteral(Token *tok, unsigned char hint = 0);
bool scanUntilRawStringLiteralEndPrecise();
bool scanUntilRawStringLiteralEndSimple(); bool scanUntilRawStringLiteralEndSimple();
void scanCharLiteral(Token *tok, unsigned char hint = 0); void scanCharLiteral(Token *tok, unsigned char hint = 0);
void scanUntilQuote(Token *tok, unsigned char quote); void scanUntilQuote(Token *tok, unsigned char quote);
@@ -134,6 +141,7 @@ private:
TranslationUnit *_translationUnit; TranslationUnit *_translationUnit;
Control *_control; Control *_control;
QByteArray _expectedRawStringSuffix;
const char *_firstChar; const char *_firstChar;
const char *_currentChar; const char *_currentChar;
const char *_lastChar; const char *_lastChar;

View File

@@ -67,6 +67,7 @@ Tokens SimpleLexer::operator()(const QString &text, int state)
const char *lastChar = firstChar + bytes.size(); const char *lastChar = firstChar + bytes.size();
Lexer lex(firstChar, lastChar); Lexer lex(firstChar, lastChar);
lex.setExpectedRawStringSuffix(_expectedRawStringSuffix);
lex.setLanguageFeatures(_languageFeatures); lex.setLanguageFeatures(_languageFeatures);
lex.setStartWithNewline(true); lex.setStartWithNewline(true);
lex.setPreprocessorMode(_ppMode); lex.setPreprocessorMode(_ppMode);
@@ -108,6 +109,7 @@ Tokens SimpleLexer::operator()(const QString &text, int state)
} }
_lastState = lex.state(); _lastState = lex.state();
_expectedRawStringSuffix = lex.expectedRawStringSuffix();
return tokens; return tokens;
} }

View File

@@ -59,6 +59,10 @@ public:
int state() const int state() const
{ return _lastState; } { return _lastState; }
// Raw string terminator held by this SimpleLexer; operator() overwrites it with
// the value read back from the Lexer once tokenizing is done.
QByteArray expectedRawStringSuffix() const
{ return _expectedRawStringSuffix; }
// Sets the raw string terminator that operator() hands to the Lexer it creates
// for the next piece of text.
void setExpectedRawStringSuffix(const QByteArray &suffix)
{
    _expectedRawStringSuffix = suffix;
}
static int tokenAt(const Tokens &tokens, int utf16charsOffset); static int tokenAt(const Tokens &tokens, int utf16charsOffset);
static Token tokenAt(const QString &text, static Token tokenAt(const QString &text,
int utf16charsOffset, int utf16charsOffset,
@@ -68,6 +72,7 @@ public:
static int tokenBefore(const Tokens &tokens, int utf16charsOffset); static int tokenBefore(const Tokens &tokens, int utf16charsOffset);
private: private:
QByteArray _expectedRawStringSuffix;
int _lastState; int _lastState;
LanguageFeatures _languageFeatures; LanguageFeatures _languageFeatures;
bool _skipComments: 1; bool _skipComments: 1;

View File

@@ -61,6 +61,9 @@ void CppHighlighter::highlightBlock(const QString &text)
SimpleLexer tokenize; SimpleLexer tokenize;
tokenize.setLanguageFeatures(m_languageFeatures); tokenize.setLanguageFeatures(m_languageFeatures);
const QTextBlock prevBlock = currentBlock().previous();
if (prevBlock.isValid())
tokenize.setExpectedRawStringSuffix(TextDocumentLayout::expectedRawStringSuffix(prevBlock));
int initialLexerState = lexerState; int initialLexerState = lexerState;
const Tokens tokens = tokenize(text, initialLexerState); const Tokens tokens = tokenize(text, initialLexerState);
@@ -99,7 +102,6 @@ void CppHighlighter::highlightBlock(const QString &text)
bool expectPreprocessorKeyword = false; bool expectPreprocessorKeyword = false;
bool onlyHighlightComments = false; bool onlyHighlightComments = false;
bool blockHasPreprocessorDirective = false;
for (int i = 0; i < tokens.size(); ++i) { for (int i = 0; i < tokens.size(); ++i) {
const Token &tk = tokens.at(i); const Token &tk = tokens.at(i);
@@ -157,7 +159,6 @@ void CppHighlighter::highlightBlock(const QString &text)
setFormatWithSpaces(text, tk.utf16charsBegin(), tk.utf16chars(), setFormatWithSpaces(text, tk.utf16charsBegin(), tk.utf16chars(),
formatForCategory(C_PREPROCESSOR)); formatForCategory(C_PREPROCESSOR));
expectPreprocessorKeyword = true; expectPreprocessorKeyword = true;
blockHasPreprocessorDirective = true;
} else if (highlightCurrentWordAsPreprocessor && (tk.isKeyword() || tk.is(T_IDENTIFIER)) } else if (highlightCurrentWordAsPreprocessor && (tk.isKeyword() || tk.is(T_IDENTIFIER))
&& isPPKeyword(Utils::midView(text, tk.utf16charsBegin(), tk.utf16chars()))) { && isPPKeyword(Utils::midView(text, tk.utf16charsBegin(), tk.utf16chars()))) {
setFormat(tk.utf16charsBegin(), tk.utf16chars(), formatForCategory(C_PREPROCESSOR)); setFormat(tk.utf16charsBegin(), tk.utf16chars(), formatForCategory(C_PREPROCESSOR));
@@ -171,22 +172,7 @@ void CppHighlighter::highlightBlock(const QString &text)
} else if (tk.is(T_NUMERIC_LITERAL)) { } else if (tk.is(T_NUMERIC_LITERAL)) {
setFormat(tk.utf16charsBegin(), tk.utf16chars(), formatForCategory(C_NUMBER)); setFormat(tk.utf16charsBegin(), tk.utf16chars(), formatForCategory(C_NUMBER));
} else if (tk.isStringLiteral() || tk.isCharLiteral()) { } else if (tk.isStringLiteral() || tk.isCharLiteral()) {
// Our highlighting is broken for multi-line raw string literals, so if a superior if (!highlightRawStringLiteral(text, tk)) {
// option is available, don't do anything.
// Note that this does not just save unneeded work, but can actually be required,
// because mis-detected strings are not necessarily overwritten by the semantic
// highlighter. Example:
// const char *s = R"delim(
// line1
// "line)" // <- is misdetected by SimpleLexer as end of raw string literal
// line3 // <- erroneously not formatted by us, but that would be ok;
// // the semantic highlighter does it for us later
// )delim"; // <- end quote is erroneously interpreted as *start* of a string,
// // and because clangd does not include punctuation in its semantic
// // tokens, the semicolon would stay formatted as a string even
// // after the semantic highlighter has run.
if ((!CppModelManager::instance()->isClangCodeModelActive()
|| blockHasPreprocessorDirective) && !highlightRawStringLiteral(text, tk)) {
setFormatWithSpaces(text, tk.utf16charsBegin(), tk.utf16chars(), setFormatWithSpaces(text, tk.utf16charsBegin(), tk.utf16chars(),
formatForCategory(C_STRING)); formatForCategory(C_STRING));
} }
@@ -288,6 +274,8 @@ void CppHighlighter::highlightBlock(const QString &text)
} }
setCurrentBlockState((braceDepth << 8) | tokenize.state()); setCurrentBlockState((braceDepth << 8) | tokenize.state());
TextDocumentLayout::setExpectedRawStringSuffix(currentBlock(),
tokenize.expectedRawStringSuffix());
} }
void CppHighlighter::setLanguageFeatures(const LanguageFeatures &languageFeatures) void CppHighlighter::setLanguageFeatures(const LanguageFeatures &languageFeatures)

View File

@@ -523,6 +523,22 @@ void TextDocumentLayout::setFolded(const QTextBlock &block, bool folded)
emit layout->foldChanged(block.blockNumber(), folded); emit layout->foldChanged(block.blockNumber(), folded);
} }
void TextDocumentLayout::setExpectedRawStringSuffix(const QTextBlock &block,
const QByteArray &suffix)
{
if (TextBlockUserData * const data = textUserData(block))
data->setExpectedRawStringSuffix(suffix);
else if (!suffix.isEmpty())
userData(block)->setExpectedRawStringSuffix(suffix);
}
// Returns the raw string terminator stored for the given block, or an empty
// byte array if textUserData() yields no user data for it.
QByteArray TextDocumentLayout::expectedRawStringSuffix(const QTextBlock &block)
{
    TextBlockUserData * const blockData = textUserData(block);
    return blockData ? blockData->expectedRawStringSuffix() : QByteArray();
}
void TextDocumentLayout::requestExtraAreaUpdate() void TextDocumentLayout::requestExtraAreaUpdate()
{ {
emit updateExtraArea(); emit updateExtraArea();

View File

@@ -145,6 +145,9 @@ public:
KSyntaxHighlighting::State syntaxState() { return m_syntaxState; } KSyntaxHighlighting::State syntaxState() { return m_syntaxState; }
void setSyntaxState(KSyntaxHighlighting::State state) { m_syntaxState = state; } void setSyntaxState(KSyntaxHighlighting::State state) { m_syntaxState = state; }
// Raw string terminator stored for this block (empty if none). Const-qualified
// so that it can also be read through a const TextBlockUserData.
QByteArray expectedRawStringSuffix() const { return m_expectedRawStringSuffix; }
// Stores the raw string terminator for this block.
void setExpectedRawStringSuffix(const QByteArray &suffix)
{
    m_expectedRawStringSuffix = suffix;
}
private: private:
TextMarks m_marks; TextMarks m_marks;
int m_foldingIndent : 16; int m_foldingIndent : 16;
@@ -157,6 +160,7 @@ private:
Parentheses m_parentheses; Parentheses m_parentheses;
CodeFormatterData *m_codeFormatterData; CodeFormatterData *m_codeFormatterData;
KSyntaxHighlighting::State m_syntaxState; KSyntaxHighlighting::State m_syntaxState;
QByteArray m_expectedRawStringSuffix; // A bit C++-specific, but let's be pragmatic.
}; };
@@ -188,6 +192,8 @@ public:
static void doFoldOrUnfold(const QTextBlock& block, bool unfold); static void doFoldOrUnfold(const QTextBlock& block, bool unfold);
static bool isFolded(const QTextBlock &block); static bool isFolded(const QTextBlock &block);
static void setFolded(const QTextBlock &block, bool folded); static void setFolded(const QTextBlock &block, bool folded);
static void setExpectedRawStringSuffix(const QTextBlock &block, const QByteArray &suffix);
static QByteArray expectedRawStringSuffix(const QTextBlock &block);
class TEXTEDITOR_EXPORT FoldValidator class TEXTEDITOR_EXPORT FoldValidator
{ {