CppEditor: Fix highlighting of raw string literals

... with the built-in highlighter.
Pass the necessary context information in and out of the SimpleLexer.

Task-number: QTCREATORBUG-26211
Fixes: QTCREATORBUG-26425
Fixes: QTCREATORBUG-26615
Change-Id: Id72f743e07ae117ca51b0d5e527b208dda133b7e
Reviewed-by: <github-actions-qt-creator@cristianadam.eu>
Reviewed-by: David Schulz <david.schulz@qt.io>
This commit is contained in:
Christian Kandeler
2021-12-08 13:57:24 +01:00
parent 3eef024960
commit a3af941adf
7 changed files with 79 additions and 22 deletions

View File

@@ -25,6 +25,8 @@
#include "cppassert.h"
#include <utils/executeondestruction.h>
#include <cctype>
using namespace CPlusPlus;
@@ -213,7 +215,9 @@ void Lexer::scan_helper(Token *tok)
return;
} else if (!control() && isRawStringLiteral(s._tokenKind)) {
tok->f.kind = s._tokenKind;
if (scanUntilRawStringLiteralEndSimple())
const bool found = _expectedRawStringSuffix.isEmpty()
? scanUntilRawStringLiteralEndSimple() : scanUntilRawStringLiteralEndPrecise();
if (found)
_state = 0;
return;
} else { // non-raw strings
@@ -744,6 +748,10 @@ void Lexer::scanStringLiteral(Token *tok, unsigned char hint)
void Lexer::scanRawStringLiteral(Token *tok, unsigned char hint)
{
Utils::ExecuteOnDestruction suffixCleaner;
if (!control())
suffixCleaner.reset([this] { _expectedRawStringSuffix.clear(); });
const char *yytext = _currentChar;
int delimLength = -1;
@@ -766,6 +774,8 @@ void Lexer::scanRawStringLiteral(Token *tok, unsigned char hint)
tok->f.kind = T_ERROR;
return;
}
if (!control())
_expectedRawStringSuffix.append(_yychar);
yyinp();
} else {
if (!closingDelimCandidate) {
@@ -808,12 +818,34 @@ void Lexer::scanRawStringLiteral(Token *tok, unsigned char hint)
else
tok->f.kind = T_RAW_STRING_LITERAL;
if (!control() && !closed)
if (!control() && !closed) {
suffixCleaner.reset([]{});
s._tokenKind = tok->f.kind;
_expectedRawStringSuffix.prepend(')');
_expectedRawStringSuffix.append('"');
}
}
// In the highlighting case we don't have any further information
// like the delimiter or its length, so just match for: ...)..."
bool Lexer::scanUntilRawStringLiteralEndPrecise()
{
int matchLen = 0;
while (_yychar) {
if (_yychar == _expectedRawStringSuffix.at(matchLen)) {
if (++matchLen == _expectedRawStringSuffix.length()) {
_expectedRawStringSuffix.clear();
yyinp();
return true;
}
} else {
matchLen = 0;
}
yyinp();
}
return false;
}
// In case we don't have any further information
// like the delimiter or its length, just match for: ...)..."
bool Lexer::scanUntilRawStringLiteralEndSimple()
{
bool closingParenthesisPassed = false;

View File

@@ -23,6 +23,8 @@
#include "CPlusPlusForwardDeclarations.h"
#include "Token.h"
#include <QByteArray>
namespace CPlusPlus {
class CPLUSPLUS_EXPORT Lexer
@@ -62,6 +64,10 @@ public:
// Turns preprocessor mode on or off by storing the flag in f._ppMode.
void setPreprocessorMode(bool onoff)
{
    f._ppMode = onoff;
}
// Returns the closing sequence the lexer still expects for a raw string literal
// that was left open; empty if no such literal is pending.
QByteArray expectedRawStringSuffix() const
{
    return _expectedRawStringSuffix;
}
// Seeds the lexer with the closing sequence of a raw string literal that is
// already open when scanning starts. An empty value means none is known.
void setExpectedRawStringSuffix(const QByteArray &suffix)
{
    _expectedRawStringSuffix = suffix;
}
public:
static void yyinp_utf8(const char *&currentSourceChar, unsigned char &yychar,
unsigned &utf16charCounter)
@@ -94,6 +100,7 @@ private:
void scanStringLiteral(Token *tok, unsigned char hint = 0);
void scanRawStringLiteral(Token *tok, unsigned char hint = 0);
bool scanUntilRawStringLiteralEndPrecise();
bool scanUntilRawStringLiteralEndSimple();
void scanCharLiteral(Token *tok, unsigned char hint = 0);
void scanUntilQuote(Token *tok, unsigned char quote);
@@ -134,6 +141,7 @@ private:
TranslationUnit *_translationUnit;
Control *_control;
QByteArray _expectedRawStringSuffix;
const char *_firstChar;
const char *_currentChar;
const char *_lastChar;

View File

@@ -67,6 +67,7 @@ Tokens SimpleLexer::operator()(const QString &text, int state)
const char *lastChar = firstChar + bytes.size();
Lexer lex(firstChar, lastChar);
lex.setExpectedRawStringSuffix(_expectedRawStringSuffix);
lex.setLanguageFeatures(_languageFeatures);
lex.setStartWithNewline(true);
lex.setPreprocessorMode(_ppMode);
@@ -108,6 +109,7 @@ Tokens SimpleLexer::operator()(const QString &text, int state)
}
_lastState = lex.state();
_expectedRawStringSuffix = lex.expectedRawStringSuffix();
return tokens;
}

View File

@@ -59,6 +59,10 @@ public:
// Returns the lexer state recorded at the end of the most recent tokenization run.
int state() const
{
    return _lastState;
}
// Returns the raw string suffix reported by the lexer after the most recent
// tokenization run (or the value set beforehand if none has run yet).
QByteArray expectedRawStringSuffix() const
{
    return _expectedRawStringSuffix;
}
// Sets the raw string suffix that is handed to the lexer at the start of the
// next tokenization run.
void setExpectedRawStringSuffix(const QByteArray &suffix)
{
    _expectedRawStringSuffix = suffix;
}
static int tokenAt(const Tokens &tokens, int utf16charsOffset);
static Token tokenAt(const QString &text,
int utf16charsOffset,
@@ -68,6 +72,7 @@ public:
static int tokenBefore(const Tokens &tokens, int utf16charsOffset);
private:
QByteArray _expectedRawStringSuffix;
int _lastState;
LanguageFeatures _languageFeatures;
bool _skipComments: 1;

View File

@@ -61,6 +61,9 @@ void CppHighlighter::highlightBlock(const QString &text)
SimpleLexer tokenize;
tokenize.setLanguageFeatures(m_languageFeatures);
const QTextBlock prevBlock = currentBlock().previous();
if (prevBlock.isValid())
tokenize.setExpectedRawStringSuffix(TextDocumentLayout::expectedRawStringSuffix(prevBlock));
int initialLexerState = lexerState;
const Tokens tokens = tokenize(text, initialLexerState);
@@ -99,7 +102,6 @@ void CppHighlighter::highlightBlock(const QString &text)
bool expectPreprocessorKeyword = false;
bool onlyHighlightComments = false;
bool blockHasPreprocessorDirective = false;
for (int i = 0; i < tokens.size(); ++i) {
const Token &tk = tokens.at(i);
@@ -157,7 +159,6 @@ void CppHighlighter::highlightBlock(const QString &text)
setFormatWithSpaces(text, tk.utf16charsBegin(), tk.utf16chars(),
formatForCategory(C_PREPROCESSOR));
expectPreprocessorKeyword = true;
blockHasPreprocessorDirective = true;
} else if (highlightCurrentWordAsPreprocessor && (tk.isKeyword() || tk.is(T_IDENTIFIER))
&& isPPKeyword(Utils::midView(text, tk.utf16charsBegin(), tk.utf16chars()))) {
setFormat(tk.utf16charsBegin(), tk.utf16chars(), formatForCategory(C_PREPROCESSOR));
@@ -171,22 +172,7 @@ void CppHighlighter::highlightBlock(const QString &text)
} else if (tk.is(T_NUMERIC_LITERAL)) {
setFormat(tk.utf16charsBegin(), tk.utf16chars(), formatForCategory(C_NUMBER));
} else if (tk.isStringLiteral() || tk.isCharLiteral()) {
// Our highlighting is broken for multi-line raw string literals, so if a superior
// option is available, don't do anything.
// Note that this does not just save unneeded work, but can actually be required,
// because mis-detected strings are not necessarily overwritten by the semantic
// highlighter. Example:
// const char *s = R"delim(
// line1
// "line)" // <- is misdetected by SimpleLexer as end of raw string literal
// line3 // <- erroneously not formatted by us, but that would be ok;
// // the semantic highlighter does it for us later
// )delim"; // <- end quote is erroneously interpreted as *start* of a string,
// // and because clangd does not include punctuation in its semantic
// // tokens, the semicolon would stay formatted as a string even
// // after the semantic highlighter has run.
if ((!CppModelManager::instance()->isClangCodeModelActive()
|| blockHasPreprocessorDirective) && !highlightRawStringLiteral(text, tk)) {
if (!highlightRawStringLiteral(text, tk)) {
setFormatWithSpaces(text, tk.utf16charsBegin(), tk.utf16chars(),
formatForCategory(C_STRING));
}
@@ -288,6 +274,8 @@ void CppHighlighter::highlightBlock(const QString &text)
}
setCurrentBlockState((braceDepth << 8) | tokenize.state());
TextDocumentLayout::setExpectedRawStringSuffix(currentBlock(),
tokenize.expectedRawStringSuffix());
}
void CppHighlighter::setLanguageFeatures(const LanguageFeatures &languageFeatures)

View File

@@ -523,6 +523,22 @@ void TextDocumentLayout::setFolded(const QTextBlock &block, bool folded)
emit layout->foldChanged(block.blockNumber(), folded);
}
void TextDocumentLayout::setExpectedRawStringSuffix(const QTextBlock &block,
const QByteArray &suffix)
{
if (TextBlockUserData * const data = textUserData(block))
data->setExpectedRawStringSuffix(suffix);
else if (!suffix.isEmpty())
userData(block)->setExpectedRawStringSuffix(suffix);
}
// Returns the expected raw string suffix stored in the block's user data, or an
// empty byte array if the block carries no TextBlockUserData.
QByteArray TextDocumentLayout::expectedRawStringSuffix(const QTextBlock &block)
{
    TextBlockUserData * const data = textUserData(block);
    return data ? data->expectedRawStringSuffix() : QByteArray();
}
void TextDocumentLayout::requestExtraAreaUpdate()
{
emit updateExtraArea();

View File

@@ -145,6 +145,9 @@ public:
// Returns a copy of the stored syntax highlighting state. Const-qualified so it can
// also be called through a const TextBlockUserData.
KSyntaxHighlighting::State syntaxState() const { return m_syntaxState; }
// Replaces the stored syntax highlighting state.
void setSyntaxState(KSyntaxHighlighting::State state)
{
    m_syntaxState = state;
}
// Returns the raw string suffix stored for this block; empty if none was stored.
// Const-qualified so it can also be called through a const TextBlockUserData.
QByteArray expectedRawStringSuffix() const { return m_expectedRawStringSuffix; }
// Stores the raw string suffix for this block; an empty value clears it.
void setExpectedRawStringSuffix(const QByteArray &suffix)
{
    m_expectedRawStringSuffix = suffix;
}
private:
TextMarks m_marks;
int m_foldingIndent : 16;
@@ -157,6 +160,7 @@ private:
Parentheses m_parentheses;
CodeFormatterData *m_codeFormatterData;
KSyntaxHighlighting::State m_syntaxState;
QByteArray m_expectedRawStringSuffix; // A bit C++-specific, but let's be pragmatic.
};
@@ -188,6 +192,8 @@ public:
static void doFoldOrUnfold(const QTextBlock& block, bool unfold);
static bool isFolded(const QTextBlock &block);
static void setFolded(const QTextBlock &block, bool folded);
static void setExpectedRawStringSuffix(const QTextBlock &block, const QByteArray &suffix);
static QByteArray expectedRawStringSuffix(const QTextBlock &block);
class TEXTEDITOR_EXPORT FoldValidator
{