CppEditor: Fix highlighting of raw string literals

... with the built-in highlighter. Pass the necessary context information in and out of the SimpleLexer.

Task-number: QTCREATORBUG-26211
Fixes: QTCREATORBUG-26425
Fixes: QTCREATORBUG-26615
Change-Id: Id72f743e07ae117ca51b0d5e527b208dda133b7e
Reviewed-by: <github-actions-qt-creator@cristianadam.eu>
Reviewed-by: David Schulz <david.schulz@qt.io>
2021-12-08 13:57:24 +01:00
parent 3eef024960
commit a3af941adf
7 changed files with 79 additions and 22 deletions
--- a/src/libs/3rdparty/cplusplus/Lexer.cpp
+++ b/src/libs/3rdparty/cplusplus/Lexer.cpp
@@ -25,6 +25,8 @@
 #include "cppassert.h"
 #include <utils/executeondestruction.h>
 #include <cctype>
 using namespace CPlusPlus;
@@ -213,7 +215,9 @@ void Lexer::scan_helper(Token *tok)
        return;
    } else if (!control() && isRawStringLiteral(s._tokenKind)) {
        tok->f.kind = s._tokenKind;
-        if (scanUntilRawStringLiteralEndSimple())
+        const bool found = _expectedRawStringSuffix.isEmpty()
                ? scanUntilRawStringLiteralEndSimple() : scanUntilRawStringLiteralEndPrecise();
        if (found)
            _state = 0;
        return;
    } else { // non-raw strings
@@ -744,6 +748,10 @@ void Lexer::scanStringLiteral(Token *tok, unsigned char hint)
 void Lexer::scanRawStringLiteral(Token *tok, unsigned char hint)
 {
    Utils::ExecuteOnDestruction suffixCleaner;
    if (!control())
        suffixCleaner.reset([this] { _expectedRawStringSuffix.clear(); });
    const char *yytext = _currentChar;
    int delimLength = -1;
@@ -766,6 +774,8 @@ void Lexer::scanRawStringLiteral(Token *tok, unsigned char hint)
                    tok->f.kind = T_ERROR;
                    return;
                }
                if (!control())
                    _expectedRawStringSuffix.append(_yychar);
                yyinp();
            } else {
                if (!closingDelimCandidate) {
@@ -808,12 +818,34 @@ void Lexer::scanRawStringLiteral(Token *tok, unsigned char hint)
    else
        tok->f.kind = T_RAW_STRING_LITERAL;
-    if (!control() && !closed)
+    if (!control() && !closed) {
        suffixCleaner.reset([]{});
        s._tokenKind = tok->f.kind;
        _expectedRawStringSuffix.prepend(')');
        _expectedRawStringSuffix.append('"');
    }
 }
-// In the highlighting case we don't have any further information
+bool Lexer::scanUntilRawStringLiteralEndPrecise()
-// like the delimiter or its length, so just match for: ...)..."
+{
    // Consumes input until the closing sequence recorded in _expectedRawStringSuffix
    // has been read in full.
    // Returns true (and clears the recorded suffix) once the terminator was consumed.
    // Returns false if the input ran out first; the recorded suffix is then left
    // untouched so that a later call can keep looking for it.
    if (_expectedRawStringSuffix.isEmpty())
        return scanUntilRawStringLiteralEndSimple(); // Nothing precise to look for.

    // Compare against a window holding the most recently read bytes instead of
    // restarting a prefix match from zero on a mismatch. With a plain restart, the
    // character that broke a partial match is never reconsidered as the first
    // character of the suffix, so e.g. the input ))delim" would not be recognized
    // when )delim" is expected. Comparing QByteArrays also avoids mixing the
    // unsigned _yychar with the signed chars stored in the suffix.
    const auto suffixLength = _expectedRawStringSuffix.size();
    QByteArray window;
    window.reserve(suffixLength);
    while (_yychar) {
        if (window.size() == suffixLength)
            window.remove(0, 1); // Drop the oldest byte to keep the window suffix-sized.
        window.append(_yychar);
        if (window == _expectedRawStringSuffix) {
            _expectedRawStringSuffix.clear();
            yyinp(); // Step past the closing quote.
            return true;
        }
        yyinp();
    }
    return false;
 }
 // In case we don't have any further information
 // like the delimiter or its length, just match for: ...)..."
 bool Lexer::scanUntilRawStringLiteralEndSimple()
 {
    bool closingParenthesisPassed = false;
--- a/src/libs/3rdparty/cplusplus/Lexer.h
+++ b/src/libs/3rdparty/cplusplus/Lexer.h
@@ -23,6 +23,8 @@
 #include "CPlusPlusForwardDeclarations.h"
 #include "Token.h"
 #include <QByteArray>
 namespace CPlusPlus {
 class CPLUSPLUS_EXPORT Lexer
@@ -62,6 +64,10 @@ public:
    // Stores the given flag in the lexer's preprocessor-mode bit (f._ppMode).
    void setPreprocessorMode(bool onoff)
    {
        f._ppMode = onoff;
    }
    // Returns the closing sequence of a raw string literal that this lexer currently
    // has recorded as still outstanding (by value).
    QByteArray expectedRawStringSuffix() const
    { return _expectedRawStringSuffix; }
    // Replaces the recorded closing sequence of a pending raw string literal,
    // e.g. to carry it over from a previous lexer run.
    void setExpectedRawStringSuffix(const QByteArray &suffix)
    {
        _expectedRawStringSuffix = suffix;
    }
 public:
    static void yyinp_utf8(const char *&currentSourceChar, unsigned char &yychar,
                           unsigned &utf16charCounter)
@@ -94,6 +100,7 @@ private:
    void scanStringLiteral(Token *tok, unsigned char hint = 0);
    void scanRawStringLiteral(Token *tok, unsigned char hint = 0);
    bool scanUntilRawStringLiteralEndPrecise();
    bool scanUntilRawStringLiteralEndSimple();
    void scanCharLiteral(Token *tok, unsigned char hint = 0);
    void scanUntilQuote(Token *tok, unsigned char quote);
@@ -134,6 +141,7 @@ private:
    TranslationUnit *_translationUnit;
    Control *_control;
    QByteArray _expectedRawStringSuffix;
    const char *_firstChar;
    const char *_currentChar;
    const char *_lastChar;
--- a/src/libs/cplusplus/SimpleLexer.cpp
+++ b/src/libs/cplusplus/SimpleLexer.cpp
@@ -67,6 +67,7 @@ Tokens SimpleLexer::operator()(const QString &text, int state)
    const char *lastChar = firstChar + bytes.size();
    Lexer lex(firstChar, lastChar);
    lex.setExpectedRawStringSuffix(_expectedRawStringSuffix);
    lex.setLanguageFeatures(_languageFeatures);
    lex.setStartWithNewline(true);
    lex.setPreprocessorMode(_ppMode);
@@ -108,6 +109,7 @@ Tokens SimpleLexer::operator()(const QString &text, int state)
    }
    _lastState = lex.state();
    _expectedRawStringSuffix = lex.expectedRawStringSuffix();
    return tokens;
 }
--- a/src/libs/cplusplus/SimpleLexer.h
+++ b/src/libs/cplusplus/SimpleLexer.h
@@ -59,6 +59,10 @@ public:
    // Returns the lexer state stored in _lastState.
    int state() const
    {
        return _lastState;
    }
    // Returns the raw string literal closing sequence currently stored in this
    // object (by value).
    QByteArray expectedRawStringSuffix() const
    { return _expectedRawStringSuffix; }
    // Stores the raw string literal closing sequence that subsequent tokenization
    // should start out with.
    void setExpectedRawStringSuffix(const QByteArray &suffix)
    {
        _expectedRawStringSuffix = suffix;
    }
    static int tokenAt(const Tokens &tokens, int utf16charsOffset);
    static Token tokenAt(const QString &text,
                         int utf16charsOffset,
@@ -68,6 +72,7 @@ public:
    static int tokenBefore(const Tokens &tokens, int utf16charsOffset);
 private:
    QByteArray _expectedRawStringSuffix;
    int _lastState;
    LanguageFeatures _languageFeatures;
    bool _skipComments: 1;
--- a/src/plugins/cppeditor/cpphighlighter.cpp
+++ b/src/plugins/cppeditor/cpphighlighter.cpp
@@ -61,6 +61,9 @@ void CppHighlighter::highlightBlock(const QString &text)
    SimpleLexer tokenize;
    tokenize.setLanguageFeatures(m_languageFeatures);
    const QTextBlock prevBlock = currentBlock().previous();
    if (prevBlock.isValid())
        tokenize.setExpectedRawStringSuffix(TextDocumentLayout::expectedRawStringSuffix(prevBlock));
    int initialLexerState = lexerState;
    const Tokens tokens = tokenize(text, initialLexerState);
@@ -99,7 +102,6 @@ void CppHighlighter::highlightBlock(const QString &text)
    bool expectPreprocessorKeyword = false;
    bool onlyHighlightComments = false;
    bool blockHasPreprocessorDirective = false;
    for (int i = 0; i < tokens.size(); ++i) {
        const Token &tk = tokens.at(i);
@@ -157,7 +159,6 @@ void CppHighlighter::highlightBlock(const QString &text)
            setFormatWithSpaces(text, tk.utf16charsBegin(), tk.utf16chars(),
                          formatForCategory(C_PREPROCESSOR));
            expectPreprocessorKeyword = true;
            blockHasPreprocessorDirective = true;
        } else if (highlightCurrentWordAsPreprocessor && (tk.isKeyword() || tk.is(T_IDENTIFIER))
                   && isPPKeyword(Utils::midView(text, tk.utf16charsBegin(), tk.utf16chars()))) {
            setFormat(tk.utf16charsBegin(), tk.utf16chars(), formatForCategory(C_PREPROCESSOR));
@@ -171,22 +172,7 @@ void CppHighlighter::highlightBlock(const QString &text)
        } else if (tk.is(T_NUMERIC_LITERAL)) {
            setFormat(tk.utf16charsBegin(), tk.utf16chars(), formatForCategory(C_NUMBER));
        } else if (tk.isStringLiteral() || tk.isCharLiteral()) {
-            // Our highlighting is broken for multi-line raw string literals, so if a superior
+            if (!highlightRawStringLiteral(text, tk)) {
            // option is available, don't do anything.
            // Note that this does not just save unneeded work, but can actually be required,
            // because mis-detected strings are not necessarily overwritten by the semantic
            // highlighter. Example:
            // const char *s = R"delim(
            //    line1
            //    "line)"  // <- is misdeteced by SimpleLexer as end of raw string literal
            //    line3    // <- erroneously not formatted by us, but that would be ok;
            //             //    the semantic highlighter does it for us later
            //    )delim"; // <- end quote is erroneously interpreted as *start* of a string,
            //             //    and because clangd does not include punctuation in its semantic
            //             //    tokens, the semicolon would stay formatted as a string even
            //             //    after the semantic highlighter has run.
            if ((!CppModelManager::instance()->isClangCodeModelActive()
                 || blockHasPreprocessorDirective) && !highlightRawStringLiteral(text, tk)) {
                setFormatWithSpaces(text, tk.utf16charsBegin(), tk.utf16chars(),
                                    formatForCategory(C_STRING));
            }
@@ -288,6 +274,8 @@ void CppHighlighter::highlightBlock(const QString &text)
    }
    setCurrentBlockState((braceDepth << 8) | tokenize.state());
    TextDocumentLayout::setExpectedRawStringSuffix(currentBlock(),
                                                   tokenize.expectedRawStringSuffix());
 }
 void CppHighlighter::setLanguageFeatures(const LanguageFeatures &languageFeatures)
--- a/src/plugins/texteditor/textdocumentlayout.cpp
+++ b/src/plugins/texteditor/textdocumentlayout.cpp
@@ -523,6 +523,22 @@ void TextDocumentLayout::setFolded(const QTextBlock &block, bool folded)
        emit layout->foldChanged(block.blockNumber(), folded);
 }
 void TextDocumentLayout::setExpectedRawStringSuffix(const QTextBlock &block,
                                                    const QByteArray &suffix)
 {
    if (TextBlockUserData * const data = textUserData(block))
        data->setExpectedRawStringSuffix(suffix);
    else if (!suffix.isEmpty())
        userData(block)->setExpectedRawStringSuffix(suffix);
 }
 // Returns the raw string literal terminator stored for \a block, or an empty
 // byte array if the block has no user data attached.
 QByteArray TextDocumentLayout::expectedRawStringSuffix(const QTextBlock &block)
 {
    TextBlockUserData * const data = textUserData(block);
    if (!data)
        return {};
    return data->expectedRawStringSuffix();
 }
 void TextDocumentLayout::requestExtraAreaUpdate()
 {
    emit updateExtraArea();
--- a/src/plugins/texteditor/textdocumentlayout.h
+++ b/src/plugins/texteditor/textdocumentlayout.h
@@ -145,6 +145,9 @@ public:
    // Returns a copy of the stored syntax highlighting state. Declared const since
    // it only reads m_syntaxState, so it can also be called on const objects.
    KSyntaxHighlighting::State syntaxState() const { return m_syntaxState; }
    // Replaces the stored syntax highlighting state with the given one.
    void setSyntaxState(KSyntaxHighlighting::State state)
    {
        m_syntaxState = state;
    }
    // Returns the raw string literal terminator stored for this block (by value).
    // Declared const since it only reads m_expectedRawStringSuffix, so it can also
    // be called on const objects.
    QByteArray expectedRawStringSuffix() const { return m_expectedRawStringSuffix; }
    // Stores the raw string literal terminator to remember for this block.
    void setExpectedRawStringSuffix(const QByteArray &suffix)
    {
        m_expectedRawStringSuffix = suffix;
    }
 private:
    TextMarks m_marks;
    int m_foldingIndent : 16;
@@ -157,6 +160,7 @@ private:
    Parentheses m_parentheses;
    CodeFormatterData *m_codeFormatterData;
    KSyntaxHighlighting::State m_syntaxState;
    QByteArray m_expectedRawStringSuffix; // A bit C++-specific, but let's be pragmatic.
 };
@@ -188,6 +192,8 @@ public:
    static void doFoldOrUnfold(const QTextBlock& block, bool unfold);
    static bool isFolded(const QTextBlock &block);
    static void setFolded(const QTextBlock &block, bool folded);
    static void setExpectedRawStringSuffix(const QTextBlock &block, const QByteArray &suffix);
    static QByteArray expectedRawStringSuffix(const QTextBlock &block);
    class TEXTEDITOR_EXPORT FoldValidator
    {