From 44f1d183077460c3e6fa2e0b533abc68e341337d Mon Sep 17 00:00:00 2001 From: Tim Jenssen Date: Wed, 19 Jun 2019 19:42:15 +0200 Subject: [PATCH] qmljs: sync parser with current 5.12 state Task-number: QTCREATORBUG-22474 Change-Id: I86d7ee7cc28e95f814f2ba36551a36c8a59e1a79 Reviewed-by: Ulf Hermann --- src/libs/qmljs/parser/qmljslexer.cpp | 67 +++++++++++++++++++--------- src/libs/qmljs/parser/qmljslexer_p.h | 3 ++ 2 files changed, 48 insertions(+), 22 deletions(-) diff --git a/src/libs/qmljs/parser/qmljslexer.cpp b/src/libs/qmljs/parser/qmljslexer.cpp index 19d367be1ba..ab7a33917e0 100644 --- a/src/libs/qmljs/parser/qmljslexer.cpp +++ b/src/libs/qmljs/parser/qmljslexer.cpp @@ -116,6 +116,7 @@ void Lexer::setCode(const QString &code, int lineno, bool qmlMode) _tokenText.reserve(1024); _errorMessage.clear(); _tokenSpell = QStringRef(); + _rawString = QStringRef(); _codePtr = code.unicode(); _endPtr = _codePtr + code.length(); @@ -149,13 +150,20 @@ void Lexer::setCode(const QString &code, int lineno, bool qmlMode) void Lexer::scanChar() { - unsigned sequenceLength = isLineTerminatorSequence(); + if (_skipLinefeed) { + Q_ASSERT(*_codePtr == QLatin1Char('\n')); + ++_codePtr; + _skipLinefeed = false; + } _char = *_codePtr++; - if (sequenceLength == 2) - _char = *_codePtr++; - ++_currentColumnNumber; + if (isLineTerminator()) { + if (_char == QLatin1Char('\r')) { + if (_codePtr < _endPtr && *_codePtr == QLatin1Char('\n')) + _skipLinefeed = true; + _char = QLatin1Char('\n'); + } ++_currentLineNumber; _currentColumnNumber = 0; } @@ -232,6 +240,7 @@ int Lexer::lex() again: _tokenSpell = QStringRef(); + _rawString = QStringRef(); _tokenKind = scanToken(); _tokenLength = _codePtr - _tokenStartPtr - 1; @@ -807,12 +816,15 @@ int Lexer::scanString(ScanStringMode mode) QChar quote = (mode == TemplateContinuation) ? QChar(TemplateHead) : QChar(mode); bool multilineStringLiteral = false; - const QChar *startCode = _codePtr; + const QChar *startCode = _codePtr - 1; + // in case we just parsed a \r, we need to reset this flag to get things working + // correctly in the loop below and afterwards + _skipLinefeed = false; if (_engine) { while (_codePtr <= _endPtr) { - if (isLineTerminator() && quote != QLatin1Char('`')) { - if (qmlMode()) + if (isLineTerminator()) { + if ((quote == QLatin1Char('`') || qmlMode())) break; _errorCode = IllegalCharacter; _errorMessage = QCoreApplication::translate("QmlParser", "Stray newline in string literal"); @@ -822,7 +834,8 @@ int Lexer::scanString(ScanStringMode mode) } else if (_char == '$' && quote == QLatin1Char('`')) { break; } else if (_char == quote) { - _tokenSpell = _engine->midRef(startCode - _code.unicode() - 1, _codePtr - startCode); + _tokenSpell = _engine->midRef(startCode - _code.unicode(), _codePtr - startCode - 1); + _rawString = _tokenSpell; scanChar(); if (quote == QLatin1Char('`')) @@ -835,28 +848,36 @@ int Lexer::scanString(ScanStringMode mode) else return T_STRING_LITERAL; } - scanChar(); + // don't use scanChar() here, that would transform \r sequences and the midRef() call would create the wrong result + _char = *_codePtr++; + ++_currentColumnNumber; } } + // rewind by one char, so things gets scanned correctly + --_codePtr; + _validTokenText = true; - _tokenText.resize(0); - startCode--; - while (startCode != _codePtr - 1) - _tokenText += *startCode++; + _tokenText = QString(startCode, _codePtr - startCode); + + auto setRawString = [&](const QChar *end) { + QString raw(startCode, end - startCode - 1); + raw.replace(QLatin1String("\r\n"), QLatin1String("\n")); + raw.replace(QLatin1Char('\r'), QLatin1Char('\n')); + _rawString = _engine->newStringRef(raw); + }; + + scanChar(); while (_codePtr <= _endPtr) { - if (unsigned sequenceLength = isLineTerminatorSequence()) { - multilineStringLiteral = true; - _tokenText += _char; - if (sequenceLength == 2) - _tokenText += *_codePtr; - scanChar(); - } else if (_char == mode) { + if (_char == quote) { scanChar(); - if (_engine) + if (_engine) { _tokenSpell = _engine->newStringRef(_tokenText); + if (quote == QLatin1Char('`')) + setRawString(_codePtr - 1); + } if (quote == QLatin1Char('`')) _bracesCount = _outerTemplateBraceCount.pop(); @@ -871,8 +892,10 @@ int Lexer::scanString(ScanStringMode mode) scanChar(); scanChar(); _bracesCount = 1; - if (_engine) + if (_engine) { _tokenSpell = _engine->newStringRef(_tokenText); + setRawString(_codePtr - 2); + } return (mode == TemplateHead ? T_TEMPLATE_HEAD : T_TEMPLATE_MIDDLE); } else if (_char == QLatin1Char('\\')) { diff --git a/src/libs/qmljs/parser/qmljslexer_p.h b/src/libs/qmljs/parser/qmljslexer_p.h index 39128b44098..5773606c399 100644 --- a/src/libs/qmljs/parser/qmljslexer_p.h +++ b/src/libs/qmljs/parser/qmljslexer_p.h @@ -146,6 +146,7 @@ public: int tokenStartColumn() const { return _tokenColumn; } inline QStringRef tokenSpell() const { return _tokenSpell; } + inline QStringRef rawString() const { return _rawString; } double tokenValue() const { return _tokenValue; } QString tokenText() const; @@ -198,6 +199,7 @@ private: QString _tokenText; QString _errorMessage; QStringRef _tokenSpell; + QStringRef _rawString; const QChar *_codePtr; const QChar *_endPtr; @@ -233,6 +235,7 @@ private: bool _followsClosingBrace; bool _delimited; bool _qmlMode; + bool _skipLinefeed = false; int _generatorLevel = 0; bool _staticIsKeyword = false; };