qmljs: handle string templates in scanner

* add (multiline) template string support
  templates can be nested, which means that the scanner/lexer cannot
  be a simple state machine anymore, but should have a stack to store
  the state (the number of open braces in the current template
  string).
  The lexer state is currently stored in an int, so we abuse that and
  store the number of open braces (maximum 0x7f = 127) for at most 4
  nested templates in the int after the flags for the multiline
  comments and strings.
* improve representation of delimiters (==, <=, ||,... not split in
separate delimiters)
* (QmlDom backport)

Change-Id: I2b4d23b65febedef29a748f4c5f377fde27bd7fd
Fixes: QTCREATORBUG-22766
Reviewed-by: Fabian Kosmale <fabian.kosmale@qt.io>
This commit is contained in:
Fawzi Mohamed
2021-02-19 16:30:21 +01:00
parent 28d6be9867
commit bd05384765
3 changed files with 170 additions and 11 deletions

View File

@@ -183,6 +183,22 @@ static inline void setRegexpMayFollow(int *state, bool on)
*state = (on ? Scanner::RegexpMayFollow : 0) | (*state & ~Scanner::RegexpMayFollow);
}
// Returns the nesting level (0..4) encoded in the brace counters of `state`:
// the index of the highest counter that is non-zero, or 0 when all four
// counters are empty.
static inline int templateExpressionDepth(int state)
{
    // Probe the counters from the outermost possible level downwards; the
    // first non-empty one determines the depth.
    if ((state & Scanner::TemplateExpressionOpenBracesMask4) != 0)
        return 4;
    if ((state & Scanner::TemplateExpressionOpenBracesMask3) != 0)
        return 3;
    if ((state & Scanner::TemplateExpressionOpenBracesMask2) != 0)
        return 2;
    if ((state & Scanner::TemplateExpressionOpenBracesMask1) != 0)
        return 1;
    return 0;
}
QList<Token> Scanner::operator()(const QString &text, int startState)
{
_state = startState;
@@ -190,6 +206,45 @@ QList<Token> Scanner::operator()(const QString &text, int startState)
int index = 0;
auto scanTemplateString = [&index, &text, &tokens, this](int startShift = 0){
const QChar quote = QLatin1Char('`');
const int start = index + startShift;
while (index < text.length()) {
const QChar ch = text.at(index);
if (ch == quote)
break;
else if (ch == QLatin1Char('$') && index + 1 < text.length() && text.at(index + 1) == QLatin1Char('{')) {
tokens.append(Token(start, index - start, Token::String));
tokens.append(Token(index, 2, Token::Delimiter));
index += 2;
setRegexpMayFollow(&_state, true);
setMultiLineState(&_state, Normal);
int depth = templateExpressionDepth(_state);
if (depth == 4) {
qWarning() << "QQmljs::Dom::Scanner reached maximum nesting of template expressions (4), parsing will fail";
} else {
_state |= 1 << (4 + depth * 7);
}
return;
} else if (ch == QLatin1Char('\\') && index + 1 < text.length())
index += 2;
else
++index;
}
if (index < text.length()) {
setMultiLineState(&_state, Normal);
++index;
// good one
} else {
setMultiLineState(&_state, MultiLineStringBQuote);
}
tokens.append(Token(start, index - start, Token::String));
setRegexpMayFollow(&_state, false);
};
if (multiLineState(_state) == MultiLineComment) {
int start = -1;
while (index < text.length()) {
@@ -233,8 +288,14 @@ QList<Token> Scanner::operator()(const QString &text, int startState)
if (start < index)
tokens.append(Token(start, index - start, Token::String));
setRegexpMayFollow(&_state, false);
} else if (multiLineState(_state) == MultiLineStringBQuote) {
scanTemplateString();
}
// Bit offset of the open-brace counter belonging to nesting level
// `templateDepth` (1-based): the counters follow the FlagsBits flag bits and
// are BraceCounterBits wide each.
auto braceCounterOffset = [](int templateDepth) {
    const int countersBelow = templateDepth - 1;
    return FlagsBits + countersBelow * BraceCounterBits;
};
while (index < text.length()) {
const QChar ch = text.at(index);
@@ -273,6 +334,9 @@ QList<Token> Scanner::operator()(const QString &text, int startState)
tokens.append(Token(index, end - index, Token::RegExp));
index = end;
setRegexpMayFollow(&_state, false);
} else if (la == QLatin1Char('=')) {
tokens.append(Token(index, 2, Token::Delimiter));
index += 2;
} else {
tokens.append(Token(index++, 1, Token::Delimiter));
setRegexpMayFollow(&_state, true);
@@ -280,7 +344,6 @@ QList<Token> Scanner::operator()(const QString &text, int startState)
break;
case '\'':
case '`':
case '"': {
const QChar quote = ch;
const int start = index;
@@ -310,6 +373,11 @@ QList<Token> Scanner::operator()(const QString &text, int startState)
setRegexpMayFollow(&_state, false);
} break;
case '`': {
++index;
scanTemplateString(-1);
} break;
case '.':
if (la.isDigit()) {
const int start = index;
@@ -343,15 +411,38 @@ QList<Token> Scanner::operator()(const QString &text, int startState)
setRegexpMayFollow(&_state, false);
break;
case '{':
case '{':{
tokens.append(Token(index++, 1, Token::LeftBrace));
setRegexpMayFollow(&_state, true);
break;
// Inside a template expression (depth > 0) every '{' is counted in the brace
// counter of the current nesting level.
int depth = templateExpressionDepth(_state);
if (depth > 0) {
int shift = braceCounterOffset(depth);
int mask = Scanner::TemplateExpressionOpenBracesMask0 << shift;
// All bits of the counter are already set: it cannot be incremented, so only warn.
if ((_state & mask) == mask) {
qWarning() << "QQmljs::Dom::Scanner reached maximum open braces of template expressions (127), parsing will fail";
} else {
// Extract the counter, add one, and write it back into its bit field.
_state = (_state & ~mask) | (((Scanner::TemplateExpressionOpenBracesMask0 & (_state >> shift)) + 1) << shift);
}
}
} break;
case '}':
tokens.append(Token(index++, 1, Token::RightBrace));
case '}': {
setRegexpMayFollow(&_state, false);
break;
int depth = templateExpressionDepth(_state);
if (depth > 0) {
// Inside a template expression: decrement the open-brace counter of the
// current nesting level.
int shift = braceCounterOffset(depth);
int s = _state;
int nBraces = Scanner::TemplateExpressionOpenBracesMask0 & (s >> shift);
int mask = Scanner::TemplateExpressionOpenBracesMask0 << shift;
_state = (s & ~mask) | ((nBraces - 1) << shift);
if (nBraces == 1) {
// The counter just dropped to zero: this '}' is emitted as a Delimiter
// (not a RightBrace) and scanning continues with the template string text.
tokens.append(Token(index++, 1, Token::Delimiter));
scanTemplateString();
break;
}
}
// Every other '}' is emitted as a RightBrace token.
tokens.append(Token(index++, 1, Token::RightBrace));
} break;
case ';':
tokens.append(Token(index++, 1, Token::Semicolon));
@@ -370,6 +461,32 @@ QList<Token> Scanner::operator()(const QString &text, int startState)
case '+':
case '-':
case '<':
if (la == ch || la == QLatin1Char('=')) {
tokens.append(Token(index, 2, Token::Delimiter));
index += 2;
} else {
tokens.append(Token(index++, 1, Token::Delimiter));
}
setRegexpMayFollow(&_state, true);
break;
case '>':
    // Longest match first: '>>>' has to be tested before '>>' / '>='.
    if (la == ch && index + 2 < text.length() && text.at(index + 2) == ch) {
        // '>>>': the token must span all three characters that are consumed,
        // otherwise the last '>' is not covered by any token.
        tokens.append(Token(index, 3, Token::Delimiter));
        index += 3;
    } else if (la == ch || la == QLatin1Char('=')) {
        // '>>' or '>='
        tokens.append(Token(index, 2, Token::Delimiter));
        index += 2;
    } else {
        // plain '>'
        tokens.append(Token(index++, 1, Token::Delimiter));
    }
    setRegexpMayFollow(&_state, true);
    break;
case '|':
case '=':
case '&':
if (la == ch) {
tokens.append(Token(index, 2, Token::Delimiter));
index += 2;

View File

@@ -62,23 +62,60 @@ public:
inline bool is(int k) const { return k == kind; }
inline bool isNot(int k) const { return k != kind; }
// Three-way comparison of two tokens: ordered by offset, then by length, then
// by kind. Returns 0 when all three agree; otherwise the (non-zero)
// difference of the first field that differs, t1 minus t2.
static int compare(const Token &t1, const Token &t2) {
    const int offsetDelta = t1.offset - t2.offset;
    if (offsetDelta != 0)
        return offsetDelta;

    const int lengthDelta = t1.length - t2.length;
    if (lengthDelta != 0)
        return lengthDelta;

    return int(t1.kind) - int(t2.kind);
}
public:
// Data members compared by Token::compare(): offset first, then length, then kind.
// NOTE(review): the scanner builds tokens as Token(start, length, kind);
// presumably those arguments map onto these members in order - confirm
// against the constructor.
int offset;
int length;
Kind kind;
int offset = 0;
int length = 0;
Kind kind = EndOfFile;
};
// Tokens are equal when Token::compare() finds no difference between them.
inline int operator == (const Token &t1, const Token &t2) {
    const int order = Token::compare(t1, t2);
    return order == 0;
}
// Tokens are unequal when Token::compare() reports any difference.
inline int operator != (const Token &t1, const Token &t2) {
    const int order = Token::compare(t1, t2);
    return order != 0;
}
class QMLJS_EXPORT Scanner
{
public:
// Bit widths used to lay out the scanner state int: the first open-brace
// counter starts at bit FlagsBits, and each following counter starts
// BraceCounterBits higher.
enum {
FlagsBits = 4,
BraceCounterBits = 7
};
// Values and bit masks that make up the int scanner state (the startState
// argument of operator()).
enum {
Normal = 0,
MultiLineComment = 1,
MultiLineStringDQuote = 2,
MultiLineStringSQuote = 3,
MultiLineMask = 3,
MultiLineStringBQuote = 4,
MultiLineMask = 7,
RegexpMayFollow = 4 // flag that may be combined with the above
RegexpMayFollow = 8, // flag that may be combined with the above
// templates can be nested, which means that the scanner/lexer cannot
// be a simple state machine anymore, but should have a stack to store
// the state (the number of open braces in the current template
// string).
// The lexer state is currently stored in an int, so we abuse that and
// store the number of open braces (maximum 0x7f = 127) for at most 4
// nested templates in the int after the flags for the multiline
// comments and strings.
// A count of one in the first brace counter (bit 4).
TemplateExpression = 0x1 << 4,
// Unshifted 7-bit counter mask; shift it by a counter's bit offset to select that counter.
TemplateExpressionOpenBracesMask0 = 0x7F,
// Counters for nesting levels 1 to 4, each 7 bits wide.
TemplateExpressionOpenBracesMask1 = 0x7F << 4,
TemplateExpressionOpenBracesMask2 = 0x7F << 11,
TemplateExpressionOpenBracesMask3 = 0x7F << 18,
TemplateExpressionOpenBracesMask4 = 0x7F << 25,
// Union of all four counters; zero means no counter is in use.
TemplateExpressionOpenBracesMask = TemplateExpressionOpenBracesMask1 | TemplateExpressionOpenBracesMask2
| TemplateExpressionOpenBracesMask3 | TemplateExpressionOpenBracesMask4
};
Scanner();

View File

@@ -5,6 +5,11 @@ var a_var = 1
let a_let = 2
const a_const = 3
const tmpl = `template` + `t${i + 6}` + `t${i + `nested${i}`}` + `t${function () {
return 5
}()}` + `t\${i}
${i + 2}`
function foo(a, b) {
x = 15
x += 4