forked from qt-creator/qt-creator
qmljs: handle string templates in scanner
* add (multiline) template string support: templates can be nested, which means that the scanner/lexer cannot be a simple state machine anymore, but should have a stack to store the state (the number of open braces in the current template string). The lexer state is currently stored in an int, so we abuse that and store the number of open braces (maximum 0x7f = 127) for at most 4 nested templates in the int after the flags for the multiline comments and strings. * improve representation of delimiters (==, <=, ||, ... are no longer split into separate delimiters) * (QmlDom backport) Change-Id: I2b4d23b65febedef29a748f4c5f377fde27bd7fd Fixes: QTCREATORBUG-22766 Reviewed-by: Fabian Kosmale <fabian.kosmale@qt.io>
This commit is contained in:
@@ -183,6 +183,22 @@ static inline void setRegexpMayFollow(int *state, bool on)
|
|||||||
*state = (on ? Scanner::RegexpMayFollow : 0) | (*state & ~Scanner::RegexpMayFollow);
|
*state = (on ? Scanner::RegexpMayFollow : 0) | (*state & ~Scanner::RegexpMayFollow);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int templateExpressionDepth(int state)
|
||||||
|
{
|
||||||
|
if ((state & Scanner::TemplateExpressionOpenBracesMask) == 0)
|
||||||
|
return 0;
|
||||||
|
if ((state & (Scanner::TemplateExpressionOpenBracesMask3 | Scanner::TemplateExpressionOpenBracesMask4)) == 0) {
|
||||||
|
if ((state & Scanner::TemplateExpressionOpenBracesMask2) == 0)
|
||||||
|
return 1;
|
||||||
|
else
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
if ((state & Scanner::TemplateExpressionOpenBracesMask4) == 0)
|
||||||
|
return 3;
|
||||||
|
else
|
||||||
|
return 4;
|
||||||
|
}
|
||||||
|
|
||||||
QList<Token> Scanner::operator()(const QString &text, int startState)
|
QList<Token> Scanner::operator()(const QString &text, int startState)
|
||||||
{
|
{
|
||||||
_state = startState;
|
_state = startState;
|
||||||
@@ -190,6 +206,45 @@ QList<Token> Scanner::operator()(const QString &text, int startState)
|
|||||||
|
|
||||||
int index = 0;
|
int index = 0;
|
||||||
|
|
||||||
|
auto scanTemplateString = [&index, &text, &tokens, this](int startShift = 0){
|
||||||
|
const QChar quote = QLatin1Char('`');
|
||||||
|
const int start = index + startShift;
|
||||||
|
while (index < text.length()) {
|
||||||
|
const QChar ch = text.at(index);
|
||||||
|
|
||||||
|
if (ch == quote)
|
||||||
|
break;
|
||||||
|
else if (ch == QLatin1Char('$') && index + 1 < text.length() && text.at(index + 1) == QLatin1Char('{')) {
|
||||||
|
tokens.append(Token(start, index - start, Token::String));
|
||||||
|
tokens.append(Token(index, 2, Token::Delimiter));
|
||||||
|
index += 2;
|
||||||
|
setRegexpMayFollow(&_state, true);
|
||||||
|
setMultiLineState(&_state, Normal);
|
||||||
|
int depth = templateExpressionDepth(_state);
|
||||||
|
if (depth == 4) {
|
||||||
|
qWarning() << "QQmljs::Dom::Scanner reached maximum nesting of template expressions (4), parsing will fail";
|
||||||
|
} else {
|
||||||
|
_state |= 1 << (4 + depth * 7);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
} else if (ch == QLatin1Char('\\') && index + 1 < text.length())
|
||||||
|
index += 2;
|
||||||
|
else
|
||||||
|
++index;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (index < text.length()) {
|
||||||
|
setMultiLineState(&_state, Normal);
|
||||||
|
++index;
|
||||||
|
// good one
|
||||||
|
} else {
|
||||||
|
setMultiLineState(&_state, MultiLineStringBQuote);
|
||||||
|
}
|
||||||
|
|
||||||
|
tokens.append(Token(start, index - start, Token::String));
|
||||||
|
setRegexpMayFollow(&_state, false);
|
||||||
|
};
|
||||||
|
|
||||||
if (multiLineState(_state) == MultiLineComment) {
|
if (multiLineState(_state) == MultiLineComment) {
|
||||||
int start = -1;
|
int start = -1;
|
||||||
while (index < text.length()) {
|
while (index < text.length()) {
|
||||||
@@ -233,8 +288,14 @@ QList<Token> Scanner::operator()(const QString &text, int startState)
|
|||||||
if (start < index)
|
if (start < index)
|
||||||
tokens.append(Token(start, index - start, Token::String));
|
tokens.append(Token(start, index - start, Token::String));
|
||||||
setRegexpMayFollow(&_state, false);
|
setRegexpMayFollow(&_state, false);
|
||||||
|
} else if (multiLineState(_state) == MultiLineStringBQuote) {
|
||||||
|
scanTemplateString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto braceCounterOffset = [](int templateDepth) {
|
||||||
|
return FlagsBits + (templateDepth - 1) * BraceCounterBits;
|
||||||
|
};
|
||||||
|
|
||||||
while (index < text.length()) {
|
while (index < text.length()) {
|
||||||
const QChar ch = text.at(index);
|
const QChar ch = text.at(index);
|
||||||
|
|
||||||
@@ -273,6 +334,9 @@ QList<Token> Scanner::operator()(const QString &text, int startState)
|
|||||||
tokens.append(Token(index, end - index, Token::RegExp));
|
tokens.append(Token(index, end - index, Token::RegExp));
|
||||||
index = end;
|
index = end;
|
||||||
setRegexpMayFollow(&_state, false);
|
setRegexpMayFollow(&_state, false);
|
||||||
|
} else if (la == QLatin1Char('=')) {
|
||||||
|
tokens.append(Token(index, 2, Token::Delimiter));
|
||||||
|
index += 2;
|
||||||
} else {
|
} else {
|
||||||
tokens.append(Token(index++, 1, Token::Delimiter));
|
tokens.append(Token(index++, 1, Token::Delimiter));
|
||||||
setRegexpMayFollow(&_state, true);
|
setRegexpMayFollow(&_state, true);
|
||||||
@@ -280,7 +344,6 @@ QList<Token> Scanner::operator()(const QString &text, int startState)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case '\'':
|
case '\'':
|
||||||
case '`':
|
|
||||||
case '"': {
|
case '"': {
|
||||||
const QChar quote = ch;
|
const QChar quote = ch;
|
||||||
const int start = index;
|
const int start = index;
|
||||||
@@ -310,6 +373,11 @@ QList<Token> Scanner::operator()(const QString &text, int startState)
|
|||||||
setRegexpMayFollow(&_state, false);
|
setRegexpMayFollow(&_state, false);
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
|
case '`': {
|
||||||
|
++index;
|
||||||
|
scanTemplateString(-1);
|
||||||
|
} break;
|
||||||
|
|
||||||
case '.':
|
case '.':
|
||||||
if (la.isDigit()) {
|
if (la.isDigit()) {
|
||||||
const int start = index;
|
const int start = index;
|
||||||
@@ -343,15 +411,38 @@ QList<Token> Scanner::operator()(const QString &text, int startState)
|
|||||||
setRegexpMayFollow(&_state, false);
|
setRegexpMayFollow(&_state, false);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case '{':
|
case '{':{
|
||||||
tokens.append(Token(index++, 1, Token::LeftBrace));
|
tokens.append(Token(index++, 1, Token::LeftBrace));
|
||||||
setRegexpMayFollow(&_state, true);
|
setRegexpMayFollow(&_state, true);
|
||||||
break;
|
int depth = templateExpressionDepth(_state);
|
||||||
|
if (depth > 0) {
|
||||||
|
int shift = braceCounterOffset(depth);
|
||||||
|
int mask = Scanner::TemplateExpressionOpenBracesMask0 << shift;
|
||||||
|
if ((_state & mask) == mask) {
|
||||||
|
qWarning() << "QQmljs::Dom::Scanner reached maximum open braces of template expressions (127), parsing will fail";
|
||||||
|
} else {
|
||||||
|
_state = (_state & ~mask) | (((Scanner::TemplateExpressionOpenBracesMask0 & (_state >> shift)) + 1) << shift);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} break;
|
||||||
|
|
||||||
case '}':
|
case '}': {
|
||||||
tokens.append(Token(index++, 1, Token::RightBrace));
|
|
||||||
setRegexpMayFollow(&_state, false);
|
setRegexpMayFollow(&_state, false);
|
||||||
break;
|
int depth = templateExpressionDepth(_state);
|
||||||
|
if (depth > 0) {
|
||||||
|
int shift = braceCounterOffset(depth);
|
||||||
|
int s = _state;
|
||||||
|
int nBraces = Scanner::TemplateExpressionOpenBracesMask0 & (s >> shift);
|
||||||
|
int mask = Scanner::TemplateExpressionOpenBracesMask0 << shift;
|
||||||
|
_state = (s & ~mask) | ((nBraces - 1) << shift);
|
||||||
|
if (nBraces == 1) {
|
||||||
|
tokens.append(Token(index++, 1, Token::Delimiter));
|
||||||
|
scanTemplateString();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tokens.append(Token(index++, 1, Token::RightBrace));
|
||||||
|
} break;
|
||||||
|
|
||||||
case ';':
|
case ';':
|
||||||
tokens.append(Token(index++, 1, Token::Semicolon));
|
tokens.append(Token(index++, 1, Token::Semicolon));
|
||||||
@@ -370,6 +461,32 @@ QList<Token> Scanner::operator()(const QString &text, int startState)
|
|||||||
|
|
||||||
case '+':
|
case '+':
|
||||||
case '-':
|
case '-':
|
||||||
|
case '<':
|
||||||
|
if (la == ch || la == QLatin1Char('=')) {
|
||||||
|
tokens.append(Token(index, 2, Token::Delimiter));
|
||||||
|
index += 2;
|
||||||
|
} else {
|
||||||
|
tokens.append(Token(index++, 1, Token::Delimiter));
|
||||||
|
}
|
||||||
|
setRegexpMayFollow(&_state, true);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case '>':
|
||||||
|
if (la == ch && index + 2 < text.length() && text.at(index + 2) == ch) {
|
||||||
|
tokens.append(Token(index, 2, Token::Delimiter));
|
||||||
|
index += 3;
|
||||||
|
} else if (la == ch || la == QLatin1Char('=')) {
|
||||||
|
tokens.append(Token(index, 2, Token::Delimiter));
|
||||||
|
index += 2;
|
||||||
|
} else {
|
||||||
|
tokens.append(Token(index++, 1, Token::Delimiter));
|
||||||
|
}
|
||||||
|
setRegexpMayFollow(&_state, true);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case '|':
|
||||||
|
case '=':
|
||||||
|
case '&':
|
||||||
if (la == ch) {
|
if (la == ch) {
|
||||||
tokens.append(Token(index, 2, Token::Delimiter));
|
tokens.append(Token(index, 2, Token::Delimiter));
|
||||||
index += 2;
|
index += 2;
|
||||||
|
@@ -62,23 +62,60 @@ public:
|
|||||||
inline bool is(int k) const { return k == kind; }
|
inline bool is(int k) const { return k == kind; }
|
||||||
inline bool isNot(int k) const { return k != kind; }
|
inline bool isNot(int k) const { return k != kind; }
|
||||||
|
|
||||||
|
static int compare(const Token &t1, const Token &t2) {
|
||||||
|
if (int c = t1.offset - t2.offset)
|
||||||
|
return c;
|
||||||
|
if (int c = t1.length - t2.length)
|
||||||
|
return c;
|
||||||
|
return int(t1.kind) - int(t2.kind);
|
||||||
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
int offset;
|
int offset = 0;
|
||||||
int length;
|
int length = 0;
|
||||||
Kind kind;
|
Kind kind = EndOfFile;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
inline int operator == (const Token &t1, const Token &t2) {
|
||||||
|
return Token::compare(t1, t2) == 0;
|
||||||
|
}
|
||||||
|
inline int operator != (const Token &t1, const Token &t2) {
|
||||||
|
return Token::compare(t1, t2) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
class QMLJS_EXPORT Scanner
|
class QMLJS_EXPORT Scanner
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
enum {
|
||||||
|
FlagsBits = 4,
|
||||||
|
BraceCounterBits = 7
|
||||||
|
};
|
||||||
enum {
|
enum {
|
||||||
Normal = 0,
|
Normal = 0,
|
||||||
MultiLineComment = 1,
|
MultiLineComment = 1,
|
||||||
MultiLineStringDQuote = 2,
|
MultiLineStringDQuote = 2,
|
||||||
MultiLineStringSQuote = 3,
|
MultiLineStringSQuote = 3,
|
||||||
MultiLineMask = 3,
|
MultiLineStringBQuote = 4,
|
||||||
|
MultiLineMask = 7,
|
||||||
|
|
||||||
RegexpMayFollow = 4 // flag that may be combined with the above
|
RegexpMayFollow = 8, // flag that may be combined with the above
|
||||||
|
|
||||||
|
// templates can be nested, which means that the scanner/lexer cannot
|
||||||
|
// be a simple state machine anymore, but should have a stack to store
|
||||||
|
// the state (the number of open braces in the current template
|
||||||
|
// string).
|
||||||
|
// The lexer state is currently stored in an int, so we abuse that and
|
||||||
|
// store the number of open braces (maximum 0x7f = 127) for at most 4
|
||||||
|
// nested templates in the int after the flags for the multiline
|
||||||
|
// comments and strings.
|
||||||
|
TemplateExpression = 0x1 << 4,
|
||||||
|
TemplateExpressionOpenBracesMask0 = 0x7F,
|
||||||
|
TemplateExpressionOpenBracesMask1 = 0x7F << 4,
|
||||||
|
TemplateExpressionOpenBracesMask2 = 0x7F << 11,
|
||||||
|
TemplateExpressionOpenBracesMask3 = 0x7F << 18,
|
||||||
|
TemplateExpressionOpenBracesMask4 = 0x7F << 25,
|
||||||
|
TemplateExpressionOpenBracesMask = TemplateExpressionOpenBracesMask1 | TemplateExpressionOpenBracesMask2
|
||||||
|
| TemplateExpressionOpenBracesMask3 | TemplateExpressionOpenBracesMask4
|
||||||
};
|
};
|
||||||
|
|
||||||
Scanner();
|
Scanner();
|
||||||
|
@@ -5,6 +5,11 @@ var a_var = 1
|
|||||||
let a_let = 2
|
let a_let = 2
|
||||||
const a_const = 3
|
const a_const = 3
|
||||||
|
|
||||||
|
const tmpl = `template` + `t${i + 6}` + `t${i + `nested${i}`}` + `t${function () {
|
||||||
|
return 5
|
||||||
|
}()}` + `t\${i}
|
||||||
|
${i + 2}`
|
||||||
|
|
||||||
function foo(a, b) {
|
function foo(a, b) {
|
||||||
x = 15
|
x = 15
|
||||||
x += 4
|
x += 4
|
||||||
|
Reference in New Issue
Block a user