Say hello to the new incremental scanner for QML/JS.

This commit is contained in:
Roberto Raggi
2010-01-28 13:12:52 +01:00
parent 7a379001cb
commit fa925ccd5a
5 changed files with 212 additions and 343 deletions

View File

@@ -31,6 +31,7 @@
#include <QtCore/QSet> #include <QtCore/QSet>
#include <QtCore/QtAlgorithms> #include <QtCore/QtAlgorithms>
#include <QtCore/QDebug>
using namespace QmlJS; using namespace QmlJS;
@@ -58,11 +59,10 @@ bool QScriptHighlighter::isDuiEnabled() const
void QScriptHighlighter::highlightBlock(const QString &text) void QScriptHighlighter::highlightBlock(const QString &text)
{ {
m_scanner(text, onBlockStart()); const QList<Token> tokens = m_scanner(text, onBlockStart());
QTextCharFormat emptyFormat; QTextCharFormat emptyFormat;
int lastEnd = 0; int lastEnd = 0;
const QList<Token> tokens = m_scanner.tokens();
for (int i = 0; i < tokens.size(); ++i) { for (int i = 0; i < tokens.size(); ++i) {
const Token token = tokens.at(i); const Token token = tokens.at(i);
@@ -111,7 +111,7 @@ void QScriptHighlighter::highlightBlock(const QString &text)
break; break;
case Token::Identifier: case Token::Identifier:
if (m_duiEnabled && (i + 1 != tokens.size()) && tokens.at(i + 1).kind == Token::Colon) { if (m_duiEnabled && (i + 1) < tokens.size() && tokens.at(i + 1).is(Token::Colon)) {
int j = i; int j = i;
for (; j != -1; --j) { for (; j != -1; --j) {
const Token &tok = tokens.at(j); const Token &tok = tokens.at(j);
@@ -138,8 +138,7 @@ void QScriptHighlighter::highlightBlock(const QString &text)
setFormat(token.offset, token.length, emptyFormat); setFormat(token.offset, token.length, emptyFormat);
break; break;
case Token::Operator: case Token::Delimiter:
case Token::Dot:
setFormat(token.offset, token.length, emptyFormat); setFormat(token.offset, token.length, emptyFormat);
break; break;
@@ -150,13 +149,21 @@ void QScriptHighlighter::highlightBlock(const QString &text)
lastEnd = token.end(); lastEnd = token.end();
} }
const int firstNonSpace = m_scanner.firstNonSpace(); int firstNonSpace = 0;
if (! tokens.isEmpty()) {
const Token &tk = tokens.first();
firstNonSpace = tk.offset;
}
if (firstNonSpace > lastEnd) if (firstNonSpace > lastEnd)
setFormat(lastEnd, firstNonSpace - lastEnd, m_formats[VisualWhitespace]); setFormat(lastEnd, firstNonSpace - lastEnd, m_formats[VisualWhitespace]);
else if (text.length() > lastEnd) else if (text.length() > lastEnd)
setFormat(lastEnd, text.length() - lastEnd, m_formats[VisualWhitespace]); setFormat(lastEnd, text.length() - lastEnd, m_formats[VisualWhitespace]);
onBlockEnd(m_scanner.endState(), firstNonSpace); onBlockEnd(m_scanner.endState(), firstNonSpace);
setCurrentBlockState(m_scanner.endState());
} }
void QScriptHighlighter::setFormats(const QVector<QTextCharFormat> &s) void QScriptHighlighter::setFormats(const QVector<QTextCharFormat> &s)
@@ -237,15 +244,20 @@ QSet<QString> QScriptHighlighter::keywords()
int QScriptHighlighter::onBlockStart() int QScriptHighlighter::onBlockStart()
{ {
int state = 0; return currentBlockState();
int previousState = previousBlockState(); }
if (previousState != -1)
state = previousState; void QScriptHighlighter::onBlockEnd(int, int)
return state; {
}
void QScriptHighlighter::onOpeningParenthesis(QChar, int)
{
}
void QScriptHighlighter::onClosingParenthesis(QChar, int)
{
} }
void QScriptHighlighter::onOpeningParenthesis(QChar, int) {}
void QScriptHighlighter::onClosingParenthesis(QChar, int) {}
void QScriptHighlighter::onBlockEnd(int state, int) { return setCurrentBlockState(state); }
void QScriptHighlighter::highlightWhitespace(const Token &token, const QString &text, int nonWhitespaceFormat) void QScriptHighlighter::highlightWhitespace(const Token &token, const QString &text, int nonWhitespaceFormat)
{ {

View File

@@ -61,13 +61,13 @@ public:
QSet<QString> keywords(); QSet<QString> keywords();
protected: protected:
virtual int onBlockStart();
virtual void onBlockEnd(int state, int firstNonSpace);
// The functions are notified whenever parentheses are encountered. // The functions are notified whenever parentheses are encountered.
// Custom behaviour can be added, for example storing info for indenting. // Custom behaviour can be added, for example storing info for indenting.
virtual int onBlockStart(); // returns the blocks initial state
virtual void onOpeningParenthesis(QChar parenthesis, int pos); virtual void onOpeningParenthesis(QChar parenthesis, int pos);
virtual void onClosingParenthesis(QChar parenthesis, int pos); virtual void onClosingParenthesis(QChar parenthesis, int pos);
// sets the enriched user state, or simply calls setCurrentBlockState(state);
virtual void onBlockEnd(int state, int firstNonSpace);
virtual void highlightWhitespace(const Token &token, const QString &text, int nonWhitespaceFormat); virtual void highlightWhitespace(const Token &token, const QString &text, int nonWhitespaceFormat);

View File

@@ -256,7 +256,7 @@ QString QmlJSIndenter::trimmedCodeLine(const QString &t)
case Token::LeftParenthesis: case Token::LeftParenthesis:
case Token::LeftBrace: case Token::LeftBrace:
case Token::Semicolon: case Token::Semicolon:
case Token::Operator: case Token::Delimiter:
break; break;
case Token::RightParenthesis: case Token::RightParenthesis:

View File

@@ -34,330 +34,209 @@
using namespace QmlJS; using namespace QmlJS;
QmlJSScanner::QmlJSScanner() QmlJSScanner::QmlJSScanner()
: m_state(0)
{ {
reset();
} }
QmlJSScanner::~QmlJSScanner() QmlJSScanner::~QmlJSScanner()
{}
void QmlJSScanner::reset()
{ {
m_endState = -1; }
m_firstNonSpace = -1;
m_tokens.clear(); static bool isIdentifierChar(QChar ch)
{
switch (ch.unicode()) {
case '$': case '_':
return true;
default:
return ch.isLetterOrNumber();
}
}
static bool isNumberChar(QChar ch)
{
switch (ch.unicode()) {
case '.':
case 'e':
case 'E': // ### more...
return true;
default:
return ch.isLetterOrNumber();
}
} }
QList<Token> QmlJSScanner::operator()(const QString &text, int startState) QList<Token> QmlJSScanner::operator()(const QString &text, int startState)
{ {
reset();
// tokens
enum TokenKind {
InputAlpha,
InputNumber,
InputAsterix,
InputSlash,
InputSpace,
InputQuotation,
InputApostrophe,
InputSep,
NumInputs
};
// states
enum { enum {
StateStandard, Normal = 0,
StateCommentStart1, // '/' MultiLineComment = 1
StateCCommentStart2, // '*' after a '/'
StateCppCommentStart2, // '/' after a '/'
StateCComment, // after a "/*"
StateCppComment, // after a "//"
StateCCommentEnd1, // '*' in a CppComment
StateCCommentEnd2, // '/' after a '*' in a CppComment
StateStringStart,
StateString,
StateStringEnd,
StateString2Start,
StateString2,
StateString2End,
StateNumber,
NumStates
}; };
static const uchar table[NumStates][NumInputs] = { m_state = startState;
// InputAlpha InputNumber InputAsterix InputSlash InputSpace InputQuotation InputApostrophe InputSep QList<Token> tokens;
{ StateStandard, StateNumber, StateStandard, StateCommentStart1, StateStandard, StateStringStart, StateString2Start, StateStandard }, // StateStandard
{ StateStandard, StateNumber, StateCCommentStart2, StateCppCommentStart2, StateStandard, StateStringStart, StateString2Start, StateStandard }, // StateCommentStart1
{ StateCComment, StateCComment, StateCCommentEnd1, StateCComment, StateCComment, StateCComment, StateCComment, StateCComment }, // StateCCommentStart2
{ StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment }, // StateCppCommentStart2
{ StateCComment, StateCComment, StateCCommentEnd1, StateCComment, StateCComment, StateCComment, StateCComment, StateCComment }, // StateCComment
{ StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment }, // StateCppComment
{ StateCComment, StateCComment, StateCCommentEnd1, StateCCommentEnd2, StateCComment, StateCComment, StateCComment, StateCComment }, // StateCCommentEnd1
{ StateStandard, StateNumber, StateStandard, StateCommentStart1, StateStandard, StateStringStart, StateString2Start, StateStandard }, // StateCCommentEnd2
{ StateString, StateString, StateString, StateString, StateString, StateStringEnd, StateString, StateString }, // StateStringStart
{ StateString, StateString, StateString, StateString, StateString, StateStringEnd, StateString, StateString }, // StateString
{ StateStandard, StateStandard, StateStandard, StateCommentStart1, StateStandard, StateStringStart, StateString2Start, StateStandard }, // StateStringEnd
{ StateString2, StateString2, StateString2, StateString2, StateString2, StateString2, StateString2End, StateString2 }, // StateString2Start
{ StateString2, StateString2, StateString2, StateString2, StateString2, StateString2, StateString2End, StateString2 }, // StateString2
{ StateStandard, StateStandard, StateStandard, StateCommentStart1, StateStandard, StateStringStart, StateString2Start, StateStandard }, // StateString2End
{ StateNumber, StateNumber, StateStandard, StateCommentStart1, StateStandard, StateStringStart, StateString2Start, StateStandard } // StateNumber
};
int state = startState; // ### handle multi line comment state.
if (text.isEmpty()) {
blockEnd(state, 0); int index = 0;
return m_tokens;
if (m_state == MultiLineComment) {
const int start = index;
while (index < text.length()) {
const QChar ch = text.at(index);
QChar la;
if (index + 1 < text.length())
la = text.at(index + 1);
if (ch == QLatin1Char('*') && la == QLatin1Char('/')) {
m_state = Normal;
index += 2;
break;
} else {
++index;
}
}
tokens.append(Token(start, index - start, Token::Comment));
} }
int input = -1; while (index < text.length()) {
int i = 0; const QChar ch = text.at(index);
bool lastWasBackSlash = false;
bool makeLastStandard = false;
static const QString alphabeth = QLatin1String("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"); QChar la; // lookahead char
static const QString mathChars = QString::fromLatin1("xXeE"); if (index + 1 < text.length())
static const QString numbers = QString::fromLatin1("0123456789"); la = text.at(index + 1);
QChar lastChar;
int firstNonSpace = -1; switch (ch.unicode()) {
int lastNonSpace = -1; case '/':
if (la == QLatin1Char('/')) {
tokens.append(Token(index, text.length() - index, Token::Comment));
index = text.length();
} else if (la == QLatin1Char('*')) {
const int start = index;
index += 2;
m_state = MultiLineComment;
while (index < text.length()) {
const QChar ch = text.at(index);
QChar la;
if (index + 1 < text.length())
la = text.at(index + 1);
forever { if (ch == QLatin1Char('*') && la == QLatin1Char('/')) {
const QChar qc = text.at(i); m_state = Normal;
const char c = qc.toLatin1(); index += 2;
break;
if (lastWasBackSlash) {
input = InputSep;
} else {
switch (c) {
case '*':
input = InputAsterix;
break;
case '/':
input = InputSlash;
break;
case '"':
input = InputQuotation;
break;
case '\'':
input = InputApostrophe;
break;
case ' ':
input = InputSpace;
break;
case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9': case '0':
if (alphabeth.contains(lastChar) && (!mathChars.contains(lastChar) || !numbers.contains(text.at(i - 1)))) {
input = InputAlpha;
} else { } else {
if (input == InputAlpha && numbers.contains(lastChar)) ++index;
input = InputAlpha;
else
input = InputNumber;
} }
break;
case '.':
if (state == StateNumber)
input = InputNumber;
else
input = InputSep;
break;
default: {
if (qc.isLetter() || c == '_')
input = InputAlpha;
else
input = InputSep;
break;
} }
tokens.append(Token(start, index - start, Token::Comment));
} else {
tokens.append(Token(index++, 1, Token::Delimiter));
} }
}
if (input != InputSpace) {
if (firstNonSpace < 0)
firstNonSpace = i;
lastNonSpace = i;
}
lastWasBackSlash = !lastWasBackSlash && c == '\\';
state = table[state][input];
switch (state) {
case StateStandard: {
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
if (input == InputAlpha ) {
insertIdentifier(i);
} else if (input == InputSep || input == InputAsterix) {
insertCharToken(i, c);
}
break;
}
case StateCommentStart1:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = true;
break;
case StateCCommentStart2:
makeLastStandard = false;
insertComment(i - 1, 2);
break;
case StateCppCommentStart2:
insertComment(i - 1, 2);
makeLastStandard = false;
break;
case StateCComment:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
insertComment(i, 1);
break;
case StateCppComment:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
insertComment(i, 1);
break;
case StateCCommentEnd1:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
insertComment(i, 1);
break;
case StateCCommentEnd2:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
insertComment(i, 1);
break;
case StateStringStart:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
insertString(i);
break;
case StateString:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
insertString(i);
break;
case StateStringEnd:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
insertString(i);
break;
case StateString2Start:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
insertString(i);
break;
case StateString2:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
insertString(i);
break;
case StateString2End:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
insertString(i);
break;
case StateNumber:
if (makeLastStandard)
insertCharToken(i - 1, text.at(i - 1).toAscii());
makeLastStandard = false;
insertNumber(i);
break;
}
lastChar = qc;
i++;
if (i >= text.length())
break; break;
case '\'':
case '"': {
const QChar quote = ch;
const int start = index;
++index;
while (index < text.length()) {
const QChar ch = text.at(index);
if (ch == quote)
break;
else if (index + 1 < text.length() && ch == QLatin1Char('\\'))
index += 2;
else
++index;
}
if (index < text.length()) {
++index;
// good one
} else {
// unfinished
}
tokens.append(Token(start, index - start, Token::String));
} break;
case '.':
if (la.isDigit()) {
const int start = index;
do {
++index;
} while (index < text.length() && isNumberChar(text.at(index)));
tokens.append(Token(start, index - start, Token::Number));
break;
}
tokens.append(Token(index++, 1, Token::Dot));
break;
case '(':
tokens.append(Token(index++, 1, Token::LeftParenthesis));
break;
case ')':
tokens.append(Token(index++, 1, Token::RightParenthesis));
break;
case '[':
tokens.append(Token(index++, 1, Token::LeftBracket));
break;
case ']':
tokens.append(Token(index++, 1, Token::RightBracket));
break;
case '{':
tokens.append(Token(index++, 1, Token::LeftBrace));
break;
case '}':
tokens.append(Token(index++, 1, Token::RightBrace));
break;
case ';':
tokens.append(Token(index++, 1, Token::Semicolon));
break;
case ':':
tokens.append(Token(index++, 1, Token::Colon));
break;
case ',':
tokens.append(Token(index++, 1, Token::Comma));
break;
default:
if (ch.isNumber()) {
const int start = index;
do {
++index;
} while (index < text.length() && isNumberChar(text.at(index)));
tokens.append(Token(start, index - start, Token::Number));
} else if (ch.isLetter() || ch == QLatin1Char('_') || ch == QLatin1Char('$')) {
const int start = index;
do {
++index;
} while (index < text.length() && isIdentifierChar(text.at(index)));
if (isKeyword(text.mid(start, index - start)))
tokens.append(Token(start, index - start, Token::Keyword)); // ### fixme
else
tokens.append(Token(start, index - start, Token::Identifier));
} else {
tokens.append(Token(index++, 1, Token::Delimiter));
}
} // end of switch
} }
scanForKeywords(text); return tokens;
if (state == StateCComment
|| state == StateCCommentEnd1
|| state == StateCCommentStart2
) {
state = StateCComment;
} else {
state = StateStandard;
}
blockEnd(state, firstNonSpace);
return m_tokens;
} }
void QmlJSScanner::insertToken(int start, int length, Token::Kind kind, bool forceNewToken) bool QmlJSScanner::isKeyword(const QString &text) const
{ {
if (m_tokens.isEmpty() || forceNewToken) { return m_keywords.contains(text);
m_tokens.append(Token(start, length, kind));
} else {
Token &lastToken(m_tokens.last());
if (lastToken.kind == kind && lastToken.end() == start) {
lastToken.length += 1;
} else {
m_tokens.append(Token(start, length, kind));
}
}
}
void QmlJSScanner::insertCharToken(int start, const char c)
{
Token::Kind kind;
switch (c) {
case '!':
case '<':
case '>':
case '+':
case '-':
case '*':
case '/':
case '%': kind = Token::Operator; break;
case ';': kind = Token::Semicolon; break;
case ':': kind = Token::Colon; break;
case ',': kind = Token::Comma; break;
case '.': kind = Token::Dot; break;
case '(': kind = Token::LeftParenthesis; break;
case ')': kind = Token::RightParenthesis; break;
case '{': kind = Token::LeftBrace; break;
case '}': kind = Token::RightBrace; break;
case '[': kind = Token::LeftBracket; break;
case ']': kind = Token::RightBracket; break;
default: kind = Token::Identifier; break;
}
insertToken(start, 1, kind, true);
}
void QmlJSScanner::scanForKeywords(const QString &text)
{
for (int i = 0; i < m_tokens.length(); ++i) {
Token &t(m_tokens[i]);
if (t.kind != Token::Identifier)
continue;
const QString id = text.mid(t.offset, t.length);
if (m_keywords.contains(id))
t.kind = Token::Keyword;
}
} }

View File

@@ -54,11 +54,11 @@ public:
RightBrace, RightBrace,
LeftBracket, LeftBracket,
RightBracket, RightBracket,
Operator,
Semicolon, Semicolon,
Colon, Colon,
Comma, Comma,
Dot Dot,
Delimiter
}; };
inline Token(): offset(0), length(0), kind(EndOfFile) {} inline Token(): offset(0), length(0), kind(EndOfFile) {}
@@ -83,39 +83,17 @@ public:
void setKeywords(const QSet<QString> keywords) void setKeywords(const QSet<QString> keywords)
{ m_keywords = keywords; } { m_keywords = keywords; }
void reset();
QList<Token> operator()(const QString &text, int startState = 0); QList<Token> operator()(const QString &text, int startState = 0);
int endState() const int endState() const
{ return m_endState; } { return m_state; }
int firstNonSpace() const
{ return m_firstNonSpace; }
QList<Token> tokens() const
{ return m_tokens; }
private: private:
void blockEnd(int state, int firstNonSpace) bool isKeyword(const QString &text) const;
{ m_endState = state; m_firstNonSpace = firstNonSpace; }
void insertString(int start)
{ insertToken(start, 1, Token::String, false); }
void insertComment(int start, int length)
{ insertToken(start, length, Token::Comment, false); }
void insertCharToken(int start, const char c);
void insertIdentifier(int start)
{ insertToken(start, 1, Token::Identifier, false); }
void insertNumber(int start)
{ insertToken(start, 1, Token::Number, false); }
void insertToken(int start, int length, Token::Kind kind, bool forceNewToken);
void scanForKeywords(const QString &text);
private: private:
QSet<QString> m_keywords; QSet<QString> m_keywords;
int m_endState; int m_state;
int m_firstNonSpace;
QList<Token> m_tokens;
}; };
} // namespace QmlJS } // namespace QmlJS