2009-09-28 14:49:39 +02:00
|
|
|
#include "qscriptincrementalscanner.h"
|
|
|
|
|
|
|
|
|
|
#include <QTextCharFormat>
|
|
|
|
|
|
|
|
|
|
using namespace SharedTools;
|
|
|
|
|
|
2009-10-07 16:31:08 +02:00
|
|
|
// Creates a scanner in its initial state; all per-block results are put
// into their "nothing scanned yet" values by reset().
QScriptIncrementalScanner::QScriptIncrementalScanner()
{
    this->reset();
}
|
|
|
|
|
|
|
|
|
|
// Nothing to release explicitly; members clean up after themselves.
QScriptIncrementalScanner::~QScriptIncrementalScanner()
{
}
|
|
|
|
|
|
|
|
|
|
void QScriptIncrementalScanner::reset()
|
|
|
|
|
{
|
|
|
|
|
m_endState = -1;
|
|
|
|
|
m_firstNonSpace = -1;
|
|
|
|
|
m_tokens.clear();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void QScriptIncrementalScanner::operator()(int startState, const QString &text)
|
|
|
|
|
{
|
|
|
|
|
reset();
|
|
|
|
|
|
|
|
|
|
// tokens
|
|
|
|
|
enum TokenKind {
|
|
|
|
|
InputAlpha,
|
|
|
|
|
InputNumber,
|
|
|
|
|
InputAsterix,
|
|
|
|
|
InputSlash,
|
|
|
|
|
InputSpace,
|
|
|
|
|
InputQuotation,
|
|
|
|
|
InputApostrophe,
|
|
|
|
|
InputSep,
|
|
|
|
|
NumInputs
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// states
|
|
|
|
|
enum {
|
|
|
|
|
StateStandard,
|
2009-10-07 16:31:08 +02:00
|
|
|
StateCommentStart1, // '/'
|
|
|
|
|
StateCCommentStart2, // '*' after a '/'
|
|
|
|
|
StateCppCommentStart2, // '/' after a '/'
|
|
|
|
|
StateCComment, // after a "/*"
|
|
|
|
|
StateCppComment, // after a "//"
|
|
|
|
|
StateCCommentEnd1, // '*' in a CppComment
|
|
|
|
|
StateCCommentEnd2, // '/' after a '*' in a CppComment
|
2009-09-28 14:49:39 +02:00
|
|
|
StateStringStart,
|
|
|
|
|
StateString,
|
|
|
|
|
StateStringEnd,
|
|
|
|
|
StateString2Start,
|
|
|
|
|
StateString2,
|
|
|
|
|
StateString2End,
|
|
|
|
|
StateNumber,
|
|
|
|
|
NumStates
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static const uchar table[NumStates][NumInputs] = {
|
2009-10-07 16:31:08 +02:00
|
|
|
// InputAlpha InputNumber InputAsterix InputSlash InputSpace InputQuotation InputApostrophe InputSep
|
|
|
|
|
{ StateStandard, StateNumber, StateStandard, StateCommentStart1, StateStandard, StateStringStart, StateString2Start, StateStandard }, // StateStandard
|
|
|
|
|
{ StateStandard, StateNumber, StateCCommentStart2, StateCppCommentStart2, StateStandard, StateStringStart, StateString2Start, StateStandard }, // StateCommentStart1
|
|
|
|
|
{ StateCComment, StateCComment, StateCCommentEnd1, StateCComment, StateCComment, StateCComment, StateCComment, StateCComment }, // StateCCommentStart2
|
|
|
|
|
{ StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment }, // StateCppCommentStart2
|
|
|
|
|
{ StateCComment, StateCComment, StateCCommentEnd1, StateCComment, StateCComment, StateCComment, StateCComment, StateCComment }, // StateCComment
|
|
|
|
|
{ StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment }, // StateCppComment
|
|
|
|
|
{ StateCComment, StateCComment, StateCCommentEnd1, StateCCommentEnd2, StateCComment, StateCComment, StateCComment, StateCComment }, // StateCCommentEnd1
|
|
|
|
|
{ StateStandard, StateNumber, StateStandard, StateCommentStart1, StateStandard, StateStringStart, StateString2Start, StateStandard }, // StateCCommentEnd2
|
|
|
|
|
{ StateString, StateString, StateString, StateString, StateString, StateStringEnd, StateString, StateString }, // StateStringStart
|
|
|
|
|
{ StateString, StateString, StateString, StateString, StateString, StateStringEnd, StateString, StateString }, // StateString
|
|
|
|
|
{ StateStandard, StateStandard, StateStandard, StateCommentStart1, StateStandard, StateStringStart, StateString2Start, StateStandard }, // StateStringEnd
|
|
|
|
|
{ StateString2, StateString2, StateString2, StateString2, StateString2, StateString2, StateString2End, StateString2 }, // StateString2Start
|
|
|
|
|
{ StateString2, StateString2, StateString2, StateString2, StateString2, StateString2, StateString2End, StateString2 }, // StateString2
|
|
|
|
|
{ StateStandard, StateStandard, StateStandard, StateCommentStart1, StateStandard, StateStringStart, StateString2Start, StateStandard }, // StateString2End
|
|
|
|
|
{ StateNumber, StateNumber, StateStandard, StateCommentStart1, StateStandard, StateStringStart, StateString2Start, StateStandard } // StateNumber
|
2009-09-28 14:49:39 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
int state = startState;
|
|
|
|
|
if (text.isEmpty()) {
|
|
|
|
|
blockEnd(state, 0);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int input = -1;
|
|
|
|
|
int i = 0;
|
|
|
|
|
bool lastWasBackSlash = false;
|
|
|
|
|
bool makeLastStandard = false;
|
|
|
|
|
|
|
|
|
|
static const QString alphabeth = QLatin1String("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
|
|
|
|
|
static const QString mathChars = QString::fromLatin1("xXeE");
|
|
|
|
|
static const QString numbers = QString::fromLatin1("0123456789");
|
|
|
|
|
QChar lastChar;
|
|
|
|
|
|
|
|
|
|
int firstNonSpace = -1;
|
|
|
|
|
int lastNonSpace = -1;
|
|
|
|
|
|
|
|
|
|
forever {
|
|
|
|
|
const QChar qc = text.at(i);
|
2009-10-07 16:31:08 +02:00
|
|
|
const char c = qc.toLatin1();
|
2009-09-28 14:49:39 +02:00
|
|
|
|
|
|
|
|
if (lastWasBackSlash) {
|
|
|
|
|
input = InputSep;
|
|
|
|
|
} else {
|
|
|
|
|
switch (c) {
|
|
|
|
|
case '*':
|
|
|
|
|
input = InputAsterix;
|
|
|
|
|
break;
|
|
|
|
|
case '/':
|
|
|
|
|
input = InputSlash;
|
|
|
|
|
break;
|
|
|
|
|
case '"':
|
|
|
|
|
input = InputQuotation;
|
|
|
|
|
break;
|
|
|
|
|
case '\'':
|
|
|
|
|
input = InputApostrophe;
|
|
|
|
|
break;
|
|
|
|
|
case ' ':
|
|
|
|
|
input = InputSpace;
|
|
|
|
|
break;
|
|
|
|
|
case '1': case '2': case '3': case '4': case '5':
|
|
|
|
|
case '6': case '7': case '8': case '9': case '0':
|
|
|
|
|
if (alphabeth.contains(lastChar) && (!mathChars.contains(lastChar) || !numbers.contains(text.at(i - 1)))) {
|
|
|
|
|
input = InputAlpha;
|
|
|
|
|
} else {
|
|
|
|
|
if (input == InputAlpha && numbers.contains(lastChar))
|
|
|
|
|
input = InputAlpha;
|
|
|
|
|
else
|
|
|
|
|
input = InputNumber;
|
|
|
|
|
}
|
|
|
|
|
break;
|
2009-10-07 16:31:08 +02:00
|
|
|
case '.':
|
|
|
|
|
if (state == StateNumber)
|
|
|
|
|
input = InputNumber;
|
|
|
|
|
else
|
|
|
|
|
input = InputSep;
|
2009-09-28 14:49:39 +02:00
|
|
|
break;
|
|
|
|
|
default: {
|
2009-10-07 16:31:08 +02:00
|
|
|
if (qc.isLetter() || c == '_')
|
2009-09-28 14:49:39 +02:00
|
|
|
input = InputAlpha;
|
|
|
|
|
else
|
|
|
|
|
input = InputSep;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (input != InputSpace) {
|
|
|
|
|
if (firstNonSpace < 0)
|
|
|
|
|
firstNonSpace = i;
|
|
|
|
|
lastNonSpace = i;
|
|
|
|
|
}
|
|
|
|
|
|
2009-10-07 16:31:08 +02:00
|
|
|
lastWasBackSlash = !lastWasBackSlash && c == '\\';
|
2009-09-28 14:49:39 +02:00
|
|
|
|
|
|
|
|
state = table[state][input];
|
|
|
|
|
|
|
|
|
|
switch (state) {
|
|
|
|
|
case StateStandard: {
|
|
|
|
|
if (makeLastStandard)
|
2009-10-07 16:31:08 +02:00
|
|
|
insertCharToken(i - 1, text.at(i - 1).toAscii());
|
2009-09-28 14:49:39 +02:00
|
|
|
makeLastStandard = false;
|
2009-10-07 16:31:08 +02:00
|
|
|
|
|
|
|
|
if (input == InputAlpha ) {
|
|
|
|
|
insertIdentifier(i);
|
|
|
|
|
} else if (input == InputSep || input == InputAsterix) {
|
|
|
|
|
insertCharToken(i, c);
|
2009-09-28 14:49:39 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case StateCommentStart1:
|
|
|
|
|
if (makeLastStandard)
|
2009-10-07 16:31:08 +02:00
|
|
|
insertCharToken(i - 1, text.at(i - 1).toAscii());
|
2009-09-28 14:49:39 +02:00
|
|
|
makeLastStandard = true;
|
|
|
|
|
break;
|
|
|
|
|
case StateCCommentStart2:
|
|
|
|
|
makeLastStandard = false;
|
2009-10-07 16:31:08 +02:00
|
|
|
insertComment(i - 1, 2);
|
2009-09-28 14:49:39 +02:00
|
|
|
break;
|
|
|
|
|
case StateCppCommentStart2:
|
2009-10-07 16:31:08 +02:00
|
|
|
insertComment(i - 1, 2);
|
2009-09-28 14:49:39 +02:00
|
|
|
makeLastStandard = false;
|
|
|
|
|
break;
|
|
|
|
|
case StateCComment:
|
|
|
|
|
if (makeLastStandard)
|
2009-10-07 16:31:08 +02:00
|
|
|
insertCharToken(i - 1, text.at(i - 1).toAscii());
|
2009-09-28 14:49:39 +02:00
|
|
|
makeLastStandard = false;
|
2009-10-07 16:31:08 +02:00
|
|
|
insertComment(i, 1);
|
2009-09-28 14:49:39 +02:00
|
|
|
break;
|
|
|
|
|
case StateCppComment:
|
|
|
|
|
if (makeLastStandard)
|
2009-10-07 16:31:08 +02:00
|
|
|
insertCharToken(i - 1, text.at(i - 1).toAscii());
|
2009-09-28 14:49:39 +02:00
|
|
|
makeLastStandard = false;
|
2009-10-07 16:31:08 +02:00
|
|
|
insertComment(i, 1);
|
2009-09-28 14:49:39 +02:00
|
|
|
break;
|
|
|
|
|
case StateCCommentEnd1:
|
|
|
|
|
if (makeLastStandard)
|
2009-10-07 16:31:08 +02:00
|
|
|
insertCharToken(i - 1, text.at(i - 1).toAscii());
|
2009-09-28 14:49:39 +02:00
|
|
|
makeLastStandard = false;
|
2009-10-07 16:31:08 +02:00
|
|
|
insertComment(i, 1);
|
2009-09-28 14:49:39 +02:00
|
|
|
break;
|
|
|
|
|
case StateCCommentEnd2:
|
|
|
|
|
if (makeLastStandard)
|
2009-10-07 16:31:08 +02:00
|
|
|
insertCharToken(i - 1, text.at(i - 1).toAscii());
|
2009-09-28 14:49:39 +02:00
|
|
|
makeLastStandard = false;
|
2009-10-07 16:31:08 +02:00
|
|
|
insertComment(i, 1);
|
2009-09-28 14:49:39 +02:00
|
|
|
break;
|
|
|
|
|
case StateStringStart:
|
|
|
|
|
if (makeLastStandard)
|
2009-10-07 16:31:08 +02:00
|
|
|
insertCharToken(i - 1, text.at(i - 1).toAscii());
|
2009-09-28 14:49:39 +02:00
|
|
|
makeLastStandard = false;
|
2009-10-07 16:31:08 +02:00
|
|
|
insertString(i);
|
2009-09-28 14:49:39 +02:00
|
|
|
break;
|
|
|
|
|
case StateString:
|
|
|
|
|
if (makeLastStandard)
|
2009-10-07 16:31:08 +02:00
|
|
|
insertCharToken(i - 1, text.at(i - 1).toAscii());
|
2009-09-28 14:49:39 +02:00
|
|
|
makeLastStandard = false;
|
2009-10-07 16:31:08 +02:00
|
|
|
insertString(i);
|
2009-09-28 14:49:39 +02:00
|
|
|
break;
|
|
|
|
|
case StateStringEnd:
|
|
|
|
|
if (makeLastStandard)
|
2009-10-07 16:31:08 +02:00
|
|
|
insertCharToken(i - 1, text.at(i - 1).toAscii());
|
2009-09-28 14:49:39 +02:00
|
|
|
makeLastStandard = false;
|
2009-10-07 16:31:08 +02:00
|
|
|
insertString(i);
|
2009-09-28 14:49:39 +02:00
|
|
|
break;
|
|
|
|
|
case StateString2Start:
|
|
|
|
|
if (makeLastStandard)
|
2009-10-07 16:31:08 +02:00
|
|
|
insertCharToken(i - 1, text.at(i - 1).toAscii());
|
2009-09-28 14:49:39 +02:00
|
|
|
makeLastStandard = false;
|
2009-10-07 16:31:08 +02:00
|
|
|
insertString(i);
|
2009-09-28 14:49:39 +02:00
|
|
|
break;
|
|
|
|
|
case StateString2:
|
|
|
|
|
if (makeLastStandard)
|
2009-10-07 16:31:08 +02:00
|
|
|
insertCharToken(i - 1, text.at(i - 1).toAscii());
|
2009-09-28 14:49:39 +02:00
|
|
|
makeLastStandard = false;
|
2009-10-07 16:31:08 +02:00
|
|
|
insertString(i);
|
2009-09-28 14:49:39 +02:00
|
|
|
break;
|
|
|
|
|
case StateString2End:
|
|
|
|
|
if (makeLastStandard)
|
2009-10-07 16:31:08 +02:00
|
|
|
insertCharToken(i - 1, text.at(i - 1).toAscii());
|
2009-09-28 14:49:39 +02:00
|
|
|
makeLastStandard = false;
|
2009-10-07 16:31:08 +02:00
|
|
|
insertString(i);
|
2009-09-28 14:49:39 +02:00
|
|
|
break;
|
|
|
|
|
case StateNumber:
|
|
|
|
|
if (makeLastStandard)
|
2009-10-07 16:31:08 +02:00
|
|
|
insertCharToken(i - 1, text.at(i - 1).toAscii());
|
2009-09-28 14:49:39 +02:00
|
|
|
makeLastStandard = false;
|
2009-10-07 16:31:08 +02:00
|
|
|
insertNumber(i);
|
2009-09-28 14:49:39 +02:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
lastChar = qc;
|
|
|
|
|
i++;
|
|
|
|
|
if (i >= text.length())
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2009-10-07 16:31:08 +02:00
|
|
|
scanForKeywords(text);
|
2009-09-28 14:49:39 +02:00
|
|
|
|
|
|
|
|
if (state == StateCComment
|
|
|
|
|
|| state == StateCCommentEnd1
|
|
|
|
|
|| state == StateCCommentStart2
|
|
|
|
|
) {
|
|
|
|
|
state = StateCComment;
|
|
|
|
|
} else {
|
|
|
|
|
state = StateStandard;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
blockEnd(state, firstNonSpace);
|
|
|
|
|
}
|
|
|
|
|
|
2009-10-07 16:31:08 +02:00
|
|
|
void QScriptIncrementalScanner::insertToken(int start, int length, Token::Kind kind, bool forceNewToken)
|
2009-09-28 14:49:39 +02:00
|
|
|
{
|
2009-10-07 16:31:08 +02:00
|
|
|
if (m_tokens.isEmpty() || forceNewToken) {
|
|
|
|
|
m_tokens.append(Token(start, length, kind));
|
2009-09-28 14:49:39 +02:00
|
|
|
} else {
|
2009-10-07 16:31:08 +02:00
|
|
|
Token &lastToken(m_tokens.last());
|
|
|
|
|
|
|
|
|
|
if (lastToken.kind == kind && lastToken.end() == start) {
|
|
|
|
|
lastToken.length += 1;
|
|
|
|
|
} else {
|
|
|
|
|
m_tokens.append(Token(start, length, kind));
|
|
|
|
|
}
|
2009-09-28 14:49:39 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2009-10-07 16:31:08 +02:00
|
|
|
void QScriptIncrementalScanner::insertCharToken(int start, const char c)
|
2009-09-28 14:49:39 +02:00
|
|
|
{
|
|
|
|
|
Token::Kind kind;
|
|
|
|
|
|
|
|
|
|
switch (c) {
|
2009-10-07 16:31:08 +02:00
|
|
|
case '!':
|
|
|
|
|
case '<':
|
|
|
|
|
case '>':
|
|
|
|
|
case '+':
|
|
|
|
|
case '-':
|
|
|
|
|
case '*':
|
|
|
|
|
case '/':
|
|
|
|
|
case '%': kind = Token::Operator; break;
|
|
|
|
|
|
|
|
|
|
case ';': kind = Token::Semicolon; break;
|
|
|
|
|
case ':': kind = Token::Colon; break;
|
|
|
|
|
case ',': kind = Token::Comma; break;
|
|
|
|
|
case '.': kind = Token::Dot; break;
|
|
|
|
|
|
|
|
|
|
case '(': kind = Token::LeftParenthesis; break;
|
|
|
|
|
case ')': kind = Token::RightParenthesis; break;
|
|
|
|
|
case '{': kind = Token::LeftBrace; break;
|
|
|
|
|
case '}': kind = Token::RightBrace; break;
|
|
|
|
|
case '[': kind = Token::LeftBracket; break;
|
|
|
|
|
case ']': kind = Token::RightBracket; break;
|
|
|
|
|
|
|
|
|
|
default: kind = Token::Identifier; break;
|
2009-09-28 14:49:39 +02:00
|
|
|
}
|
|
|
|
|
|
2009-10-07 16:31:08 +02:00
|
|
|
insertToken(start, 1, kind, true);
|
2009-09-28 14:49:39 +02:00
|
|
|
}
|
|
|
|
|
|
2009-10-07 16:31:08 +02:00
|
|
|
void QScriptIncrementalScanner::scanForKeywords(const QString &text)
|
2009-09-28 14:49:39 +02:00
|
|
|
{
|
2009-10-07 16:31:08 +02:00
|
|
|
for (int i = 0; i < m_tokens.length(); ++i) {
|
|
|
|
|
Token &t(m_tokens[i]);
|
2009-09-28 14:49:39 +02:00
|
|
|
|
2009-10-07 16:31:08 +02:00
|
|
|
if (t.kind != Token::Identifier)
|
|
|
|
|
continue;
|
2009-09-28 14:49:39 +02:00
|
|
|
|
2009-10-07 16:31:08 +02:00
|
|
|
const QString id = text.mid(t.offset, t.length);
|
|
|
|
|
if (m_keywords.contains(id))
|
|
|
|
|
t.kind = Token::Keyword;
|
2009-09-28 14:49:39 +02:00
|
|
|
}
|
|
|
|
|
}
|