reshuffle word counting in expressions

this makes the value list size hints more accurate: instead of the number
of literals and expansions, the hint is now the number of actual words,
and it is only emitted if the expression starts with a literal (otherwise
the pre-allocation is discarded anyway).
the performance impact seems negligible, with qt being a tad slower and
creator a tad faster.

as a side effect, reporting excess words in a test expression no longer
immediately aborts parsing of the current line.
This commit is contained in:
Oswald Buddenhagen
2011-04-05 17:04:03 +02:00
parent c87eda573c
commit d9e5f676ef
2 changed files with 88 additions and 99 deletions

View File

@@ -235,7 +235,7 @@ void ProFileParser::finalizeHashStr(ushort *buf, uint len)
bool ProFileParser::read(ProFile *pro, const QString &in) bool ProFileParser::read(ProFile *pro, const QString &in)
{ {
m_fileName = pro->fileName(); m_proFile = pro;
m_lineNo = 1; m_lineNo = 1;
// Final precompiled token stream buffer // Final precompiled token stream buffer
@@ -289,12 +289,11 @@ bool ProFileParser::read(ProFile *pro, const QString &in)
Context context = CtxTest; Context context = CtxTest;
int parens = 0; // Braces in value context int parens = 0; // Braces in value context
int argc = 0; int argc = 0;
int litCount = 0; int wordCount = 0; // Number of words in currently accumulated expression
int expCount = 0;
bool inError = false; bool inError = false;
bool putSpace = false; // Only ever true inside quoted string bool putSpace = false; // Only ever true inside quoted string
bool lineMarked = true; // For in-expression markers bool lineMarked = true; // For in-expression markers
ushort needSep = 0; // Complementary to putSpace: separator outside quotes ushort needSep = TokNewStr; // Complementary to putSpace: separator outside quotes
ushort quote = 0; ushort quote = 0;
ushort term = 0; ushort term = 0;
@@ -304,42 +303,53 @@ bool ProFileParser::read(ProFile *pro, const QString &in)
ushort *oldTokPtr = tokPtr; ushort *oldTokPtr = tokPtr;
#define FLUSH_LHS_LITERAL(setSep) \ #define FLUSH_LHS_LITERAL() \
do { \ do { \
if ((tlen = ptr - xprPtr)) { \ if ((tlen = ptr - xprPtr)) { \
if (needSep) \
goto extraChars; \
finalizeHashStr(xprPtr, tlen); \ finalizeHashStr(xprPtr, tlen); \
if (setSep) \ if (needSep) { \
needSep = TokNewStr; \ wordCount++; \
needSep = 0; \
} \
} else { \ } else { \
ptr -= 4; \ ptr -= 4; \
if (setSep && ptr != buf) \
needSep = TokNewStr; \
} \ } \
} while (0) } while (0)
#define FLUSH_RHS_LITERAL(setSep) \ #define FLUSH_RHS_LITERAL() \
do { \ do { \
if ((tlen = ptr - xprPtr)) { \ if ((tlen = ptr - xprPtr)) { \
xprPtr[-2] = TokLiteral | needSep; \ xprPtr[-2] = TokLiteral | needSep; \
xprPtr[-1] = tlen; \ xprPtr[-1] = tlen; \
if (setSep) \ if (needSep) { \
needSep = TokNewStr; \ wordCount++; \
litCount++; \ needSep = 0; \
} \
} else { \ } else { \
ptr -= 2; \ ptr -= 2; \
if (setSep && ptr != ((context == CtxValue) ? tokPtr : buf)) \
needSep = TokNewStr; \
} \ } \
} while (0) } while (0)
#define FLUSH_LITERAL(setSep) \ #define FLUSH_LITERAL() \
do { \ do { \
if (context == CtxTest) \ if (context == CtxTest) \
FLUSH_LHS_LITERAL(setSep); \ FLUSH_LHS_LITERAL(); \
else \ else \
FLUSH_RHS_LITERAL(setSep); \ FLUSH_RHS_LITERAL(); \
} while (0)
#define FLUSH_VALUE_LIST() \
do { \
if (wordCount > 1) { \
xprPtr = tokPtr; \
if (*xprPtr == TokLine) \
xprPtr += 2; \
tokPtr[-1] = ((*xprPtr & TokMask) == TokLiteral) ? wordCount : 0; \
} else { \
tokPtr[-1] = 0; \
} \
tokPtr = ptr; \
putTok(tokPtr, TokValueTerminator); \
} while (0) } while (0)
forever { forever {
@@ -415,7 +425,7 @@ bool ProFileParser::read(ProFile *pro, const QString &in)
// Finally, do the tokenization // Finally, do the tokenization
ushort tok, rtok; ushort tok, rtok;
int tlen; int tlen;
newToken: newWord:
do { do {
if (cur == end) if (cur == end)
goto lineEnd; goto lineEnd;
@@ -429,27 +439,7 @@ bool ProFileParser::read(ProFile *pro, const QString &in)
putSpace = false; putSpace = false;
*ptr++ = ' '; *ptr++ = ' ';
} }
tlen = ptr - xprPtr; FLUSH_LITERAL();
if (context == CtxTest) {
if (needSep) {
extraChars:
parseError(fL1S("Extra characters after test expression."));
goto parseErr;
}
if (tlen)
finalizeHashStr(xprPtr, tlen);
else
ptr -= 4;
} else {
if (tlen) {
xprPtr[-2] = TokLiteral | needSep;
xprPtr[-1] = tlen;
needSep = 0;
litCount++;
} else {
ptr -= 2;
}
}
if (!lineMarked) { if (!lineMarked) {
lineMarked = true; lineMarked = true;
*ptr++ = TokLine; *ptr++ = TokLine;
@@ -493,9 +483,10 @@ bool ProFileParser::read(ProFile *pro, const QString &in)
notfunc: notfunc:
if (quote) if (quote)
tok |= TokQuoted; tok |= TokQuoted;
tok |= needSep; if (needSep) {
needSep = 0; tok |= needSep;
expCount++; wordCount++;
}
tlen = ptr - xprPtr; tlen = ptr - xprPtr;
if (rtok == TokVariable) { if (rtok == TokVariable) {
xprPtr[-4] = tok; xprPtr[-4] = tok;
@@ -517,8 +508,7 @@ bool ProFileParser::read(ProFile *pro, const QString &in)
top.terminator = term; top.terminator = term;
top.context = context; top.context = context;
top.argc = argc; top.argc = argc;
top.litCount = litCount; top.wordCount = wordCount;
top.expCount = expCount;
} }
parens = 0; parens = 0;
quote = 0; quote = 0;
@@ -526,9 +516,12 @@ bool ProFileParser::read(ProFile *pro, const QString &in)
argc = 1; argc = 1;
context = CtxArgs; context = CtxArgs;
nextToken: nextToken:
wordCount = 0;
nextWord:
ptr += (context == CtxTest) ? 4 : 2; ptr += (context == CtxTest) ? 4 : 2;
xprPtr = ptr; xprPtr = ptr;
goto newToken; needSep = TokNewStr;
goto newWord;
} }
if (term) { if (term) {
cur++; cur++;
@@ -548,6 +541,7 @@ bool ProFileParser::read(ProFile *pro, const QString &in)
joinToken: joinToken:
ptr += (context == CtxTest) ? 4 : 2; ptr += (context == CtxTest) ? 4 : 2;
xprPtr = ptr; xprPtr = ptr;
needSep = 0;
goto nextChr; goto nextChr;
} }
} else if (c == '\\' && cur != end) { } else if (c == '\\' && cur != end) {
@@ -576,15 +570,15 @@ bool ProFileParser::read(ProFile *pro, const QString &in)
quote = c; quote = c;
goto nextChr; goto nextChr;
} else if (c == ' ' || c == '\t') { } else if (c == ' ' || c == '\t') {
FLUSH_LITERAL(true); FLUSH_LITERAL();
goto nextToken; goto nextWord;
} else if (context == CtxArgs) { } else if (context == CtxArgs) {
// Function arg context // Function arg context
if (c == '(') { if (c == '(') {
++parens; ++parens;
} else if (c == ')') { } else if (c == ')') {
if (--parens < 0) { if (--parens < 0) {
FLUSH_RHS_LITERAL(false); FLUSH_RHS_LITERAL();
*ptr++ = TokFuncTerminator; *ptr++ = TokFuncTerminator;
int theargc = argc; int theargc = argc;
{ {
@@ -594,74 +588,70 @@ bool ProFileParser::read(ProFile *pro, const QString &in)
term = top.terminator; term = top.terminator;
context = top.context; context = top.context;
argc = top.argc; argc = top.argc;
litCount = top.litCount; wordCount = top.wordCount;
expCount = top.expCount;
xprStack.resize(xprStack.size() - 1); xprStack.resize(xprStack.size() - 1);
} }
if (term == ':') { if (term == ':') {
finalizeCall(tokPtr, buf, ptr, theargc); finalizeCall(tokPtr, buf, ptr, theargc);
needSep = TokNewStr;
goto nextItem; goto nextItem;
} else if (term == '}') { } else if (term == '}') {
c = (cur == end) ? 0 : *cur++; c = (cur == end) ? 0 : *cur++;
needSep = 0;
goto checkTerm; goto checkTerm;
} else { } else {
Q_ASSERT(!term); Q_ASSERT(!term);
needSep = 0;
goto joinToken; goto joinToken;
} }
} }
} else if (!parens && c == ',') { } else if (!parens && c == ',') {
FLUSH_RHS_LITERAL(false); FLUSH_RHS_LITERAL();
*ptr++ = TokArgSeparator; *ptr++ = TokArgSeparator;
argc++; argc++;
needSep = 0;
goto nextToken; goto nextToken;
} }
} else if (context == CtxTest) { } else if (context == CtxTest) {
// Test or LHS context // Test or LHS context
if (c == '(') { if (c == '(') {
FLUSH_LHS_LITERAL(false); FLUSH_LHS_LITERAL();
if (ptr == buf) { if (wordCount != 1) {
parseError(fL1S("Opening parenthesis without prior test name.")); if (wordCount)
parseError(fL1S("Extra characters after test expression."));
else
parseError(fL1S("Opening parenthesis without prior test name."));
goto parseErr; goto parseErr;
} }
*ptr++ = TokTestCall; *ptr++ = TokTestCall;
term = ':'; term = ':';
needSep = 0;
goto funcCall; goto funcCall;
} else if (c == '!' && ptr == xprPtr) { } else if (c == '!' && ptr == xprPtr) {
m_invert ^= true; m_invert ^= true;
goto nextChr; goto nextChr;
} else if (c == ':') { } else if (c == ':') {
FLUSH_LHS_LITERAL(false); FLUSH_LHS_LITERAL();
finalizeCond(tokPtr, buf, ptr); finalizeCond(tokPtr, buf, ptr, wordCount);
if (m_state == StNew) if (m_state == StNew)
parseError(fL1S("And operator without prior condition.")); parseError(fL1S("And operator without prior condition."));
else else
m_operator = AndOperator; m_operator = AndOperator;
nextItem: nextItem:
ptr = buf; ptr = buf;
needSep = 0;
goto nextToken; goto nextToken;
} else if (c == '|') { } else if (c == '|') {
FLUSH_LHS_LITERAL(false); FLUSH_LHS_LITERAL();
finalizeCond(tokPtr, buf, ptr); finalizeCond(tokPtr, buf, ptr, wordCount);
if (m_state != StCond) if (m_state != StCond)
parseError(fL1S("Or operator without prior condition.")); parseError(fL1S("Or operator without prior condition."));
else else
m_operator = OrOperator; m_operator = OrOperator;
goto nextItem; goto nextItem;
} else if (c == '{') { } else if (c == '{') {
FLUSH_LHS_LITERAL(false); FLUSH_LHS_LITERAL();
finalizeCond(tokPtr, buf, ptr); finalizeCond(tokPtr, buf, ptr, wordCount);
flushCond(tokPtr); flushCond(tokPtr);
++m_blockstack.top().braceLevel; ++m_blockstack.top().braceLevel;
goto nextItem; goto nextItem;
} else if (c == '}') { } else if (c == '}') {
FLUSH_LHS_LITERAL(false); FLUSH_LHS_LITERAL();
finalizeCond(tokPtr, buf, ptr); finalizeCond(tokPtr, buf, ptr, wordCount);
flushScopes(tokPtr); flushScopes(tokPtr);
closeScope: closeScope:
if (!m_blockstack.top().braceLevel) { if (!m_blockstack.top().braceLevel) {
@@ -693,19 +683,17 @@ bool ProFileParser::read(ProFile *pro, const QString &in)
} else if (c == '=') { } else if (c == '=') {
tok = TokAssign; tok = TokAssign;
doOp: doOp:
FLUSH_LHS_LITERAL(false); FLUSH_LHS_LITERAL();
flushCond(tokPtr); flushCond(tokPtr);
putLineMarker(tokPtr); putLineMarker(tokPtr);
if (!(tlen = ptr - buf)) { if (wordCount != 1) {
parseError(fL1S("Assignment operator without prior variable name.")); parseError(fL1S("Assignment needs exactly one word on the left hand side."));
goto parseErr; goto parseErr;
} }
putBlock(tokPtr, buf, tlen); putBlock(tokPtr, buf, ptr - buf);
putTok(tokPtr, tok); putTok(tokPtr, tok);
context = CtxValue; context = CtxValue;
ptr = ++tokPtr; ptr = ++tokPtr;
litCount = expCount = 0;
needSep = 0;
goto nextToken; goto nextToken;
} }
} else { // context == CtxValue } else { // context == CtxValue
@@ -713,10 +701,8 @@ bool ProFileParser::read(ProFile *pro, const QString &in)
++parens; ++parens;
} else if (c == '}') { } else if (c == '}') {
if (!parens) { if (!parens) {
FLUSH_RHS_LITERAL(false); FLUSH_RHS_LITERAL();
tokPtr[-1] = litCount ? litCount + expCount : 0; FLUSH_VALUE_LIST();
tokPtr = ptr;
putTok(tokPtr, TokValueTerminator);
context = CtxTest; context = CtxTest;
goto closeScope; goto closeScope;
} }
@@ -739,7 +725,8 @@ bool ProFileParser::read(ProFile *pro, const QString &in)
if (quote) { if (quote) {
putSpace = true; putSpace = true;
} else { } else {
FLUSH_LITERAL(true); FLUSH_LITERAL();
needSep = TokNewStr;
ptr += (context == CtxTest) ? 4 : 2; ptr += (context == CtxTest) ? 4 : 2;
xprPtr = ptr; xprPtr = ptr;
} }
@@ -747,7 +734,7 @@ bool ProFileParser::read(ProFile *pro, const QString &in)
c = '\n'; c = '\n';
cur = cptr; cur = cptr;
flushLine: flushLine:
FLUSH_LITERAL(false); FLUSH_LITERAL();
if (quote) { if (quote) {
parseError(fL1S("Missing closing %1 quote").arg(QChar(quote))); parseError(fL1S("Missing closing %1 quote").arg(QChar(quote)));
goto parseErr; goto parseErr;
@@ -756,13 +743,10 @@ bool ProFileParser::read(ProFile *pro, const QString &in)
parseError(fL1S("Missing closing parenthesis in function call")); parseError(fL1S("Missing closing parenthesis in function call"));
goto parseErr; goto parseErr;
} }
if (context == CtxValue) { if (context == CtxValue)
tokPtr[-1] = litCount ? litCount + expCount : 0; FLUSH_VALUE_LIST();
tokPtr = ptr; else
putTok(tokPtr, TokValueTerminator); finalizeCond(tokPtr, buf, ptr, wordCount);
} else {
finalizeCond(tokPtr, buf, ptr);
}
if (!c) if (!c)
break; break;
++m_lineNo; ++m_lineNo;
@@ -793,6 +777,7 @@ bool ProFileParser::read(ProFile *pro, const QString &in)
*pro->itemsRef() = QString(tokBuff.constData(), tokPtr - (ushort *)tokBuff.constData()); *pro->itemsRef() = QString(tokBuff.constData(), tokPtr - (ushort *)tokBuff.constData());
return true; return true;
#undef FLUSH_VALUE_LIST
#undef FLUSH_LITERAL #undef FLUSH_LITERAL
#undef FLUSH_LHS_LITERAL #undef FLUSH_LHS_LITERAL
#undef FLUSH_RHS_LITERAL #undef FLUSH_RHS_LITERAL
@@ -877,10 +862,15 @@ void ProFileParser::finalizeTest(ushort *&tokPtr)
m_canElse = true; m_canElse = true;
} }
void ProFileParser::finalizeCond(ushort *&tokPtr, ushort *uc, ushort *ptr) void ProFileParser::finalizeCond(ushort *&tokPtr, ushort *uc, ushort *ptr, int wordCount)
{ {
if (ptr == uc) if (wordCount != 1) {
if (wordCount) {
parseError(fL1S("Extra characters after test expression."));
m_proFile->setOk(false);
}
return; return;
}
// Check for magic tokens // Check for magic tokens
if (*uc == TokHashLiteral) { if (*uc == TokHashLiteral) {
@@ -944,7 +934,7 @@ void ProFileParser::finalizeCall(ushort *&tokPtr, ushort *uc, ushort *ptr, int a
parseError(fL1S("Unexpected operator in front of for().")); parseError(fL1S("Unexpected operator in front of for()."));
return; return;
} }
if (*uce == TokLiteral) { if (*uce == (TokLiteral|TokNewStr)) {
nlen = uce[1]; nlen = uce[1];
uc = uce + 2 + nlen; uc = uce + 2 + nlen;
if (*uc == TokFuncTerminator) { if (*uc == TokFuncTerminator) {
@@ -992,7 +982,7 @@ void ProFileParser::finalizeCall(ushort *&tokPtr, ushort *uc, ushort *ptr, int a
parseError(fL1S("Unexpected operator in front of function definition.")); parseError(fL1S("Unexpected operator in front of function definition."));
return; return;
} }
if (*uce == TokLiteral) { if (*uce == (TokLiteral|TokNewStr)) {
uint nlen = uce[1]; uint nlen = uce[1];
if (uce[nlen + 2] == TokFuncTerminator) { if (uce[nlen + 2] == TokFuncTerminator) {
if (m_operator != NoOperator) { if (m_operator != NoOperator) {
@@ -1019,7 +1009,7 @@ void ProFileParser::finalizeCall(ushort *&tokPtr, ushort *uc, ushort *ptr, int a
void ProFileParser::parseError(const QString &msg) const void ProFileParser::parseError(const QString &msg) const
{ {
if (m_handler) if (m_handler)
m_handler->parseError(m_fileName, m_lineNo, msg); m_handler->parseError(m_proFile->fileName(), m_lineNo, msg);
} }
QT_END_NAMESPACE QT_END_NAMESPACE

View File

@@ -98,8 +98,7 @@ private:
struct ParseCtx { struct ParseCtx {
int parens; // Nesting of non-functional parentheses int parens; // Nesting of non-functional parentheses
int argc; // Number of arguments in current function call int argc; // Number of arguments in current function call
int litCount; // Number of literals in current expression int wordCount; // Number of words in current expression
int expCount; // Number of expansions in current expression
Context context; Context context;
ushort quote; // Enclosing quote type ushort quote; // Enclosing quote type
ushort terminator; // '}' if replace function call is braced, ':' if test function ushort terminator; // '}' if replace function call is braced, ':' if test function
@@ -114,7 +113,7 @@ private:
void putHashStr(ushort *&pTokPtr, const ushort *buf, uint len); void putHashStr(ushort *&pTokPtr, const ushort *buf, uint len);
void finalizeHashStr(ushort *buf, uint len); void finalizeHashStr(ushort *buf, uint len);
void putLineMarker(ushort *&tokPtr); void putLineMarker(ushort *&tokPtr);
void finalizeCond(ushort *&tokPtr, ushort *uc, ushort *ptr); void finalizeCond(ushort *&tokPtr, ushort *uc, ushort *ptr, int wordCount);
void finalizeCall(ushort *&tokPtr, ushort *uc, ushort *ptr, int argc); void finalizeCall(ushort *&tokPtr, ushort *uc, ushort *ptr, int argc);
void finalizeTest(ushort *&tokPtr); void finalizeTest(ushort *&tokPtr);
void enterScope(ushort *&tokPtr, bool special, ScopeState state); void enterScope(ushort *&tokPtr, bool special, ScopeState state);
@@ -125,7 +124,7 @@ private:
void parseError(const QString &msg) const; void parseError(const QString &msg) const;
// Current location // Current location
QString m_fileName; ProFile *m_proFile;
int m_lineNo; int m_lineNo;
QStack<BlockScope> m_blockstack; QStack<BlockScope> m_blockstack;