Next attempt at fixing low-level parsing

In particular, fix line continuation processing.

Also, reintroduce some error handling. It checks only one condition so
far (a malformed assignment statement) and is entirely silent, but that
is all that was necessary for internal robustness.
This commit is contained in:
Oswald Buddenhagen
2009-08-07 19:17:44 +02:00
parent 9efc9ce0e4
commit 8e726a6d58

View File

@@ -129,8 +129,9 @@ public:
bool read(ProFile *pro, QTextStream *ts); bool read(ProFile *pro, QTextStream *ts);
ProBlock *currentBlock(); ProBlock *currentBlock();
void updateItem(); void updateItem(ushort *ptr);
void insertVariable(const ushort **pCur, const ushort *end); void updateItem2();
bool insertVariable(ushort *ptr, bool *doSplit, bool *doSemicolon);
void insertOperator(const char op); void insertOperator(const char op);
void insertComment(const QString &comment); void insertComment(const QString &comment);
void enterScope(bool multiLine); void enterScope(bool multiLine);
@@ -143,7 +144,6 @@ public:
ProItem *m_commentItem; ProItem *m_commentItem;
QString m_proitem; QString m_proitem;
QString m_pendingComment; QString m_pendingComment;
ushort *m_proitemPtr;
enum StrState { NeverStarted, NotStarted, Started, PutSpace }; enum StrState { NeverStarted, NotStarted, Started, PutSpace };
@@ -284,111 +284,239 @@ bool ProFileEvaluator::Private::read(ProFile *pro, QTextStream *ts)
m_blockstack.clear(); m_blockstack.clear();
m_blockstack.push(pro); m_blockstack.push(pro);
freshLine:
int parens = 0; int parens = 0;
bool inQuote = false; bool inError = false;
bool inAssignment = false;
bool doSplit = false;
bool doSemicolon = false;
bool putSpace = false;
ushort quote = 0;
while (!ts->atEnd()) { while (!ts->atEnd()) {
QString line = ts->readLine(); QString line = ts->readLine();
const ushort *cur = (const ushort *)line.unicode(), const ushort *cur = (const ushort *)line.unicode(),
*end = cur + line.length(), *end = cur + line.length(),
*orgend = end,
*cmtptr = 0; *cmtptr = 0;
m_proitem.reserve(line.length()); ushort c, *ptr;
m_proitemPtr = (ushort *)m_proitem.unicode();
enum { NotEscaped, Escaped, PostEscaped } escaped = NotEscaped; // First, skip leading whitespace
StrState sts = NeverStarted; forever {
goto startItem; if (cur == end) { // Entirely empty line (sans whitespace)
nextItem: updateItem2();
escaped = NotEscaped; finalizeBlock();
nextItem1: ++m_lineNo;
sts = NotStarted; goto freshLine;
startItem: }
ushort *ptr = m_proitemPtr; c = *cur;
while (cur < end) { if (c != ' ' && c != '\t')
ushort c = *cur++; break;
if (c == '#') { // Yep - no escaping possible cur++;
cmtptr = cur; }
// Then strip comments. Yep - no escaping is possible.
for (const ushort *cptr = cur; cptr != end; ++cptr)
if (*cptr == '#') {
if (cptr == cur) { // Line with only a comment (sans whitespace)
if (!inError)
insertComment(line.right(end - (cptr + 1)));
// Qmake bizarreness: such lines do not affect line continuations
goto ignore;
}
end = cptr;
cmtptr = cptr + 1;
break; break;
} }
if (escaped != Escaped) {
if (c == '\\') { // Then look for line continuations
escaped = Escaped; bool lineCont;
goto putch; forever {
} else if (c == '"') { // We don't have to check for underrun here, as we already determined
inQuote = !inQuote; // that the line is non-empty.
goto putch1; ushort ec = *(end - 1);
if (ec == '\\') {
--end;
lineCont = true;
break;
} }
if (ec != ' ' && ec != '\t') {
lineCont = false;
break;
} }
if (!inQuote) { --end;
}
if (!inError) {
// May need enough space for this line and anything accumulated so far
m_proitem.reserve(m_proitem.length() + (end - cur));
// Finally, do the tokenization
if (!inAssignment) {
newItem:
ptr = (ushort *)m_proitem.unicode() + m_proitem.length();
do {
if (cur == end)
goto lineEnd;
c = *cur++;
} while (c == ' ' || c == '\t');
forever {
if (c == '"') {
quote = '"' - quote;
} else if (!quote) {
if (c == '(') { if (c == '(') {
++parens; ++parens;
} else if (c == ')') { } else if (c == ')') {
--parens; --parens;
} else if (!parens) { } else if (!parens) {
if (m_block && (m_block->blockKind() & ProBlock::VariableKind)) {
if (c == ' ' || c == '\t') {
m_proitemPtr = ptr;
updateItem();
if (escaped == Escaped)
escaped = PostEscaped;
goto nextItem1;
}
} else {
if (c == ':') { if (c == ':') {
m_proitemPtr = ptr; updateItem(ptr);
enterScope(false); enterScope(false);
goto nextItem; nextItem:
putSpace = false;
goto newItem;
} }
if (c == '{') { if (c == '{') {
m_proitemPtr = ptr; updateItem(ptr);
enterScope(true); enterScope(true);
goto nextItem; goto nextItem;
} }
if (c == '}') { if (c == '}') {
m_proitemPtr = ptr; updateItem(ptr);
leaveScope(); leaveScope();
goto nextItem; goto nextItem;
} }
if (c == '=') { if (c == '=') {
m_proitemPtr = ptr; if (insertVariable(ptr, &doSplit, &doSemicolon)) {
insertVariable(&cur, end); inAssignment = true;
goto nextItem; putSpace = false;
break;
}
inError = true;
goto skip;
} }
if (c == '|' || c == '!') { if (c == '|' || c == '!') {
m_proitemPtr = ptr; updateItem(ptr);
insertOperator(c); insertOperator(c);
goto nextItem; goto nextItem;
} }
} }
} }
}
if (c == ' ' || c == '\t') { if (putSpace) {
if (sts == Started) { putSpace = false;
sts = PutSpace; *ptr++ = ' ';
if (escaped == Escaped) }
escaped = PostEscaped; *ptr++ = c;
forever {
if (cur == end)
goto lineEnd;
c = *cur++;
if (c != ' ' && c != '\t')
break;
putSpace = true;
}
}
} // !inAssignment
nextVal:
ptr = (ushort *)m_proitem.unicode() + m_proitem.length();
do {
if (cur == end)
goto lineEnd;
c = *cur++;
} while (c == ' ' || c == '\t');
if (doSplit) {
// Qmake's parser supports truly bizarre quote nesting here, but later
// stages (in qmake) don't grok it anyway. So make it simple instead.
forever {
if (c == '\\') {
ushort ec;
if (cur != end && ((ec = *cur) == '"' || ec == '\'')) {
++cur;
if (putSpace) {
putSpace = false;
*ptr++ = ' ';
}
*ptr++ = '\\';
*ptr++ = ec;
goto getNext;
} }
} else { } else {
putch1: if (quote) {
escaped = NotEscaped; if (c == quote) {
putch: quote = 0;
if (sts == PutSpace) } else if (c == ' ' || c == '\t') {
putSpace = true;
goto getNext;
}
} else {
if (c == '"' || c == '\'') {
quote = c;
} else if (c == ')') {
--parens;
} else if (c == '(') {
++parens;
} else if (c == ' ' || c == '\t') {
if (parens) {
putSpace = true;
goto getNext;
}
updateItem(ptr);
// assert(!putSpace);
goto nextVal;
}
}
}
if (putSpace) {
putSpace = false;
*ptr++ = ' '; *ptr++ = ' ';
}
*ptr++ = c; *ptr++ = c;
sts = Started;
getNext:
if (cur == end)
break;
c = *cur++;
}
} else { // doSplit
forever {
if (putSpace) {
putSpace = false;
*ptr++ = ' ';
}
*ptr++ = c;
forever {
if (cur == end)
goto lineEnd;
c = *cur++;
if (c != ' ' && c != '\t')
break;
putSpace = true;
} }
} }
if (escaped != NotEscaped) {
--ptr;
if (ptr != (ushort *)m_proitem.unicode() && *(ptr - 1) == ' ')
--ptr;
} }
m_proitemPtr = ptr; lineEnd:
updateItem(); if (lineCont) {
m_proitem.resize(ptr - (ushort *)m_proitem.unicode());
putSpace = !m_proitem.isEmpty();
} else {
updateItem(ptr);
putSpace = false;
}
if (cmtptr) if (cmtptr)
insertComment(line.right(end - cmtptr).simplified()); insertComment(line.right(orgend - cmtptr));
if (sts != NeverStarted && escaped == NotEscaped) } // !inError
skip:
if (!lineCont) {
finalizeBlock(); finalizeBlock();
++m_lineNo; ++m_lineNo;
goto freshLine;
}
ignore:
++m_lineNo;
} }
m_proitem.clear(); // Throw away pre-allocation m_proitem.clear(); // Throw away pre-allocation
return true; return true;
@@ -402,14 +530,13 @@ void ProFileEvaluator::Private::finalizeBlock()
m_commentItem = 0; m_commentItem = 0;
} }
void ProFileEvaluator::Private::insertVariable(const ushort **pCur, const ushort *end) bool ProFileEvaluator::Private::insertVariable(ushort *ptr, bool *doSplit, bool *doSemicolon)
{ {
ProVariable::VariableOperator opkind; ProVariable::VariableOperator opkind;
ushort *uc = (ushort *)m_proitem.unicode(); ushort *uc = (ushort *)m_proitem.unicode();
ushort *ptr = m_proitemPtr;
if (ptr == uc) // Line starting with '=', like a conflict marker if (ptr == uc) // Line starting with '=', like a conflict marker
return; return false;
switch (*(ptr - 1)) { switch (*(ptr - 1)) {
case '+': case '+':
@@ -430,10 +557,15 @@ void ProFileEvaluator::Private::insertVariable(const ushort **pCur, const ushort
break; break;
default: default:
opkind = ProVariable::SetOperator; opkind = ProVariable::SetOperator;
goto skipTrunc;
} }
while (ptr != uc && *(ptr - 1) == ' ') if (ptr == uc) // Line starting with manipulation operator
return false;
if (*(ptr - 1) == ' ')
--ptr; --ptr;
skipTrunc:
m_proitem.resize(ptr - uc); m_proitem.resize(ptr - uc);
QString proVar = m_proitem; QString proVar = m_proitem;
proVar.detach(); proVar.detach();
@@ -451,38 +583,16 @@ void ProFileEvaluator::Private::insertVariable(const ushort **pCur, const ushort
} }
m_commentItem = variable; m_commentItem = variable;
if (opkind == ProVariable::ReplaceOperator) { m_proitem.resize(0);
// skip util end of line or comment
StrState sts = NotStarted;
ptr = uc;
const ushort *cur = *pCur;
while (cur < end) {
ushort c = *cur;
if (c == '#') // comment?
break;
++cur;
if (c == ' ' || c == '\t') { *doSplit = (opkind != ProVariable::ReplaceOperator);
if (sts == Started) *doSemicolon = (proVar == QLatin1String("DEPENDPATH")
sts = PutSpace; || proVar == QLatin1String("INCLUDEPATH"));
} else { return true;
if (sts == PutSpace)
*ptr++ = ' ';
*ptr++ = c;
sts = Started;
}
}
*pCur = cur;
m_proitemPtr = ptr;
} else {
m_proitemPtr = uc;
}
} }
void ProFileEvaluator::Private::insertOperator(const char op) void ProFileEvaluator::Private::insertOperator(const char op)
{ {
updateItem();
ProOperator::OperatorKind opkind; ProOperator::OperatorKind opkind;
switch (op) { switch (op) {
case '!': case '!':
@@ -527,8 +637,6 @@ void ProFileEvaluator::Private::insertComment(const QString &comment)
void ProFileEvaluator::Private::enterScope(bool multiLine) void ProFileEvaluator::Private::enterScope(bool multiLine)
{ {
updateItem();
ProBlock *parent = currentBlock(); ProBlock *parent = currentBlock();
ProBlock *block = new ProBlock(parent); ProBlock *block = new ProBlock(parent);
block->setLineNumber(m_lineNo); block->setLineNumber(m_lineNo);
@@ -547,7 +655,6 @@ void ProFileEvaluator::Private::enterScope(bool multiLine)
void ProFileEvaluator::Private::leaveScope() void ProFileEvaluator::Private::leaveScope()
{ {
updateItem();
if (m_blockstack.count() == 1) if (m_blockstack.count() == 1)
q->errorMessage(format("Excess closing brace.")); q->errorMessage(format("Excess closing brace."));
else else
@@ -575,16 +682,17 @@ ProBlock *ProFileEvaluator::Private::currentBlock()
return m_block; return m_block;
} }
void ProFileEvaluator::Private::updateItem() void ProFileEvaluator::Private::updateItem(ushort *ptr)
{ {
ushort *uc = (ushort *)m_proitem.unicode(); m_proitem.resize(ptr - (ushort *)m_proitem.unicode());
ushort *ptr = m_proitemPtr; updateItem2();
}
if (ptr == uc) void ProFileEvaluator::Private::updateItem2()
{
if (m_proitem.isEmpty())
return; return;
m_proitem.resize(ptr - uc);
m_proitemPtr = uc;
QString proItem = m_proitem; QString proItem = m_proitem;
proItem.detach(); proItem.detach();
@@ -598,6 +706,8 @@ void ProFileEvaluator::Private::updateItem()
} }
m_commentItem->setLineNumber(m_lineNo); m_commentItem->setLineNumber(m_lineNo);
block->appendItem(m_commentItem); block->appendItem(m_commentItem);
m_proitem.resize(0);
} }
//////// Evaluator tools ///////// //////// Evaluator tools /////////