forked from qt-creator/qt-creator
C++: clean up numeric literal parsing and add support for n3472.
Separate the messy pp-number parsing from the numeric literal parsing. The C/C++ preprocessor makes a grown man cry, but at least we have "proper" literal parsing when we want it, including C++1y binary literals. Next step is digit separators (n3781). Change-Id: Ia069eef454ed5c056f77694a5b8a595d0b76adc4 Reviewed-by: Erik Verbruggen <erik.verbruggen@theqtcompany.com>
This commit is contained in:
committed by
Nikolai Kosjar
parent
16becbd29c
commit
242b3f4110
167
src/libs/3rdparty/cplusplus/Lexer.cpp
vendored
167
src/libs/3rdparty/cplusplus/Lexer.cpp
vendored
@@ -305,24 +305,27 @@ void Lexer::scan_helper(Token *tok)
|
||||
tok->f.kind = T_ERROR;
|
||||
}
|
||||
} else if (std::isdigit(_yychar)) {
|
||||
if (f._ppMode) {
|
||||
scanPreprocessorNumber(tok, true);
|
||||
break;
|
||||
}
|
||||
|
||||
const char *yytext = _currentChar - 2;
|
||||
do {
|
||||
if (_yychar == 'e' || _yychar == 'E') {
|
||||
yyinp();
|
||||
scanDigitSequence(); // this is optional: we already skipped over the first digit
|
||||
scanExponentPart();
|
||||
scanOptionalFloatingSuffix();
|
||||
if (std::isalnum(_yychar) || _yychar == '_') {
|
||||
do {
|
||||
yyinp();
|
||||
if (_yychar == '-' || _yychar == '+') {
|
||||
yyinp();
|
||||
// ### CPP_CHECK(std::isdigit(_yychar));
|
||||
}
|
||||
} else if (std::isalnum(_yychar) || _yychar == '.') {
|
||||
yyinp();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} while (_yychar);
|
||||
int yylen = _currentChar - yytext;
|
||||
tok->f.kind = T_NUMERIC_LITERAL;
|
||||
if (control())
|
||||
tok->number = control()->numericLiteral(yytext, yylen);
|
||||
} while (std::isalnum(_yychar) || _yychar == '_');
|
||||
tok->f.kind = T_ERROR;
|
||||
} else {
|
||||
int yylen = _currentChar - yytext;
|
||||
tok->f.kind = T_NUMERIC_LITERAL;
|
||||
if (control())
|
||||
tok->number = control()->numericLiteral(yytext, yylen);
|
||||
}
|
||||
} else {
|
||||
tok->f.kind = T_DOT;
|
||||
}
|
||||
@@ -651,7 +654,10 @@ void Lexer::scan_helper(Token *tok)
|
||||
} else if (std::isalpha(ch) || ch == '_' || ch == '$' || isByteOfMultiByteCodePoint(ch)) {
|
||||
scanIdentifier(tok, _currentChar - _tokenStart - 1);
|
||||
} else if (std::isdigit(ch)) {
|
||||
scanNumericLiteral(tok);
|
||||
if (f._ppMode)
|
||||
scanPreprocessorNumber(tok, false);
|
||||
else
|
||||
scanNumericLiteral(tok);
|
||||
} else {
|
||||
tok->f.kind = T_ERROR;
|
||||
}
|
||||
@@ -776,26 +782,141 @@ void Lexer::scanUntilQuote(Token *tok, unsigned char quote)
|
||||
tok->string = control()->stringLiteral(yytext, yylen);
|
||||
}
|
||||
|
||||
bool Lexer::scanDigitSequence()
|
||||
{
|
||||
if (!std::isdigit(_yychar))
|
||||
return false;
|
||||
yyinp();
|
||||
while (std::isdigit(_yychar))
|
||||
yyinp();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Lexer::scanExponentPart()
|
||||
{
|
||||
if (_yychar != 'e' && _yychar != 'E')
|
||||
return false;
|
||||
yyinp();
|
||||
if (_yychar == '+' || _yychar == '-')
|
||||
yyinp();
|
||||
return scanDigitSequence();
|
||||
}
|
||||
|
||||
void Lexer::scanOptionalFloatingSuffix()
|
||||
{
|
||||
if (_yychar == 'f' || _yychar == 'l' || _yychar == 'F' || _yychar == 'L')
|
||||
yyinp();
|
||||
}
|
||||
|
||||
void Lexer::scanOptionalIntegerSuffix(bool allowU)
|
||||
{
|
||||
switch(_yychar) {
|
||||
case 'u':
|
||||
case 'U':
|
||||
if (allowU) {
|
||||
yyinp();
|
||||
scanOptionalIntegerSuffix(false);
|
||||
}
|
||||
return;
|
||||
case 'l':
|
||||
yyinp();
|
||||
if (_yychar == 'l')
|
||||
yyinp();
|
||||
return;
|
||||
case 'L':
|
||||
yyinp();
|
||||
if (_yychar == 'L')
|
||||
yyinp();
|
||||
return;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Scans a "proper" (non-preprocessor) numeric literal whose first character
// has already been consumed; the token text therefore starts one character
// before _currentChar.
//
// Recognised forms:
//   - hexadecimal: 0x / 0X followed by hex digits and an optional integer suffix
//   - binary (n3472): 0b / 0B followed by 0/1 digits and an optional integer suffix
//   - octal: 0 followed by octal digits and an optional integer suffix
//   - decimal integers with an optional integer suffix
//   - floating point: digits with a fraction and/or exponent and an optional
//     floating suffix ("1." is accepted)
//
// On success tok becomes T_NUMERIC_LITERAL and, if a control object is
// present, tok->number is set from control()->numericLiteral(). If the
// literal is directly followed by an alphanumeric character or '_', that
// whole tail is consumed and tok becomes T_ERROR.
void Lexer::scanNumericLiteral(Token *tok)
{
    const char *yytext = _currentChar - 1;

    // Cleared once a prefixed/octal integer has been scanned completely, so
    // that the decimal/floating-point scan below is skipped.
    bool scanDecimalOrFloat = true;

    if (*yytext == '0' && _yychar) {
        if (_yychar == 'x' || _yychar == 'X') {
            yyinp();
            while (std::isdigit(_yychar) ||
                   (_yychar >= 'a' && _yychar <= 'f') ||
                   (_yychar >= 'A' && _yychar <= 'F')) {
                yyinp();
            }
            scanOptionalIntegerSuffix();
            scanDecimalOrFloat = false;
        } else if (_yychar == 'b' || _yychar == 'B') { // see n3472
            yyinp();
            while (_yychar == '0' || _yychar == '1')
                yyinp();
            scanOptionalIntegerSuffix();
            scanDecimalOrFloat = false;
        } else if (_yychar >= '0' && _yychar <= '7') {
            do {
                yyinp();
            } while (_yychar >= '0' && _yychar <= '7');
            // A leading zero does not make a floating literal octal: "01.5"
            // and "012e3" are decimal floating literals, so let the generic
            // scan below handle the fraction / exponent.
            if (_yychar != '.' && _yychar != 'e' && _yychar != 'E') {
                scanOptionalIntegerSuffix();
                scanDecimalOrFloat = false;
            }
        }
    }

    if (scanDecimalOrFloat) {
        while (_yychar) {
            if (_yychar == '.') {
                yyinp();
                scanDigitSequence(); // this is optional: "1." is a valid floating point number
                scanExponentPart();
                scanOptionalFloatingSuffix();
                break;
            } else if (_yychar == 'e' || _yychar == 'E') {
                if (scanExponentPart())
                    scanOptionalFloatingSuffix();
                break;
            } else if (std::isdigit(_yychar)) {
                yyinp();
            } else {
                scanOptionalIntegerSuffix();
                break;
            }
        }
    }

    if (std::isalnum(_yychar) || _yychar == '_') {
        // Garbage glued to the literal ("123FOO", "33_"): swallow it and
        // report the whole thing as one error token.
        do {
            yyinp();
        } while (std::isalnum(_yychar) || _yychar == '_');
        tok->f.kind = T_ERROR;
    } else {
        int yylen = _currentChar - yytext;
        tok->f.kind = T_NUMERIC_LITERAL;
        if (control())
            tok->number = control()->numericLiteral(yytext, yylen);
    }
}
|
||||
|
||||
// Scans a preprocessing number (pp-number), which is far more permissive than
// a real numeric literal: after the leading digit (or '.' digit) any run of
// alphanumerics, '_', '.', and 'e'/'E' optionally followed by a sign is
// accepted as one token.
//
// dotAlreadySkipped: true when the caller has consumed a leading '.' and the
// current character is what follows it; the token text then starts two
// characters before _currentChar instead of one. If that following character
// is not a digit, the token is just T_DOT and nothing more is consumed.
//
// Otherwise tok becomes T_NUMERIC_LITERAL and, if a control object is
// present, tok->number is set from control()->numericLiteral().
void Lexer::scanPreprocessorNumber(Token *tok, bool dotAlreadySkipped)
{
    const char *yytext = _currentChar - (dotAlreadySkipped ? 2 : 1);

    // std::isdigit() is false for the terminating 0 as well, so this also
    // covers "dot at end of input".
    if (dotAlreadySkipped && !std::isdigit(_yychar)) {
        tok->f.kind = T_DOT;
        return;
    }

    while (_yychar) {
        if (_yychar == 'e' || _yychar == 'E') {
            yyinp();
            // A sign is part of the pp-number only directly after 'e'/'E'.
            if (_yychar == '+' || _yychar == '-')
                yyinp();
        } else if (std::isalnum(_yychar) || _yychar == '_' || _yychar == '.') {
            yyinp();
        } else {
            break;
        }
    }

    int yylen = _currentChar - yytext;

    tok->f.kind = T_NUMERIC_LITERAL;

    if (control())
        tok->number = control()->numericLiteral(yytext, yylen);
}
|
||||
|
||||
9
src/libs/3rdparty/cplusplus/Lexer.h
vendored
9
src/libs/3rdparty/cplusplus/Lexer.h
vendored
@@ -61,6 +61,9 @@ public:
|
||||
LanguageFeatures languageFeatures() const { return _languageFeatures; }
|
||||
void setLanguageFeatures(LanguageFeatures features) { _languageFeatures = features; }
|
||||
|
||||
void setPreprocessorMode(bool onoff)
|
||||
{ f._ppMode = onoff; }
|
||||
|
||||
public:
|
||||
static void yyinp_utf8(const char *¤tSourceChar, unsigned char &yychar,
|
||||
unsigned &utf16charCounter)
|
||||
@@ -95,7 +98,12 @@ private:
|
||||
void scanRawStringLiteral(Token *tok, unsigned char hint = 0);
|
||||
void scanCharLiteral(Token *tok, unsigned char hint = 0);
|
||||
void scanUntilQuote(Token *tok, unsigned char quote);
|
||||
bool scanDigitSequence();
|
||||
bool scanExponentPart();
|
||||
void scanOptionalFloatingSuffix();
|
||||
void scanOptionalIntegerSuffix(bool allowU = true);
|
||||
void scanNumericLiteral(Token *tok);
|
||||
void scanPreprocessorNumber(Token *tok, bool dotAlreadySkipped);
|
||||
void scanIdentifier(Token *tok, unsigned extraProcessedChars = 0);
|
||||
void scanBackslash(Kind type);
|
||||
void scanCppComment(Kind type);
|
||||
@@ -115,6 +123,7 @@ private:
|
||||
unsigned _scanCommentTokens: 1;
|
||||
unsigned _scanKeywords: 1;
|
||||
unsigned _scanAngleStringLiteralTokens: 1;
|
||||
unsigned _ppMode: 1;
|
||||
};
|
||||
|
||||
struct State {
|
||||
|
||||
@@ -41,7 +41,8 @@ using namespace CPlusPlus;
|
||||
// Default state: no carried-over lexer state, comments are not skipped,
// the previous line did not end joined, and preprocessor mode is off.
SimpleLexer::SimpleLexer()
    : _lastState(0),
      _skipComments(false),
      _endedJoined(false),
      _ppMode(false)
{}
|
||||
|
||||
SimpleLexer::~SimpleLexer()
|
||||
@@ -73,6 +74,7 @@ Tokens SimpleLexer::operator()(const QString &text, int state)
|
||||
Lexer lex(firstChar, lastChar);
|
||||
lex.setLanguageFeatures(_languageFeatures);
|
||||
lex.setStartWithNewline(true);
|
||||
lex.setPreprocessorMode(_ppMode);
|
||||
|
||||
if (! _skipComments)
|
||||
lex.setScanCommentTokens(true);
|
||||
|
||||
@@ -51,6 +51,9 @@ public:
|
||||
bool skipComments() const;
|
||||
void setSkipComments(bool skipComments);
|
||||
|
||||
void setPreprocessorMode(bool ppMode)
|
||||
{ _ppMode = ppMode; }
|
||||
|
||||
LanguageFeatures languageFeatures() const { return _languageFeatures; }
|
||||
void setLanguageFeatures(LanguageFeatures features) { _languageFeatures = features; }
|
||||
|
||||
@@ -74,6 +77,7 @@ private:
|
||||
LanguageFeatures _languageFeatures;
|
||||
bool _skipComments: 1;
|
||||
bool _endedJoined: 1;
|
||||
bool _ppMode: 1;
|
||||
};
|
||||
|
||||
} // namespace CPlusPlus
|
||||
|
||||
@@ -401,6 +401,9 @@ protected:
|
||||
const char *end = spell + len;
|
||||
char *vend = const_cast<char *>(end);
|
||||
_value.set_long(strtol(spell, &vend, 0));
|
||||
// TODO: if (vend != end) error(NaN)
|
||||
// TODO: binary literals
|
||||
// TODO: float literals
|
||||
++(*_lex);
|
||||
} else if (isTokenDefined()) {
|
||||
++(*_lex);
|
||||
@@ -1388,6 +1391,7 @@ void Preprocessor::preprocess(const QString &fileName, const QByteArray &source,
|
||||
m_state.m_lexer = new Lexer(source.constBegin(), source.constEnd());
|
||||
m_state.m_lexer->setScanKeywords(false);
|
||||
m_state.m_lexer->setScanAngleStringLiteralTokens(false);
|
||||
m_state.m_lexer->setPreprocessorMode(true);
|
||||
if (m_keepComments)
|
||||
m_state.m_lexer->setScanCommentTokens(true);
|
||||
m_state.m_result = result;
|
||||
@@ -1803,6 +1807,7 @@ const PPToken Preprocessor::evalExpression(PPToken *tk, Value &result)
|
||||
PPToken lastConditionToken;
|
||||
const QByteArray expanded = expand(tk, &lastConditionToken);
|
||||
Lexer lexer(expanded.constData(), expanded.constData() + expanded.size());
|
||||
lexer.setPreprocessorMode(true);
|
||||
std::vector<Token> buf;
|
||||
Token t;
|
||||
do {
|
||||
|
||||
@@ -70,6 +70,10 @@ private slots:
|
||||
void basic_data();
|
||||
void incremental();
|
||||
void incremental_data();
|
||||
void literals();
|
||||
void literals_data();
|
||||
void preprocessor();
|
||||
void preprocessor_data();
|
||||
|
||||
void bytes_and_utf16chars();
|
||||
void bytes_and_utf16chars_data();
|
||||
@@ -82,7 +86,8 @@ private:
|
||||
void run(const QByteArray &source,
|
||||
const Tokens &expectedTokens,
|
||||
bool preserveState,
|
||||
TokenCompareFlags compareFlags);
|
||||
TokenCompareFlags compareFlags,
|
||||
bool preprocessorMode = false);
|
||||
|
||||
int _state;
|
||||
};
|
||||
@@ -103,11 +108,13 @@ Tokens tst_SimpleLexer::toTokens(const TokenKindList &tokenKinds)
|
||||
void tst_SimpleLexer::run(const QByteArray &source,
|
||||
const Tokens &expectedTokens,
|
||||
bool preserveState,
|
||||
TokenCompareFlags compareFlags)
|
||||
TokenCompareFlags compareFlags,
|
||||
bool preprocessorMode)
|
||||
{
|
||||
QVERIFY(compareFlags);
|
||||
|
||||
SimpleLexer lexer;
|
||||
lexer.setPreprocessorMode(preprocessorMode);
|
||||
const Tokens tokens = lexer(source, preserveState ? _state : 0);
|
||||
if (preserveState)
|
||||
_state = lexer.state();
|
||||
@@ -140,7 +147,10 @@ void tst_SimpleLexer::run(const QByteArray &source,
|
||||
if (compareFlags & CompareUtf16CharsEnd)
|
||||
QCOMPARE(token.utf16charsEnd(), expectedToken.utf16charsEnd());
|
||||
}
|
||||
QVERIFY2(i == expectedTokens.size(), "Less tokens than expected.");
|
||||
|
||||
QString msg = QLatin1String("Less tokens than expected: got %1, expected %2.");
|
||||
msg = msg.arg(i).arg(expectedTokens.size());
|
||||
QVERIFY2(i == expectedTokens.size(), msg.toUtf8().constData());
|
||||
}
|
||||
|
||||
void tst_SimpleLexer::basic()
|
||||
@@ -254,6 +264,97 @@ void tst_SimpleLexer::basic_data()
|
||||
QTest::newRow(source) << source << expectedTokenKindList;
|
||||
}
|
||||
|
||||
void tst_SimpleLexer::literals()
|
||||
{
|
||||
QFETCH(QByteArray, source);
|
||||
QFETCH(TokenKindList, expectedTokenKindList);
|
||||
|
||||
run(source, toTokens(expectedTokenKindList), false, CompareKind);
|
||||
}
|
||||
|
||||
void tst_SimpleLexer::literals_data()
|
||||
{
|
||||
QTest::addColumn<QByteArray>("source");
|
||||
QTest::addColumn<TokenKindList>("expectedTokenKindList");
|
||||
|
||||
QByteArray source;
|
||||
TokenKindList expectedTokenKindList;
|
||||
|
||||
source =
|
||||
"1.\n"
|
||||
"1.1\n"
|
||||
"1.23456789\n"
|
||||
".1\n"
|
||||
".3e8\n"
|
||||
".3e8f\n"
|
||||
"1e1\n"
|
||||
"1E1\n"
|
||||
"-1e-1\n" // the first minus sign is a separate token!
|
||||
"1e-1\n"
|
||||
"1e+1\n"
|
||||
"1e1L\n"
|
||||
"1e1l\n"
|
||||
"1e1f\n"
|
||||
"1e1F\n"
|
||||
"23.45x"
|
||||
".45x"
|
||||
;
|
||||
expectedTokenKindList =
|
||||
TokenKindList() << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL
|
||||
<< T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL
|
||||
<< T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_MINUS
|
||||
<< T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL
|
||||
<< T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL
|
||||
<< T_NUMERIC_LITERAL << T_ERROR << T_ERROR
|
||||
;
|
||||
QTest::newRow("float-literals") << source << expectedTokenKindList;
|
||||
|
||||
source = // these are all the same
|
||||
"42\n"
|
||||
"0b101010u\n"
|
||||
"052ll\n"
|
||||
"0x2aL\n"
|
||||
"123FOO\n"
|
||||
"0xfOo\n"
|
||||
"33_\n"
|
||||
;
|
||||
expectedTokenKindList =
|
||||
TokenKindList() << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL
|
||||
<< T_NUMERIC_LITERAL << T_ERROR << T_ERROR << T_ERROR
|
||||
;
|
||||
QTest::newRow("integer-literals") << source << expectedTokenKindList;
|
||||
}
|
||||
|
||||
void tst_SimpleLexer::preprocessor()
|
||||
{
|
||||
QFETCH(QByteArray, source);
|
||||
QFETCH(TokenKindList, expectedTokenKindList);
|
||||
|
||||
run(source, toTokens(expectedTokenKindList), false, CompareKind, true);
|
||||
}
|
||||
|
||||
void tst_SimpleLexer::preprocessor_data()
|
||||
{
|
||||
QTest::addColumn<QByteArray>("source");
|
||||
QTest::addColumn<TokenKindList>("expectedTokenKindList");
|
||||
|
||||
QByteArray source;
|
||||
TokenKindList expectedTokenKindList;
|
||||
|
||||
source = // sad but true [2.10]
|
||||
"1\n"
|
||||
"1x.\n"
|
||||
"1.y\n"
|
||||
".1_1.1.\n"
|
||||
"1e-\n"
|
||||
"01x1b2qWeRtty_Grumble+E-.\n"
|
||||
;
|
||||
expectedTokenKindList =
|
||||
TokenKindList() << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL
|
||||
<< T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL;
|
||||
QTest::newRow("pp-number") << source << expectedTokenKindList;
|
||||
}
|
||||
|
||||
void tst_SimpleLexer::bytes_and_utf16chars()
|
||||
{
|
||||
QFETCH(QByteArray, source);
|
||||
|
||||
Reference in New Issue
Block a user