forked from qt-creator/qt-creator
C++: clean up numeric literal parsing and add support for n3472.
Separate the messy pp-number parsing from the numeric literal parsing. The C/C++ preprocessor makes a grown man cry, but at least we have "proper" literal parsing when we want it, including C++1y binary literals. Next step is digit separators (n3781). Change-Id: Ia069eef454ed5c056f77694a5b8a595d0b76adc4 Reviewed-by: Erik Verbruggen <erik.verbruggen@theqtcompany.com>
This commit is contained in:
committed by
Nikolai Kosjar
parent
16becbd29c
commit
242b3f4110
167
src/libs/3rdparty/cplusplus/Lexer.cpp
vendored
167
src/libs/3rdparty/cplusplus/Lexer.cpp
vendored
@@ -305,24 +305,27 @@ void Lexer::scan_helper(Token *tok)
|
||||
tok->f.kind = T_ERROR;
|
||||
}
|
||||
} else if (std::isdigit(_yychar)) {
|
||||
if (f._ppMode) {
|
||||
scanPreprocessorNumber(tok, true);
|
||||
break;
|
||||
}
|
||||
|
||||
const char *yytext = _currentChar - 2;
|
||||
do {
|
||||
if (_yychar == 'e' || _yychar == 'E') {
|
||||
yyinp();
|
||||
scanDigitSequence(); // this is optional: we already skipped over the first digit
|
||||
scanExponentPart();
|
||||
scanOptionalFloatingSuffix();
|
||||
if (std::isalnum(_yychar) || _yychar == '_') {
|
||||
do {
|
||||
yyinp();
|
||||
if (_yychar == '-' || _yychar == '+') {
|
||||
yyinp();
|
||||
// ### CPP_CHECK(std::isdigit(_yychar));
|
||||
}
|
||||
} else if (std::isalnum(_yychar) || _yychar == '.') {
|
||||
yyinp();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} while (_yychar);
|
||||
int yylen = _currentChar - yytext;
|
||||
tok->f.kind = T_NUMERIC_LITERAL;
|
||||
if (control())
|
||||
tok->number = control()->numericLiteral(yytext, yylen);
|
||||
} while (std::isalnum(_yychar) || _yychar == '_');
|
||||
tok->f.kind = T_ERROR;
|
||||
} else {
|
||||
int yylen = _currentChar - yytext;
|
||||
tok->f.kind = T_NUMERIC_LITERAL;
|
||||
if (control())
|
||||
tok->number = control()->numericLiteral(yytext, yylen);
|
||||
}
|
||||
} else {
|
||||
tok->f.kind = T_DOT;
|
||||
}
|
||||
@@ -651,7 +654,10 @@ void Lexer::scan_helper(Token *tok)
|
||||
} else if (std::isalpha(ch) || ch == '_' || ch == '$' || isByteOfMultiByteCodePoint(ch)) {
|
||||
scanIdentifier(tok, _currentChar - _tokenStart - 1);
|
||||
} else if (std::isdigit(ch)) {
|
||||
scanNumericLiteral(tok);
|
||||
if (f._ppMode)
|
||||
scanPreprocessorNumber(tok, false);
|
||||
else
|
||||
scanNumericLiteral(tok);
|
||||
} else {
|
||||
tok->f.kind = T_ERROR;
|
||||
}
|
||||
@@ -776,26 +782,141 @@ void Lexer::scanUntilQuote(Token *tok, unsigned char quote)
|
||||
tok->string = control()->stringLiteral(yytext, yylen);
|
||||
}
|
||||
|
||||
bool Lexer::scanDigitSequence()
|
||||
{
|
||||
if (!std::isdigit(_yychar))
|
||||
return false;
|
||||
yyinp();
|
||||
while (std::isdigit(_yychar))
|
||||
yyinp();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Lexer::scanExponentPart()
|
||||
{
|
||||
if (_yychar != 'e' && _yychar != 'E')
|
||||
return false;
|
||||
yyinp();
|
||||
if (_yychar == '+' || _yychar == '-')
|
||||
yyinp();
|
||||
return scanDigitSequence();
|
||||
}
|
||||
|
||||
void Lexer::scanOptionalFloatingSuffix()
|
||||
{
|
||||
if (_yychar == 'f' || _yychar == 'l' || _yychar == 'F' || _yychar == 'L')
|
||||
yyinp();
|
||||
}
|
||||
|
||||
void Lexer::scanOptionalIntegerSuffix(bool allowU)
|
||||
{
|
||||
switch(_yychar) {
|
||||
case 'u':
|
||||
case 'U':
|
||||
if (allowU) {
|
||||
yyinp();
|
||||
scanOptionalIntegerSuffix(false);
|
||||
}
|
||||
return;
|
||||
case 'l':
|
||||
yyinp();
|
||||
if (_yychar == 'l')
|
||||
yyinp();
|
||||
return;
|
||||
case 'L':
|
||||
yyinp();
|
||||
if (_yychar == 'L')
|
||||
yyinp();
|
||||
return;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void Lexer::scanNumericLiteral(Token *tok)
|
||||
{
|
||||
const char *yytext = _currentChar - 1;
|
||||
if (*yytext == '0' && _yychar) {
|
||||
if (_yychar == 'x' || _yychar == 'X') {
|
||||
yyinp();
|
||||
while (std::isdigit(_yychar) ||
|
||||
(_yychar >= 'a' && _yychar <= 'f') ||
|
||||
(_yychar >= 'A' && _yychar <= 'F')) {
|
||||
yyinp();
|
||||
}
|
||||
scanOptionalIntegerSuffix();
|
||||
goto theEnd;
|
||||
} else if (_yychar == 'b' || _yychar == 'B') { // see n3472
|
||||
yyinp();
|
||||
while (_yychar == '0' || _yychar == '1')
|
||||
yyinp();
|
||||
scanOptionalIntegerSuffix();
|
||||
goto theEnd;
|
||||
} else if (_yychar >= '0' && _yychar <= '7') {
|
||||
do {
|
||||
yyinp();
|
||||
} while (_yychar >= '0' && _yychar <= '7');
|
||||
scanOptionalIntegerSuffix();
|
||||
goto theEnd;
|
||||
}
|
||||
}
|
||||
|
||||
while (_yychar) {
|
||||
if (_yychar == '.') {
|
||||
yyinp();
|
||||
scanDigitSequence(); // this is optional: "1." is a valid floating point number
|
||||
scanExponentPart();
|
||||
scanOptionalFloatingSuffix();
|
||||
break;
|
||||
} else if (_yychar == 'e' || _yychar == 'E') {
|
||||
if (scanExponentPart())
|
||||
scanOptionalFloatingSuffix();
|
||||
break;
|
||||
} else if (std::isdigit(_yychar)) {
|
||||
yyinp();
|
||||
} else {
|
||||
scanOptionalIntegerSuffix();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
theEnd:
|
||||
if (std::isalnum(_yychar) || _yychar == '_') {
|
||||
do {
|
||||
yyinp();
|
||||
} while (std::isalnum(_yychar) || _yychar == '_');
|
||||
tok->f.kind = T_ERROR;
|
||||
} else {
|
||||
int yylen = _currentChar - yytext;
|
||||
tok->f.kind = T_NUMERIC_LITERAL;
|
||||
if (control())
|
||||
tok->number = control()->numericLiteral(yytext, yylen);
|
||||
}
|
||||
}
|
||||
|
||||
void Lexer::scanPreprocessorNumber(Token *tok, bool dotAlreadySkipped)
|
||||
{
|
||||
const char *yytext = _currentChar - (dotAlreadySkipped ? 2 : 1);
|
||||
if (dotAlreadySkipped &&
|
||||
(!_yychar || (_yychar && !std::isdigit(_yychar)))) {
|
||||
tok->f.kind = T_DOT;
|
||||
return;
|
||||
}
|
||||
|
||||
while (_yychar) {
|
||||
if (_yychar == 'e' || _yychar == 'E') {
|
||||
yyinp();
|
||||
if (_yychar == '-' || _yychar == '+') {
|
||||
if (_yychar == '+' || _yychar == '-')
|
||||
yyinp();
|
||||
// ### CPP_CHECK(std::isdigit(_yychar));
|
||||
}
|
||||
} else if (std::isalnum(_yychar) || _yychar == '.') {
|
||||
} else if (std::isalnum(_yychar) || _yychar == '_' || _yychar == '.') {
|
||||
yyinp();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int yylen = _currentChar - yytext;
|
||||
|
||||
tok->f.kind = T_NUMERIC_LITERAL;
|
||||
|
||||
if (control())
|
||||
tok->number = control()->numericLiteral(yytext, yylen);
|
||||
}
|
||||
|
||||
9
src/libs/3rdparty/cplusplus/Lexer.h
vendored
9
src/libs/3rdparty/cplusplus/Lexer.h
vendored
@@ -61,6 +61,9 @@ public:
|
||||
LanguageFeatures languageFeatures() const { return _languageFeatures; }
|
||||
void setLanguageFeatures(LanguageFeatures features) { _languageFeatures = features; }
|
||||
|
||||
void setPreprocessorMode(bool onoff)
|
||||
{ f._ppMode = onoff; }
|
||||
|
||||
public:
|
||||
static void yyinp_utf8(const char *¤tSourceChar, unsigned char &yychar,
|
||||
unsigned &utf16charCounter)
|
||||
@@ -95,7 +98,12 @@ private:
|
||||
void scanRawStringLiteral(Token *tok, unsigned char hint = 0);
|
||||
void scanCharLiteral(Token *tok, unsigned char hint = 0);
|
||||
void scanUntilQuote(Token *tok, unsigned char quote);
|
||||
bool scanDigitSequence();
|
||||
bool scanExponentPart();
|
||||
void scanOptionalFloatingSuffix();
|
||||
void scanOptionalIntegerSuffix(bool allowU = true);
|
||||
void scanNumericLiteral(Token *tok);
|
||||
void scanPreprocessorNumber(Token *tok, bool dotAlreadySkipped);
|
||||
void scanIdentifier(Token *tok, unsigned extraProcessedChars = 0);
|
||||
void scanBackslash(Kind type);
|
||||
void scanCppComment(Kind type);
|
||||
@@ -115,6 +123,7 @@ private:
|
||||
unsigned _scanCommentTokens: 1;
|
||||
unsigned _scanKeywords: 1;
|
||||
unsigned _scanAngleStringLiteralTokens: 1;
|
||||
unsigned _ppMode: 1;
|
||||
};
|
||||
|
||||
struct State {
|
||||
|
||||
Reference in New Issue
Block a user