C++: Basic support for C++11 user-defined literals

1. Extends the lexer so that a numeric, character or string literal can be
   followed by an underscore '_' and alphanumeric characters forming a
   user-defined literal suffix.

2. Extends the parser so that it accepts an operator"" _abc(...) user-defined
   literal operator definition.

3. Adds a Token::Flags.userDefinedLiteral bit-field flag indicating whether the
   token carries a user-defined literal.

4. Adds a C++11 auto test case with: 12_km, 0.5_Pa, 'c'_X, "abd"_L, u"xyz"_M

5. The optional integer and floating suffix scanning methods now return a
   boolean indicating whether a suffix was found.

6. Adds C++ Lexer tests for user-defined literals with C++11 feature enabled.

However, this change does not make Qt Creator understand user-defined literal
semantics, e.g. properly resolving the type when applying a custom literal
operator.

Change-Id: I30e62f025ec9fb11c39261985ea4d772b1a80949
Reviewed-by: Nikolai Kosjar <nikolai.kosjar@theqtcompany.com>
This commit is contained in:
Adam Strzelecki
2014-11-02 14:42:23 +01:00
committed by Nikolai Kosjar
parent 5699991a2f
commit 425811291d
7 changed files with 115 additions and 22 deletions

View File

@@ -314,7 +314,8 @@ void Lexer::scan_helper(Token *tok)
yyinp();
scanDigitSequence(); // this is optional: we already skipped over the first digit
scanExponentPart();
scanOptionalFloatingSuffix();
if (!scanOptionalFloatingSuffix())
scanOptionalUserDefinedLiteral(tok);
if (std::isalnum(_yychar) || _yychar == '_') {
do {
yyinp();
@@ -683,6 +684,7 @@ void Lexer::scanStringLiteral(Token *tok, unsigned char hint)
tok->f.kind = T_STRING_LITERAL;
scanUntilQuote(tok, '"');
scanOptionalUserDefinedLiteral(tok);
}
void Lexer::scanRawStringLiteral(Token *tok, unsigned char hint)
@@ -758,6 +760,7 @@ void Lexer::scanCharLiteral(Token *tok, unsigned char hint)
tok->f.kind = T_CHAR_LITERAL;
scanUntilQuote(tok, '\'');
scanOptionalUserDefinedLiteral(tok);
}
void Lexer::scanUntilQuote(Token *tok, unsigned char quote)
@@ -802,13 +805,16 @@ bool Lexer::scanExponentPart()
return scanDigitSequence();
}
void Lexer::scanOptionalFloatingSuffix()
bool Lexer::scanOptionalFloatingSuffix()
{
if (_yychar == 'f' || _yychar == 'l' || _yychar == 'F' || _yychar == 'L')
if (_yychar == 'f' || _yychar == 'l' || _yychar == 'F' || _yychar == 'L') {
yyinp();
return true;
}
return false;
}
void Lexer::scanOptionalIntegerSuffix(bool allowU)
bool Lexer::scanOptionalIntegerSuffix(bool allowU)
{
switch(_yychar) {
case 'u':
@@ -817,19 +823,28 @@ void Lexer::scanOptionalIntegerSuffix(bool allowU)
yyinp();
scanOptionalIntegerSuffix(false);
}
return;
return true;
case 'l':
yyinp();
if (_yychar == 'l')
yyinp();
return;
return true;
case 'L':
yyinp();
if (_yychar == 'L')
yyinp();
return;
return true;
default:
return;
return false;
}
}
// Scans an optional C++11 user-defined literal suffix (e.g. the "_km" in 12_km)
// that directly follows a literal.
//
// Acts only when _languageFeatures.cxx11Enabled is set and the current
// character (_yychar) is an underscore. In that case the token is flagged via
// tok->f.userDefinedLiteral and yyinp() is called repeatedly while the current
// character is alphanumeric, an underscore, or accepted by
// isByteOfMultiByteCodePoint(). Otherwise neither the token nor the lexer state
// is touched by this function.
//
// NOTE(review): yyinp() presumably advances the lexer by one character and
// refreshes _yychar -- confirm against its definition.
void Lexer::scanOptionalUserDefinedLiteral(Token *tok)
{
if (_languageFeatures.cxx11Enabled && _yychar == '_') {
// Record on the token that a user-defined literal suffix is present.
tok->f.userDefinedLiteral = true;
// The leading '_' satisfies the loop condition itself, so the first
// iteration consumes it; the loop then runs over the rest of the suffix.
while (std::isalnum(_yychar) || _yychar == '_' || isByteOfMultiByteCodePoint(_yychar))
yyinp();
}
}
@@ -844,19 +859,22 @@ void Lexer::scanNumericLiteral(Token *tok)
(_yychar >= 'A' && _yychar <= 'F')) {
yyinp();
}
scanOptionalIntegerSuffix();
if (!scanOptionalIntegerSuffix())
scanOptionalUserDefinedLiteral(tok);
goto theEnd;
} else if (_yychar == 'b' || _yychar == 'B') { // see n3472
yyinp();
while (_yychar == '0' || _yychar == '1')
yyinp();
scanOptionalIntegerSuffix();
if (!scanOptionalIntegerSuffix())
scanOptionalUserDefinedLiteral(tok);
goto theEnd;
} else if (_yychar >= '0' && _yychar <= '7') {
do {
yyinp();
} while (_yychar >= '0' && _yychar <= '7');
scanOptionalIntegerSuffix();
if (!scanOptionalIntegerSuffix())
scanOptionalUserDefinedLiteral(tok);
goto theEnd;
}
}
@@ -866,16 +884,18 @@ void Lexer::scanNumericLiteral(Token *tok)
yyinp();
scanDigitSequence(); // this is optional: "1." is a valid floating point number
scanExponentPart();
scanOptionalFloatingSuffix();
if (!scanOptionalFloatingSuffix())
scanOptionalUserDefinedLiteral(tok);
break;
} else if (_yychar == 'e' || _yychar == 'E') {
if (scanExponentPart())
scanOptionalFloatingSuffix();
if (scanExponentPart() && !scanOptionalFloatingSuffix())
scanOptionalUserDefinedLiteral(tok);
break;
} else if (std::isdigit(_yychar)) {
yyinp();
} else {
scanOptionalIntegerSuffix();
if (!scanOptionalIntegerSuffix())
scanOptionalUserDefinedLiteral(tok);
break;
}
}
@@ -911,6 +931,7 @@ void Lexer::scanPreprocessorNumber(Token *tok, bool dotAlreadySkipped)
} else if (std::isalnum(_yychar) || _yychar == '_' || _yychar == '.') {
yyinp();
} else {
scanOptionalUserDefinedLiteral(tok);
break;
}
}

View File

@@ -100,8 +100,9 @@ private:
void scanUntilQuote(Token *tok, unsigned char quote);
bool scanDigitSequence();
bool scanExponentPart();
void scanOptionalFloatingSuffix();
void scanOptionalIntegerSuffix(bool allowU = true);
bool scanOptionalFloatingSuffix();
bool scanOptionalIntegerSuffix(bool allowU = true);
void scanOptionalUserDefinedLiteral(Token *tok);
void scanNumericLiteral(Token *tok);
void scanPreprocessorNumber(Token *tok, bool dotAlreadySkipped);
void scanIdentifier(Token *tok, unsigned extraProcessedChars = 0);

View File

@@ -1274,6 +1274,14 @@ bool Parser::parseOperator(OperatorAST *&node) // ### FIXME
} else if (LA() == T_LBRACKET && LA(2) == T_RBRACKET) {
ast->op_token = ast->open_token = consumeToken();
ast->close_token = consumeToken();
} else if (_languageFeatures.cxx11Enabled &&
LA() == T_STRING_LITERAL && LA(2) == T_IDENTIFIER &&
!tok().f.userDefinedLiteral && tok().string->size() == 0 &&
tok(2).identifier->size() > 1 && tok(2).identifier->chars()[0] == '_') {
// C++11 user-defined literal operator, e.g.:
// int operator"" _abc123(const char *str, size_t size) { ... }
ast->op_token = consumeToken();
consumeToken(); // consume literal operator identifier
} else {
return false;
}

View File

@@ -302,6 +302,7 @@ public:
inline bool joined() const { return f.joined; }
inline bool expanded() const { return f.expanded; }
inline bool generated() const { return f.generated; }
inline bool userDefinedLiteral() const { return f.userDefinedLiteral; }
inline unsigned bytes() const { return f.bytes; }
inline unsigned bytesBegin() const { return byteOffset; }
@@ -363,8 +364,11 @@ public:
// Tokens '1', '+', '2', and ';' are all expanded. However only tokens '+' and ';'
// are generated.
unsigned generated : 1;
// The token is a C++11 user-defined literal such as:
// 12_km, 0.5_Pa, 'c'_X, "abd"_L, u"xyz"_M
unsigned userDefinedLiteral : 1;
// Unused...
unsigned pad : 3;
unsigned pad : 2;
// The token length in bytes and UTF16 chars.
unsigned bytes : 16;
unsigned utf16chars : 16;