forked from qt-creator/qt-creator
C++: Introduce unicode char/strings support
Those are the types char16_t and char32_t along with the new char/string literals u'', U'', u"", u8"", and U"". This is particularly important for the use of QStringLiteral, since on some platforms it relies on expansions such as the ones above. Note: The string literal quickfixes still need some tuning. Task-number: QTCREATORBUG-7449 Change-Id: Iebcfea15677dc8e0ebb6143def89a5477e1be7d4 Reviewed-by: hjk <qthjk@ovi.com>
This commit is contained in:
12
src/libs/3rdparty/cplusplus/Bind.cpp
vendored
12
src/libs/3rdparty/cplusplus/Bind.cpp
vendored
@@ -2750,6 +2750,18 @@ bool Bind::visit(SimpleSpecifierAST *ast)
|
||||
_type.setType(control()->integerType(IntegerType::Char));
|
||||
break;
|
||||
|
||||
case T_CHAR16_T:
|
||||
if (_type)
|
||||
translationUnit()->error(ast->specifier_token, "duplicate data type in declaration");
|
||||
_type.setType(control()->integerType(IntegerType::Char16));
|
||||
break;
|
||||
|
||||
case T_CHAR32_T:
|
||||
if (_type)
|
||||
translationUnit()->error(ast->specifier_token, "duplicate data type in declaration");
|
||||
_type.setType(control()->integerType(IntegerType::Char32));
|
||||
break;
|
||||
|
||||
case T_WCHAR_T:
|
||||
if (_type)
|
||||
translationUnit()->error(ast->specifier_token, "duplicate data type in declaration");
|
||||
|
2
src/libs/3rdparty/cplusplus/CoreTypes.h
vendored
2
src/libs/3rdparty/cplusplus/CoreTypes.h
vendored
@@ -70,6 +70,8 @@ class CPLUSPLUS_EXPORT IntegerType: public Type
|
||||
public:
|
||||
enum Kind {
|
||||
Char,
|
||||
Char16,
|
||||
Char32,
|
||||
WideChar,
|
||||
Bool,
|
||||
Short,
|
||||
|
22
src/libs/3rdparty/cplusplus/Keywords.cpp
vendored
22
src/libs/3rdparty/cplusplus/Keywords.cpp
vendored
@@ -778,6 +778,28 @@ static inline int classify8(const char *s, bool q, bool x) {
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (x && s[1] == 'h') {
|
||||
if (s[2] == 'a') {
|
||||
if (s[3] == 'r') {
|
||||
if (s[4] == '1') {
|
||||
if (s[5] == '6') {
|
||||
if (s[6] == '_') {
|
||||
if (s[7] == 't') {
|
||||
return T_CHAR16_T;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (s[4] == '3') {
|
||||
if (s[5] == '2') {
|
||||
if (s[6] == '_') {
|
||||
if (s[7] == 't') {
|
||||
return T_CHAR32_T;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (x && s[0] == 'd') {
|
||||
|
248
src/libs/3rdparty/cplusplus/Lexer.cpp
vendored
248
src/libs/3rdparty/cplusplus/Lexer.cpp
vendored
@@ -224,37 +224,13 @@ void Lexer::scan_helper(Token *tok)
|
||||
}
|
||||
goto _Lagain;
|
||||
|
||||
case '"': case '\'': {
|
||||
const char quote = ch;
|
||||
case '"':
|
||||
scanStringLiteral(tok);
|
||||
break;
|
||||
|
||||
tok->f.kind = quote == '"'
|
||||
? T_STRING_LITERAL
|
||||
: T_CHAR_LITERAL;
|
||||
|
||||
const char *yytext = _currentChar;
|
||||
|
||||
while (_yychar && _yychar != quote) {
|
||||
if (_yychar == '\n')
|
||||
break;
|
||||
else if (_yychar != '\\')
|
||||
yyinp();
|
||||
else {
|
||||
yyinp(); // skip `\\'
|
||||
|
||||
if (_yychar)
|
||||
yyinp();
|
||||
}
|
||||
}
|
||||
// assert(_yychar == quote);
|
||||
|
||||
int yylen = _currentChar - yytext;
|
||||
|
||||
if (_yychar == quote)
|
||||
yyinp();
|
||||
|
||||
if (control())
|
||||
tok->string = control()->stringLiteral(yytext, yylen);
|
||||
} break;
|
||||
case '\'':
|
||||
scanCharLiteral(tok);
|
||||
break;
|
||||
|
||||
case '{':
|
||||
tok->f.kind = T_LBRACE;
|
||||
@@ -589,112 +565,148 @@ void Lexer::scan_helper(Token *tok)
|
||||
tok->f.kind = classifyObjCAtKeyword(yytext, yylen);
|
||||
break;
|
||||
} else if (ch == '@' && _yychar == '"') {
|
||||
// objc @string literals
|
||||
yyinp();
|
||||
tok->f.kind = T_AT_STRING_LITERAL;
|
||||
|
||||
const char *yytext = _currentChar;
|
||||
|
||||
while (_yychar && _yychar != '"') {
|
||||
if (_yychar != '\\')
|
||||
yyinp();
|
||||
else {
|
||||
yyinp(); // skip `\\'
|
||||
|
||||
if (_yychar)
|
||||
yyinp();
|
||||
}
|
||||
}
|
||||
// assert(_yychar == '"');
|
||||
|
||||
int yylen = _currentChar - yytext;
|
||||
|
||||
if (_yychar == '"')
|
||||
yyinp();
|
||||
|
||||
if (control())
|
||||
tok->string = control()->stringLiteral(yytext, yylen);
|
||||
|
||||
scanStringLiteral(tok, '"');
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (ch == 'L' && (_yychar == '"' || _yychar == '\'')) {
|
||||
// wide char/string literals
|
||||
ch = _yychar;
|
||||
yyinp();
|
||||
|
||||
const char quote = ch;
|
||||
|
||||
tok->f.kind = quote == '"'
|
||||
? T_WIDE_STRING_LITERAL
|
||||
: T_WIDE_CHAR_LITERAL;
|
||||
|
||||
const char *yytext = _currentChar;
|
||||
|
||||
while (_yychar && _yychar != quote) {
|
||||
if (_yychar != '\\')
|
||||
yyinp();
|
||||
else {
|
||||
yyinp(); // skip `\\'
|
||||
|
||||
if (_yychar)
|
||||
yyinp();
|
||||
}
|
||||
}
|
||||
// assert(_yychar == quote);
|
||||
|
||||
int yylen = _currentChar - yytext;
|
||||
|
||||
if (_yychar == quote)
|
||||
if (ch == 'L' || ch == 'u' || ch == 'U') {
|
||||
// Either a literal or still an identifier.
|
||||
if (_yychar == '"') {
|
||||
yyinp();
|
||||
|
||||
if (control())
|
||||
tok->string = control()->stringLiteral(yytext, yylen);
|
||||
} else if (std::isalpha(ch) || ch == '_' || ch == '$') {
|
||||
const char *yytext = _currentChar - 1;
|
||||
while (std::isalnum(_yychar) || _yychar == '_' || _yychar == '$')
|
||||
scanStringLiteral(tok, ch);
|
||||
} else if (_yychar == '\'') {
|
||||
yyinp();
|
||||
int yylen = _currentChar - yytext;
|
||||
if (f._scanKeywords)
|
||||
tok->f.kind = classify(yytext, yylen, f._qtMocRunEnabled, f._cxx0xEnabled);
|
||||
else
|
||||
tok->f.kind = T_IDENTIFIER;
|
||||
|
||||
if (tok->f.kind == T_IDENTIFIER) {
|
||||
tok->f.kind = classifyOperator(yytext, yylen);
|
||||
|
||||
if (control())
|
||||
tok->identifier = control()->identifier(yytext, yylen);
|
||||
}
|
||||
break;
|
||||
} else if (std::isdigit(ch)) {
|
||||
const char *yytext = _currentChar - 1;
|
||||
while (_yychar) {
|
||||
if (_yychar == 'e' || _yychar == 'E') {
|
||||
yyinp();
|
||||
if (_yychar == '-' || _yychar == '+') {
|
||||
scanCharLiteral(tok, ch);
|
||||
} else {
|
||||
if (_yychar == '8') {
|
||||
unsigned char la = 0;
|
||||
if (_currentChar + 1 != _lastChar)
|
||||
la = *(_currentChar + 1);
|
||||
if (la == '"') {
|
||||
yyinp();
|
||||
// ### assert(std::isdigit(_yychar));
|
||||
yyinp();
|
||||
scanStringLiteral(tok, '8');
|
||||
} else if (la == '\'') {
|
||||
yyinp();
|
||||
yyinp();
|
||||
scanCharLiteral(tok, '8');
|
||||
} else {
|
||||
scanIdentifier(tok);
|
||||
}
|
||||
} else if (std::isalnum(_yychar) || _yychar == '.') {
|
||||
yyinp();
|
||||
} else {
|
||||
break;
|
||||
scanIdentifier(tok);
|
||||
}
|
||||
}
|
||||
int yylen = _currentChar - yytext;
|
||||
tok->f.kind = T_NUMERIC_LITERAL;
|
||||
if (control())
|
||||
tok->number = control()->numericLiteral(yytext, yylen);
|
||||
break;
|
||||
} else if (std::isalpha(ch) || ch == '_' || ch == '$') {
|
||||
scanIdentifier(tok);
|
||||
} else if (std::isdigit(ch)) {
|
||||
scanNumericLiteral(tok);
|
||||
} else {
|
||||
tok->f.kind = T_ERROR;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
} // default
|
||||
|
||||
} // switch
|
||||
}
|
||||
|
||||
/// Scans a double-quoted string literal body and tags \a tok with the token
/// kind selected by \a hint.
///
/// \param tok  Token that receives the scanned text and the resulting kind.
/// \param hint Encoding prefix seen by the caller: 'L' (wide), 'U' (UTF-32),
///             'u' (UTF-16), '8' (UTF-8, i.e. the u8 prefix) or '@'
///             (Objective-C string). Any other value yields a plain
///             T_STRING_LITERAL.
void Lexer::scanStringLiteral(Token *tok, unsigned char hint)
{
    // Consume the literal first; the kind is assigned afterwards.
    scanUntilQuote(tok, '"');

    switch (hint) {
    case 'L':
        tok->f.kind = T_WIDE_STRING_LITERAL;
        break;
    case 'U':
        tok->f.kind = T_UTF32_STRING_LITERAL;
        break;
    case 'u':
        tok->f.kind = T_UTF16_STRING_LITERAL;
        break;
    case '8':
        tok->f.kind = T_UTF8_STRING_LITERAL;
        break;
    case '@':
        tok->f.kind = T_AT_STRING_LITERAL;
        break;
    default:
        tok->f.kind = T_STRING_LITERAL;
        break;
    }
}
|
||||
|
||||
/// Scans a single-quoted character literal body and tags \a tok with the
/// token kind selected by \a hint.
///
/// \param tok  Token that receives the scanned text and the resulting kind.
/// \param hint Encoding prefix seen by the caller: 'L' (wide), 'U' (UTF-32)
///             or 'u' (UTF-16). Any other value, including '8', yields a
///             plain T_CHAR_LITERAL.
void Lexer::scanCharLiteral(Token *tok, unsigned char hint)
{
    // Consume the literal first; the kind is assigned afterwards.
    scanUntilQuote(tok, '\'');

    switch (hint) {
    case 'L':
        tok->f.kind = T_WIDE_CHAR_LITERAL;
        break;
    case 'U':
        tok->f.kind = T_UTF32_CHAR_LITERAL;
        break;
    case 'u':
        tok->f.kind = T_UTF16_CHAR_LITERAL;
        break;
    default:
        tok->f.kind = T_CHAR_LITERAL;
        break;
    }
}
|
||||
|
||||
/// Scans the body of a quoted literal and stores its text in \a tok.
///
/// Scanning starts at the current cursor position (callers advance past the
/// opening quote before calling) and stops at the first unescaped \a quote,
/// at an unescaped newline, or at end of input. A closing quote, when
/// present, is consumed but not included in the stored text.
///
/// \param tok   Token whose \c string member receives the literal text; it is
///              only set when control() is non-null.
/// \param quote Terminating character; must be '"' or '\''.
void Lexer::scanUntilQuote(Token *tok, unsigned char quote)
{
    assert(quote == '"' || quote == '\'');

    const char *yytext = _currentChar;
    // An unescaped newline ends the scan: string and character literals may
    // not span lines, and without this stop an unterminated literal would
    // swallow everything up to the next quote character (or end of input).
    while (_yychar && _yychar != quote && _yychar != '\n') {
        if (_yychar != '\\') {
            yyinp();
        } else {
            yyinp(); // skip the backslash

            // Skip the escaped character as well, so that an escaped quote
            // or an escaped newline (line continuation) does not end the scan.
            if (_yychar)
                yyinp();
        }
    }
    const int yylen = static_cast<int>(_currentChar - yytext);

    // Consume the closing quote; it is absent for unterminated literals.
    if (_yychar == quote)
        yyinp();

    if (control())
        tok->string = control()->stringLiteral(yytext, yylen);
}
|
||||
|
||||
/// Scans the remainder of a numeric literal and marks \a tok as
/// T_NUMERIC_LITERAL.
///
/// The token text begins one character before the cursor. The scan accepts
/// alphanumeric characters and '.', plus an optional sign directly after an
/// 'e'/'E' exponent marker. The literal is registered through control() when
/// one is available.
void Lexer::scanNumericLiteral(Token *tok)
{
    const char *start = _currentChar - 1;

    for (;;) {
        if (!_yychar)
            break;

        const bool exponentMarker = (_yychar == 'e' || _yychar == 'E');
        if (exponentMarker) {
            yyinp();
            // An exponent may carry an explicit sign.
            if (_yychar == '-' || _yychar == '+') {
                yyinp();
                // ### assert(std::isdigit(_yychar));
            }
            continue;
        }

        if (!std::isalnum(_yychar) && _yychar != '.')
            break;

        yyinp();
    }

    const int length = _currentChar - start;

    tok->f.kind = T_NUMERIC_LITERAL;

    if (control())
        tok->number = control()->numericLiteral(start, length);
}
|
||||
|
||||
/// Scans the remainder of an identifier and classifies it.
///
/// The token text begins one character before the cursor and extends over
/// alphanumeric characters, '_' and '$'. When keyword scanning is enabled the
/// text is passed to classify(); text that is still a plain identifier
/// afterwards is passed to classifyOperator() and registered through
/// control() when one is available.
void Lexer::scanIdentifier(Token *tok)
{
    const char *start = _currentChar - 1;
    while (std::isalnum(_yychar) || _yychar == '_' || _yychar == '$')
        yyinp();
    const int length = _currentChar - start;

    tok->f.kind = f._scanKeywords
            ? classify(start, length, f._qtMocRunEnabled, f._cxx0xEnabled)
            : T_IDENTIFIER;

    // Keywords need no further processing.
    if (tok->f.kind != T_IDENTIFIER)
        return;

    tok->f.kind = classifyOperator(start, length);

    if (control())
        tok->identifier = control()->identifier(start, length);
}
|
||||
|
6
src/libs/3rdparty/cplusplus/Lexer.h
vendored
6
src/libs/3rdparty/cplusplus/Lexer.h
vendored
@@ -90,6 +90,12 @@ private:
|
||||
static int classifyObjCAtKeyword(const char *s, int n);
|
||||
static int classifyOperator(const char *string, int length);
|
||||
|
||||
void scanStringLiteral(Token *tok, unsigned char hint = 0);
|
||||
void scanCharLiteral(Token *tok, unsigned char hint = 0);
|
||||
void scanUntilQuote(Token *tok, unsigned char quote);
|
||||
void scanNumericLiteral(Token *tok);
|
||||
void scanIdentifier(Token *tok);
|
||||
|
||||
inline void yyinp()
|
||||
{
|
||||
if (++_currentChar == _lastChar)
|
||||
|
26
src/libs/3rdparty/cplusplus/Parser.cpp
vendored
26
src/libs/3rdparty/cplusplus/Parser.cpp
vendored
@@ -313,6 +313,8 @@ bool Parser::skipUntilStatement()
|
||||
case T_CATCH:
|
||||
case T_THROW:
|
||||
case T_CHAR:
|
||||
case T_CHAR16_T:
|
||||
case T_CHAR32_T:
|
||||
case T_WCHAR_T:
|
||||
case T_BOOL:
|
||||
case T_SHORT:
|
||||
@@ -2811,12 +2813,21 @@ bool Parser::parseUnqualifiedName(NameAST *&node, bool acceptTemplateId)
|
||||
bool Parser::parseStringLiteral(ExpressionAST *&node)
|
||||
{
|
||||
DEBUG_THIS_RULE();
|
||||
if (! (LA() == T_STRING_LITERAL || LA() == T_WIDE_STRING_LITERAL))
|
||||
if (! (LA() == T_STRING_LITERAL
|
||||
|| LA() == T_WIDE_STRING_LITERAL
|
||||
|| LA() == T_UTF8_STRING_LITERAL
|
||||
|| LA() == T_UTF16_STRING_LITERAL
|
||||
|| LA() == T_UTF32_STRING_LITERAL)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
StringLiteralAST **ast = reinterpret_cast<StringLiteralAST **> (&node);
|
||||
|
||||
while (LA() == T_STRING_LITERAL || LA() == T_WIDE_STRING_LITERAL) {
|
||||
while (LA() == T_STRING_LITERAL
|
||||
|| LA() == T_WIDE_STRING_LITERAL
|
||||
|| LA() == T_UTF8_STRING_LITERAL
|
||||
|| LA() == T_UTF16_STRING_LITERAL
|
||||
|| LA() == T_UTF32_STRING_LITERAL) {
|
||||
*ast = new (_pool) StringLiteralAST;
|
||||
(*ast)->literal_token = consumeToken();
|
||||
ast = &(*ast)->next;
|
||||
@@ -3541,6 +3552,8 @@ bool Parser::lookAtBuiltinTypeSpecifier() const
|
||||
{
|
||||
switch (LA()) {
|
||||
case T_CHAR:
|
||||
case T_CHAR16_T:
|
||||
case T_CHAR32_T:
|
||||
case T_WCHAR_T:
|
||||
case T_BOOL:
|
||||
case T_SHORT:
|
||||
@@ -3982,7 +3995,9 @@ bool Parser::parseNumericLiteral(ExpressionAST *&node)
|
||||
DEBUG_THIS_RULE();
|
||||
if (LA() == T_NUMERIC_LITERAL ||
|
||||
LA() == T_CHAR_LITERAL ||
|
||||
LA() == T_WIDE_CHAR_LITERAL) {
|
||||
LA() == T_WIDE_CHAR_LITERAL ||
|
||||
LA() == T_UTF16_CHAR_LITERAL ||
|
||||
LA() == T_UTF32_CHAR_LITERAL) {
|
||||
NumericLiteralAST *ast = new (_pool) NumericLiteralAST;
|
||||
ast->literal_token = consumeToken();
|
||||
node = ast;
|
||||
@@ -4021,6 +4036,9 @@ bool Parser::parsePrimaryExpression(ExpressionAST *&node)
|
||||
switch (LA()) {
|
||||
case T_STRING_LITERAL:
|
||||
case T_WIDE_STRING_LITERAL:
|
||||
case T_UTF8_STRING_LITERAL:
|
||||
case T_UTF16_STRING_LITERAL:
|
||||
case T_UTF32_STRING_LITERAL:
|
||||
return parseStringLiteral(node);
|
||||
|
||||
case T_NULLPTR:
|
||||
@@ -4030,6 +4048,8 @@ bool Parser::parsePrimaryExpression(ExpressionAST *&node)
|
||||
|
||||
case T_CHAR_LITERAL: // ### FIXME don't use NumericLiteral for chars
|
||||
case T_WIDE_CHAR_LITERAL:
|
||||
case T_UTF16_CHAR_LITERAL:
|
||||
case T_UTF32_CHAR_LITERAL:
|
||||
case T_NUMERIC_LITERAL:
|
||||
return parseNumericLiteral(node);
|
||||
|
||||
|
20
src/libs/3rdparty/cplusplus/Token.cpp
vendored
20
src/libs/3rdparty/cplusplus/Token.cpp
vendored
@@ -29,8 +29,12 @@ static const char *token_names[] = {
|
||||
("<C++ comment>"), ("<C++ doxy comment>"),
|
||||
("<comment>"), ("<doxy comment>"),
|
||||
|
||||
("<identifier>"), ("<numeric literal>"), ("<char literal>"),
|
||||
("<wide char literal>"), ("<string literal>"), ("<wide char literal>"),
|
||||
("<identifier>"),
|
||||
|
||||
("<numeric literal>"),
|
||||
("<char literal>"), ("<wide char literal>"), ("<utf16 char literal>"), ("<utf32 char literal>"),
|
||||
("<string literal>"), ("<wide string literal>"), ("<utf8 string literal>"),
|
||||
("<utf16 string literal>"), ("<utf32 string literal>"),
|
||||
("<@string literal>"), ("<angle string literal>"),
|
||||
|
||||
("&"), ("&&"), ("&="), ("->"), ("->*"), ("^"), ("^="), (":"), ("::"),
|
||||
@@ -40,7 +44,8 @@ static const char *token_names[] = {
|
||||
("|="), ("||"), ("+"), ("+="), ("++"), ("#"), ("##"), ("?"), ("}"),
|
||||
("]"), (")"), (";"), ("*"), ("*="), ("~"), ("~="),
|
||||
|
||||
("asm"), ("auto"), ("bool"), ("break"), ("case"), ("catch"), ("char"),
|
||||
("asm"), ("auto"), ("bool"), ("break"), ("case"), ("catch"),
|
||||
("char"), ("char16_t"), ("char32_t"),
|
||||
("class"), ("const"), ("const_cast"), ("constexpr"), ("continue"),
|
||||
("decltype"), ("default"),
|
||||
("delete"), ("do"), ("double"), ("dynamic_cast"), ("else"), ("enum"),
|
||||
@@ -92,11 +97,16 @@ const char *Token::spell() const
|
||||
|
||||
case T_NUMERIC_LITERAL:
|
||||
case T_CHAR_LITERAL:
|
||||
case T_WIDE_CHAR_LITERAL:
|
||||
case T_UTF16_CHAR_LITERAL:
|
||||
case T_UTF32_CHAR_LITERAL:
|
||||
case T_STRING_LITERAL:
|
||||
case T_WIDE_STRING_LITERAL:
|
||||
case T_UTF8_STRING_LITERAL:
|
||||
case T_UTF16_STRING_LITERAL:
|
||||
case T_UTF32_STRING_LITERAL:
|
||||
case T_AT_STRING_LITERAL:
|
||||
case T_ANGLE_STRING_LITERAL:
|
||||
case T_WIDE_CHAR_LITERAL:
|
||||
case T_WIDE_STRING_LITERAL:
|
||||
return literal->chars();
|
||||
|
||||
default:
|
||||
|
9
src/libs/3rdparty/cplusplus/Token.h
vendored
9
src/libs/3rdparty/cplusplus/Token.h
vendored
@@ -40,10 +40,15 @@ enum Kind {
|
||||
T_FIRST_CHAR_LITERAL,
|
||||
T_CHAR_LITERAL = T_FIRST_CHAR_LITERAL,
|
||||
T_WIDE_CHAR_LITERAL,
|
||||
T_LAST_CHAR_LITERAL = T_WIDE_CHAR_LITERAL,
|
||||
T_UTF16_CHAR_LITERAL,
|
||||
T_UTF32_CHAR_LITERAL,
|
||||
T_LAST_CHAR_LITERAL = T_UTF32_CHAR_LITERAL,
|
||||
T_FIRST_STRING_LITERAL,
|
||||
T_STRING_LITERAL = T_FIRST_STRING_LITERAL,
|
||||
T_WIDE_STRING_LITERAL,
|
||||
T_UTF8_STRING_LITERAL,
|
||||
T_UTF16_STRING_LITERAL,
|
||||
T_UTF32_STRING_LITERAL,
|
||||
T_AT_STRING_LITERAL,
|
||||
T_ANGLE_STRING_LITERAL,
|
||||
T_LAST_STRING_LITERAL = T_ANGLE_STRING_LITERAL,
|
||||
@@ -112,6 +117,8 @@ enum Kind {
|
||||
T_CASE,
|
||||
T_CATCH,
|
||||
T_CHAR,
|
||||
T_CHAR16_T,
|
||||
T_CHAR32_T,
|
||||
T_CLASS,
|
||||
T_CONST,
|
||||
T_CONST_CAST,
|
||||
|
@@ -205,8 +205,8 @@ void TranslationUnit::tokenize()
|
||||
unsigned line = (unsigned) strtoul(tk.spell(), 0, 0);
|
||||
lex(&tk);
|
||||
if (! tk.f.newline && tk.is(T_STRING_LITERAL)) {
|
||||
const StringLiteral *fileName = control()->stringLiteral(tk.string->chars(),
|
||||
tk.string->size());
|
||||
const StringLiteral *fileName =
|
||||
control()->stringLiteral(tk.string->chars(), tk.string->size());
|
||||
pushPreprocessorLine(offset, line, fileName);
|
||||
lex(&tk);
|
||||
}
|
||||
|
Reference in New Issue
Block a user