From e148d030f59ae1e88c68a1843d75cd5cac444439 Mon Sep 17 00:00:00 2001 From: Leandro Melo Date: Thu, 16 Aug 2012 21:17:41 +0200 Subject: [PATCH] C++: Introduce C++11 raw string literals Although they are now supported by the lexer and parser, it is worth noting that we still need to address an issue concerning the highlighting of multiline literals (which, with the advent of the new raw strings, will become more common). Task-number: QTCREATORBUG-6722 Change-Id: I137337a9ac0152a1f8b9faded0b960c6fe3dd38a Reviewed-by: Roberto Raggi --- src/libs/3rdparty/cplusplus/Lexer.cpp | 84 +++++++++++++++++++++++++- src/libs/3rdparty/cplusplus/Lexer.h | 1 + src/libs/3rdparty/cplusplus/Parser.cpp | 19 +++++- src/libs/3rdparty/cplusplus/Token.cpp | 7 +++ src/libs/3rdparty/cplusplus/Token.h | 5 ++ 5 files changed, 112 insertions(+), 4 deletions(-) diff --git a/src/libs/3rdparty/cplusplus/Lexer.cpp b/src/libs/3rdparty/cplusplus/Lexer.cpp index 5214a345f70..9804c8c9f16 100644 --- a/src/libs/3rdparty/cplusplus/Lexer.cpp +++ b/src/libs/3rdparty/cplusplus/Lexer.cpp @@ -571,14 +571,25 @@ void Lexer::scan_helper(Token *tok) } } - if (ch == 'L' || ch == 'u' || ch == 'U') { + if (ch == 'L' || ch == 'u' || ch == 'U' || ch == 'R') { // Either a literal or still an identifier. 
if (_yychar == '"') { yyinp(); - scanStringLiteral(tok, ch); + if (ch == 'R') + scanRawStringLiteral(tok); + else + scanStringLiteral(tok, ch); } else if (_yychar == '\'') { yyinp(); scanCharLiteral(tok, ch); + } else if (ch != 'R' && _yychar == 'R') { + yyinp(); + if (_yychar == '"') { + yyinp(); + scanRawStringLiteral(tok, ch); + } else { + scanIdentifier(tok, 1); + } } else if (ch == 'u' && _yychar == '8') { yyinp(); if (_yychar == '"') { @@ -587,6 +598,14 @@ void Lexer::scan_helper(Token *tok) } else if (_yychar == '\'') { yyinp(); scanCharLiteral(tok, '8'); + } else if (_yychar == 'R') { + yyinp(); + if (_yychar == '"') { + yyinp(); + scanRawStringLiteral(tok, '8'); + } else { + scanIdentifier(tok, 2); + } } else { scanIdentifier(tok, 1); } @@ -624,6 +643,67 @@ void Lexer::scanStringLiteral(Token *tok, unsigned char hint) tok->f.kind = T_STRING_LITERAL; } +void Lexer::scanRawStringLiteral(Token *tok, unsigned char hint) +{ + const char *yytext = _currentChar; + + int delimLength = -1; + const char *closingDelimCandidate = 0; + while (_yychar) { + if (_yychar == '(' && delimLength == -1) { + delimLength = _currentChar - yytext; + yyinp(); + } else if (_yychar == ')') { + yyinp(); + if (delimLength == -1) + break; + closingDelimCandidate = _currentChar; + } else { + if (delimLength == -1) { + if (_yychar == '\\' || std::isspace(_yychar)) + break; + yyinp(); + } else { + if (!closingDelimCandidate) { + yyinp(); + } else { + if (_yychar == '"') { + if (delimLength == _currentChar - closingDelimCandidate) { + // Got a matching closing delimiter. + break; + } + } + + // Make sure this continues to be a valid candidate. 
+ if (_yychar != *(yytext + (_currentChar - closingDelimCandidate))) + closingDelimCandidate = 0; + + yyinp(); + } + } + } + } + + int yylen = _currentChar - yytext; + + if (_yychar == '"') + yyinp(); + + if (control()) + tok->string = control()->stringLiteral(yytext, yylen); + + if (hint == 'L') + tok->f.kind = T_RAW_WIDE_STRING_LITERAL; + else if (hint == 'U') + tok->f.kind = T_RAW_UTF32_STRING_LITERAL; + else if (hint == 'u') + tok->f.kind = T_RAW_UTF16_STRING_LITERAL; + else if (hint == '8') + tok->f.kind = T_RAW_UTF8_STRING_LITERAL; + else + tok->f.kind = T_RAW_STRING_LITERAL; +} + void Lexer::scanCharLiteral(Token *tok, unsigned char hint) { scanUntilQuote(tok, '\''); diff --git a/src/libs/3rdparty/cplusplus/Lexer.h b/src/libs/3rdparty/cplusplus/Lexer.h index c61b53c31f0..0d527f1d828 100644 --- a/src/libs/3rdparty/cplusplus/Lexer.h +++ b/src/libs/3rdparty/cplusplus/Lexer.h @@ -91,6 +91,7 @@ private: static int classifyOperator(const char *string, int length); void scanStringLiteral(Token *tok, unsigned char hint = 0); + void scanRawStringLiteral(Token *tok, unsigned char hint = 0); void scanCharLiteral(Token *tok, unsigned char hint = 0); void scanUntilQuote(Token *tok, unsigned char quote); void scanNumericLiteral(Token *tok); diff --git a/src/libs/3rdparty/cplusplus/Parser.cpp b/src/libs/3rdparty/cplusplus/Parser.cpp index 6303209d786..60625a3aa3e 100644 --- a/src/libs/3rdparty/cplusplus/Parser.cpp +++ b/src/libs/3rdparty/cplusplus/Parser.cpp @@ -2815,7 +2815,12 @@ bool Parser::parseStringLiteral(ExpressionAST *&node) || LA() == T_WIDE_STRING_LITERAL || LA() == T_UTF8_STRING_LITERAL || LA() == T_UTF16_STRING_LITERAL - || LA() == T_UTF32_STRING_LITERAL)) { + || LA() == T_UTF32_STRING_LITERAL + || LA() == T_RAW_STRING_LITERAL + || LA() == T_RAW_WIDE_STRING_LITERAL + || LA() == T_RAW_UTF8_STRING_LITERAL + || LA() == T_RAW_UTF16_STRING_LITERAL + || LA() == T_RAW_UTF32_STRING_LITERAL)) { return false; } @@ -2825,7 +2830,12 @@ bool 
Parser::parseStringLiteral(ExpressionAST *&node) || LA() == T_WIDE_STRING_LITERAL || LA() == T_UTF8_STRING_LITERAL || LA() == T_UTF16_STRING_LITERAL - || LA() == T_UTF32_STRING_LITERAL) { + || LA() == T_UTF32_STRING_LITERAL + || LA() == T_RAW_STRING_LITERAL + || LA() == T_RAW_WIDE_STRING_LITERAL + || LA() == T_RAW_UTF8_STRING_LITERAL + || LA() == T_RAW_UTF16_STRING_LITERAL + || LA() == T_RAW_UTF32_STRING_LITERAL) { *ast = new (_pool) StringLiteralAST; (*ast)->literal_token = consumeToken(); ast = &(*ast)->next; @@ -4054,6 +4064,11 @@ bool Parser::parsePrimaryExpression(ExpressionAST *&node) case T_UTF8_STRING_LITERAL: case T_UTF16_STRING_LITERAL: case T_UTF32_STRING_LITERAL: + case T_RAW_STRING_LITERAL: + case T_RAW_WIDE_STRING_LITERAL: + case T_RAW_UTF8_STRING_LITERAL: + case T_RAW_UTF16_STRING_LITERAL: + case T_RAW_UTF32_STRING_LITERAL: return parseStringLiteral(node); case T_NULLPTR: diff --git a/src/libs/3rdparty/cplusplus/Token.cpp b/src/libs/3rdparty/cplusplus/Token.cpp index 90210024833..dcc18601c21 100644 --- a/src/libs/3rdparty/cplusplus/Token.cpp +++ b/src/libs/3rdparty/cplusplus/Token.cpp @@ -35,6 +35,8 @@ static const char *token_names[] = { (""), (""), (""), (""), (""), (""), (""), (""), (""), + (""), (""), (""), + (""), (""), ("<@string literal>"), (""), ("&"), ("&&"), ("&="), ("->"), ("->*"), ("^"), ("^="), (":"), ("::"), @@ -105,6 +107,11 @@ const char *Token::spell() const case T_UTF8_STRING_LITERAL: case T_UTF16_STRING_LITERAL: case T_UTF32_STRING_LITERAL: + case T_RAW_STRING_LITERAL: + case T_RAW_WIDE_STRING_LITERAL: + case T_RAW_UTF8_STRING_LITERAL: + case T_RAW_UTF16_STRING_LITERAL: + case T_RAW_UTF32_STRING_LITERAL: case T_AT_STRING_LITERAL: case T_ANGLE_STRING_LITERAL: return literal->chars(); diff --git a/src/libs/3rdparty/cplusplus/Token.h b/src/libs/3rdparty/cplusplus/Token.h index 41abe17e9ab..a89e5522561 100644 --- a/src/libs/3rdparty/cplusplus/Token.h +++ b/src/libs/3rdparty/cplusplus/Token.h @@ -49,6 +49,11 @@ enum Kind { 
T_UTF8_STRING_LITERAL, T_UTF16_STRING_LITERAL, T_UTF32_STRING_LITERAL, + T_RAW_STRING_LITERAL, + T_RAW_WIDE_STRING_LITERAL, + T_RAW_UTF8_STRING_LITERAL, + T_RAW_UTF16_STRING_LITERAL, + T_RAW_UTF32_STRING_LITERAL, T_AT_STRING_LITERAL, T_ANGLE_STRING_LITERAL, T_LAST_STRING_LITERAL = T_ANGLE_STRING_LITERAL,