C++: Basic support for C++11 user-defined literals

1. Extends the lexer so that a numeric, character, or string literal can be followed by an underscore '_' and alphanumeric characters forming a user-defined literal suffix. 2. Extends the parser so that it accepts the operator"" _abc(...) user-defined literal operator definition. 3. Adds the Token::Flags.userDefinedLiteral flag field indicating whether a token carries a user-defined literal. 4. Adds a C++11 auto test case with: 12_km, 0.5_Pa, 'c'_X, "abd"_L, u"xyz"_M. 5. All optional suffix scanning methods now return a boolean indicating whether the suffix was found. 6. Adds C++ lexer tests for user-defined literals with the C++11 feature enabled. This change, however, does not make QtCreator understand user-defined literal semantics, e.g. properly resolve the type when applying a custom literal operator. Change-Id: I30e62f025ec9fb11c39261985ea4d772b1a80949 Reviewed-by: Nikolai Kosjar <nikolai.kosjar@theqtcompany.com>
2014-11-02 14:42:23 +01:00
parent 5699991a2f
commit 425811291d
7 changed files with 115 additions and 22 deletions
--- a/src/libs/3rdparty/cplusplus/Lexer.cpp
+++ b/src/libs/3rdparty/cplusplus/Lexer.cpp
@@ -314,7 +314,8 @@ void Lexer::scan_helper(Token *tok)
            yyinp();
            scanDigitSequence(); // this is optional: we already skipped over the first digit
            scanExponentPart();
-            scanOptionalFloatingSuffix();
+            if (!scanOptionalFloatingSuffix())
+                scanOptionalUserDefinedLiteral(tok);
            if (std::isalnum(_yychar) || _yychar == '_') {
                do {
                    yyinp();
@@ -683,6 +684,7 @@ void Lexer::scanStringLiteral(Token *tok, unsigned char hint)
        tok->f.kind = T_STRING_LITERAL;

    scanUntilQuote(tok, '"');
+    scanOptionalUserDefinedLiteral(tok);
 }

 void Lexer::scanRawStringLiteral(Token *tok, unsigned char hint)
@@ -758,6 +760,7 @@ void Lexer::scanCharLiteral(Token *tok, unsigned char hint)
        tok->f.kind = T_CHAR_LITERAL;

    scanUntilQuote(tok, '\'');
+    scanOptionalUserDefinedLiteral(tok);
 }

 void Lexer::scanUntilQuote(Token *tok, unsigned char quote)
@@ -802,13 +805,16 @@ bool Lexer::scanExponentPart()
    return scanDigitSequence();
 }

-void Lexer::scanOptionalFloatingSuffix()
+bool Lexer::scanOptionalFloatingSuffix()
 {
-    if (_yychar == 'f' || _yychar == 'l' || _yychar == 'F' || _yychar == 'L')
+    if (_yychar == 'f' || _yychar == 'l' || _yychar == 'F' || _yychar == 'L') {
        yyinp();
+        return true;
+    }
+    return false;
 }

-void Lexer::scanOptionalIntegerSuffix(bool allowU)
+bool Lexer::scanOptionalIntegerSuffix(bool allowU)
 {
    switch(_yychar) {
    case 'u':
@@ -817,19 +823,28 @@ void Lexer::scanOptionalIntegerSuffix(bool allowU)
            yyinp();
            scanOptionalIntegerSuffix(false);
        }
-        return;
+        return true;
    case 'l':
        yyinp();
        if (_yychar == 'l')
            yyinp();
-        return;
+        return true;
    case 'L':
        yyinp();
        if (_yychar == 'L')
            yyinp();
-        return;
+        return true;
    default:
-        return;
+        return false;
+    }
+}
+
+void Lexer::scanOptionalUserDefinedLiteral(Token *tok)
+{
+    if (_languageFeatures.cxx11Enabled && _yychar == '_') {
+        tok->f.userDefinedLiteral = true;
+        while (std::isalnum(_yychar) || _yychar == '_' || isByteOfMultiByteCodePoint(_yychar))
+            yyinp();
    }
 }

@@ -844,19 +859,22 @@ void Lexer::scanNumericLiteral(Token *tok)
                   (_yychar >= 'A' && _yychar <= 'F')) {
                yyinp();
            }
-            scanOptionalIntegerSuffix();
+            if (!scanOptionalIntegerSuffix())
+                scanOptionalUserDefinedLiteral(tok);
            goto theEnd;
        } else if (_yychar == 'b' || _yychar == 'B') { // see n3472
            yyinp();
            while (_yychar == '0' || _yychar == '1')
                yyinp();
-            scanOptionalIntegerSuffix();
+            if (!scanOptionalIntegerSuffix())
+                scanOptionalUserDefinedLiteral(tok);
            goto theEnd;
        } else if (_yychar >= '0' && _yychar <= '7') {
            do {
                yyinp();
            } while (_yychar >= '0' && _yychar <= '7');
-            scanOptionalIntegerSuffix();
+            if (!scanOptionalIntegerSuffix())
+                scanOptionalUserDefinedLiteral(tok);
            goto theEnd;
        }
    }
@@ -866,16 +884,18 @@ void Lexer::scanNumericLiteral(Token *tok)
            yyinp();
            scanDigitSequence(); // this is optional: "1." is a valid floating point number
            scanExponentPart();
-            scanOptionalFloatingSuffix();
+            if (!scanOptionalFloatingSuffix())
+                scanOptionalUserDefinedLiteral(tok);
            break;
        } else if (_yychar == 'e' || _yychar == 'E') {
-            if (scanExponentPart())
-                scanOptionalFloatingSuffix();
+            if (scanExponentPart() && !scanOptionalFloatingSuffix())
+                scanOptionalUserDefinedLiteral(tok);
            break;
        } else if (std::isdigit(_yychar)) {
            yyinp();
        } else {
-            scanOptionalIntegerSuffix();
+            if (!scanOptionalIntegerSuffix())
+                scanOptionalUserDefinedLiteral(tok);
            break;
        }
    }
@@ -911,6 +931,7 @@ void Lexer::scanPreprocessorNumber(Token *tok, bool dotAlreadySkipped)
        } else if (std::isalnum(_yychar) || _yychar == '_' || _yychar == '.') {
            yyinp();
        } else {
+            scanOptionalUserDefinedLiteral(tok);
            break;
        }
    }
--- a/src/libs/3rdparty/cplusplus/Lexer.h
+++ b/src/libs/3rdparty/cplusplus/Lexer.h
@@ -100,8 +100,9 @@ private:
    void scanUntilQuote(Token *tok, unsigned char quote);
    bool scanDigitSequence();
    bool scanExponentPart();
-    void scanOptionalFloatingSuffix();
-    void scanOptionalIntegerSuffix(bool allowU = true);
+    bool scanOptionalFloatingSuffix();
+    bool scanOptionalIntegerSuffix(bool allowU = true);
+    void scanOptionalUserDefinedLiteral(Token *tok);
    void scanNumericLiteral(Token *tok);
    void scanPreprocessorNumber(Token *tok, bool dotAlreadySkipped);
    void scanIdentifier(Token *tok, unsigned extraProcessedChars = 0);
--- a/src/libs/3rdparty/cplusplus/Parser.cpp
+++ b/src/libs/3rdparty/cplusplus/Parser.cpp
@@ -1274,6 +1274,14 @@ bool Parser::parseOperator(OperatorAST *&node) // ### FIXME
        } else if (LA() == T_LBRACKET && LA(2) == T_RBRACKET) {
            ast->op_token = ast->open_token = consumeToken();
            ast->close_token = consumeToken();
+        } else if (_languageFeatures.cxx11Enabled &&
+                   LA() == T_STRING_LITERAL && LA(2) == T_IDENTIFIER &&
+                   !tok().f.userDefinedLiteral && tok().string->size() == 0 &&
+                   tok(2).identifier->size() > 1 && tok(2).identifier->chars()[0] == '_') {
+            // C++11 user-defined literal operator, e.g.:
+            // int operator"" _abc123(const char *str, size_t size) { ... }
+            ast->op_token = consumeToken();
+            consumeToken(); // consume literal operator identifier
        } else {
            return false;
        }
--- a/src/libs/3rdparty/cplusplus/Token.h
+++ b/src/libs/3rdparty/cplusplus/Token.h
@@ -302,6 +302,7 @@ public:
    inline bool joined() const { return f.joined; }
    inline bool expanded() const { return f.expanded; }
    inline bool generated() const { return f.generated; }
+    inline bool userDefinedLiteral() const { return f.userDefinedLiteral; }

    inline unsigned bytes() const { return f.bytes; }
    inline unsigned bytesBegin() const { return byteOffset; }
@@ -363,8 +364,11 @@ public:
        // Tokens '1', '+', '2', and ';' are all expanded. However only tokens '+' and ';'
        // are generated.
        unsigned generated     : 1;
+        // The token is a C++11 user-defined literal such as:
+        // 12_km, 0.5_Pa, 'c'_X, "abd"_L, u"xyz"_M
+        unsigned userDefinedLiteral : 1;
        // Unused...
-        unsigned pad           : 3;
+        unsigned pad           : 2;
        // The token length in bytes and UTF16 chars.
        unsigned bytes         : 16;
        unsigned utf16chars    : 16;
--- a/tests/auto/cplusplus/cxx11/data/userDefinedLiterals.1.cpp
+++ b/tests/auto/cplusplus/cxx11/data/userDefinedLiterals.1.cpp
@@ -0,0 +1,7 @@
+constexpr long double operator"" _inv(long double value) {
+  return 1.0 / value;
+}
+int main() {
+  auto foo = operator"" _inv(2.3);
+  return 12_km + 0.5_Pa + 'c'_X + "abd"_L + u"xyz"_M;
+}
--- a/tests/auto/cplusplus/cxx11/tst_cxx11.cpp
+++ b/tests/auto/cplusplus/cxx11/tst_cxx11.cpp
@@ -197,6 +197,7 @@ void tst_cxx11::parse_data()
    QTest::newRow("threadLocal.1") << "threadLocal.1.cpp" << "";
    QTest::newRow("trailingtypespec.1") << "trailingtypespec.1.cpp" << "";
    QTest::newRow("lambda.2") << "lambda.2.cpp" << "";
+    QTest::newRow("userDefinedLiterals.1") << "userDefinedLiterals.1.cpp" << "";
 }

 void tst_cxx11::parse()
--- a/tests/auto/cplusplus/lexer/tst_lexer.cpp
+++ b/tests/auto/cplusplus/lexer/tst_lexer.cpp
@@ -61,7 +61,8 @@ public:
        CompareBytesEnd        = 1 << 4,
        CompareUtf16Chars      = 1 << 5,
        CompareUtf16CharsBegin = 1 << 6,
-        CompareUtf16CharsEnd   = 1 << 7
+        CompareUtf16CharsEnd   = 1 << 7,
+        CompareUserDefinedLiteral = 1 << 8
    };
    Q_DECLARE_FLAGS(TokenCompareFlags, TokenCompareFlag)

@@ -77,6 +78,8 @@ private slots:

    void bytes_and_utf16chars();
    void bytes_and_utf16chars_data();
+    void user_defined_literals();
+    void user_defined_literals_data();
    void offsets();
    void offsets_data();

@@ -87,7 +90,8 @@ private:
             const Tokens &expectedTokens,
             bool preserveState,
             TokenCompareFlags compareFlags,
-             bool preprocessorMode = false);
+             bool preprocessorMode = false,
+             const LanguageFeatures &extraLanguageFeatures = LanguageFeatures());

    int _state;
 };
@@ -109,12 +113,18 @@ void tst_SimpleLexer::run(const QByteArray &source,
                          const Tokens &expectedTokens,
                          bool preserveState,
                          TokenCompareFlags compareFlags,
-                          bool preprocessorMode)
+                          bool preprocessorMode,
+                          const LanguageFeatures &extraLanguageFeatures)
 {
    QVERIFY(compareFlags);

    SimpleLexer lexer;
    lexer.setPreprocessorMode(preprocessorMode);
+    if (extraLanguageFeatures.flags) {
+        LanguageFeatures languageFeatures = lexer.languageFeatures();
+        languageFeatures.flags |= extraLanguageFeatures.flags;
+        lexer.setLanguageFeatures(languageFeatures);
+    }
    const Tokens tokens = lexer(source, preserveState ? _state : 0);
    if (preserveState)
        _state = lexer.state();
@@ -146,6 +156,8 @@ void tst_SimpleLexer::run(const QByteArray &source,
            QCOMPARE(token.utf16charsBegin(), expectedToken.utf16charsBegin());
        if (compareFlags & CompareUtf16CharsEnd)
            QCOMPARE(token.utf16charsEnd(), expectedToken.utf16charsEnd());
+        if (compareFlags & CompareUserDefinedLiteral)
+            QCOMPARE(token.userDefinedLiteral(), expectedToken.userDefinedLiteral());
    }

    QString msg = QLatin1String("Less tokens than expected: got %1, expected %2.");
@@ -364,12 +376,14 @@ void tst_SimpleLexer::bytes_and_utf16chars()
    run(source, expectedTokens, false, compareFlags);
 }

-static Tokens createToken(unsigned kind, unsigned bytes, unsigned utf16chars)
+static Tokens createToken(unsigned kind, unsigned bytes, unsigned utf16chars,
+                          bool userDefinedLiteral = false)
 {
    Token t;
    t.f.kind = kind;
    t.f.bytes = bytes;
    t.f.utf16chars = utf16chars;
+    t.f.userDefinedLiteral = userDefinedLiteral;
    return Tokens() << t;
 }

@@ -445,6 +459,43 @@ void tst_SimpleLexer::bytes_and_utf16chars_data()
        << _("\"" UC_U00FC UC_U4E8C UC_U10302 "\"") << createToken(T_STRING_LITERAL, 11, 6);
 }

+void tst_SimpleLexer::user_defined_literals()
+{
+    QFETCH(QByteArray, source);
+    QFETCH(Tokens, expectedTokens);
+
+    const TokenCompareFlags compareFlags = CompareKind | CompareBytes | CompareUtf16Chars | CompareUserDefinedLiteral;
+    LanguageFeatures languageFeatures;
+    languageFeatures.cxx11Enabled = true;
+    run(source, expectedTokens, false, compareFlags, false, languageFeatures);
+}
+
+void tst_SimpleLexer::user_defined_literals_data()
+{
+    QTest::addColumn<QByteArray>("source");
+    QTest::addColumn<Tokens>("expectedTokens");
+
+    typedef QByteArray _;
+
+    // String User-defined Literals
+    QTest::newRow("latin1 string non-user-defined literal")
+        << _("\"hello\"") << createToken(T_STRING_LITERAL, 7, 7, false);
+    QTest::newRow("latin1 string user-defined literal")
+        << _("\"hello\"_udl") << createToken(T_STRING_LITERAL, 11, 11, true);
+
+    // Numeric User-defined Literals
+    QTest::newRow("numeric non user-defined literal with integer suffix")
+        << _("11LL") << createToken(T_NUMERIC_LITERAL, 4, 4, false);
+    QTest::newRow("numeric non user-defined literal with decimal part")
+        << _("11.1") << createToken(T_NUMERIC_LITERAL, 4, 4, false);
+    QTest::newRow("numeric non user-defined literal with float suffix")
+        << _("11.1f") << createToken(T_NUMERIC_LITERAL, 5, 5, false);
+    QTest::newRow("numeric user-defined literal without decimal part")
+        << _("11_udl") << createToken(T_NUMERIC_LITERAL, 6, 6, true);
+    QTest::newRow("numeric user-defined literal with decimal part")
+        << _("11.1_udl") << createToken(T_NUMERIC_LITERAL, 8, 8, true);
+}
+
 static Token createToken(unsigned kind, unsigned byteOffset, unsigned bytes,
                         unsigned utf16charsOffset, unsigned utf16chars)
 {