C++: Tests: Fix unicode tests with MSVC

Change-Id: I5575826558bf60982ecc964e4dd85a3f4e920197
Reviewed-by: Christian Stenger <christian.stenger@digia.com>
2014-05-30 12:09:05 -04:00
parent 2e5ce2a8dc
commit 655470cc0c
15 changed files with 175 additions and 126 deletions
@@ -1784,21 +1784,39 @@ void CppEditorPlugin::test_quickfix_InsertDefFromDecl_unicodeIdentifier()
    QByteArray original;
    QByteArray expected;
    //
    // The following "non-latin1" code points are used in the tests:
    //
    //   U+00FC  - 2 code units in UTF8, 1 in UTF16 - LATIN SMALL LETTER U WITH DIAERESIS
    //   U+4E8C  - 3 code units in UTF8, 1 in UTF16 - CJK UNIFIED IDEOGRAPH-4E8C
    //   U+10302 - 4 code units in UTF8, 2 in UTF16 - OLD ITALIC LETTER KE
    //
 #define UNICODE_U00FC "\xc3\xbc"
 #define UNICODE_U4E8C "\xe4\xba\x8c"
 #define UNICODE_U10302 "\xf0\x90\x8c\x82"
 #define TEST_UNICODE_IDENTIFIER UNICODE_U00FC UNICODE_U4E8C UNICODE_U10302
    original =
            "class Foo {\n"
-            "    void @\u00FC\u4E8C\U00010302();\n"
+            "    void @" TEST_UNICODE_IDENTIFIER "();\n"
            "};\n";
            ;
    expected = original;
    expected +=
            "\n"
            "\n"
-            "void Foo::\u00FC\u4E8C\U00010302()\n"
+            "void Foo::" TEST_UNICODE_IDENTIFIER "()\n"
            "{\n"
            "\n"
            "}\n";
    testFiles << QuickFixTestDocument::create("file.cpp", original, expected);
 #undef UNICODE_U00FC
 #undef UNICODE_U4E8C
 #undef UNICODE_U10302
 #undef TEST_UNICODE_IDENTIFIER
    InsertDefFromDecl factory;
    QuickFixTestCase(testFiles, &factory);
 }
@@ -44,6 +44,19 @@
 #include <QDir>
 #include <QtTest>
 //
 // The following "non-latin1" code points are used in the tests:
 //
 //   U+00FC  - 2 code units in UTF8, 1 in UTF16 - LATIN SMALL LETTER U WITH DIAERESIS
 //   U+4E8C  - 3 code units in UTF8, 1 in UTF16 - CJK UNIFIED IDEOGRAPH-4E8C
 //   U+10302 - 4 code units in UTF8, 2 in UTF16 - OLD ITALIC LETTER KE
 //
 #define UNICODE_U00FC "\xc3\xbc"
 #define UNICODE_U4E8C "\xe4\xba\x8c"
 #define UNICODE_U10302 "\xf0\x90\x8c\x82"
 #define TEST_UNICODE_IDENTIFIER UNICODE_U00FC UNICODE_U4E8C UNICODE_U10302
 /*!
    Tests for Follow Symbol Under Cursor and Switch Between Function Declaration/Definition
@@ -501,8 +514,8 @@ void CppEditorPlugin::test_SwitchMethodDeclarationDefinition_data()
    ) << _();
    QTest::newRow("unicodeIdentifier") << _(
-        "class Foo { void $\u00FC\u4E8C\U00010302(); };\n"
+        "class Foo { void $" TEST_UNICODE_IDENTIFIER "(); };\n"
-        "void Foo::@\u00FC\u4E8C\U00010302() {}\n"
+        "void Foo::@" TEST_UNICODE_IDENTIFIER "() {}\n"
    ) << _();
 }
@@ -921,8 +934,8 @@ void CppEditorPlugin::test_FollowSymbolUnderCursor_data()
    );
    QTest::newRow("unicodeIdentifier") << _(
-        "class Foo { void $\u00FC\u4E8C\U00010302(); };\n"
+        "class Foo { void $" TEST_UNICODE_IDENTIFIER "(); };\n"
-        "void Foo::@\u00FC\u4E8C\U00010302() {}\n"
+        "void Foo::@" TEST_UNICODE_IDENTIFIER "() {}\n"
    );
 }
@@ -1,2 +1,3 @@
 include(../shared/shared.pri)
 SOURCES += tst_checksymbols.cpp
 HEADERS += ../cplusplus_global.h
@@ -3,5 +3,5 @@ import "../cplusplusautotest.qbs" as CPlusPlusAutotest
 CPlusPlusAutotest {
    name: "CPlusPlus check symbols autotest"
-    files: "tst_checksymbols.cpp"
+    files: [ "tst_checksymbols.cpp", "../cplusplus_global.h" ]
 }
@@ -27,6 +27,8 @@
 **
 ****************************************************************************/
 #include "../cplusplus_global.h"
 #include <cplusplus/CppDocument.h>
 #include <cplusplus/pp.h>
@@ -1595,9 +1597,9 @@ void tst_CheckSymbols::test_checksymbols_data()
            << Use(12, 23, 1, Highlighting::LocalUse));
    QTest::newRow("using_inside_different_namespace_QTCREATORBUG7978")
-        << _("class My\u00FC\u4E8C\U00010302Type { int \u00FC\u4E8C\U00010302Member; };\n"
+        << _("class My" TEST_UNICODE_IDENTIFIER "Type { int " TEST_UNICODE_IDENTIFIER "Member; };\n"
-             "void f(My\u00FC\u4E8C\U00010302Type var\u00FC\u4E8C\U00010302)\n"
+             "void f(My" TEST_UNICODE_IDENTIFIER "Type var" TEST_UNICODE_IDENTIFIER ")\n"
-             "{ var\u00FC\u4E8C\U00010302.\u00FC\u4E8C\U00010302Member = 0; }\n")
+             "{ var" TEST_UNICODE_IDENTIFIER "." TEST_UNICODE_IDENTIFIER "Member = 0; }\n")
        << (UseList()
            << Use(1, 7, 10, Highlighting::TypeUse)
            << Use(1, 24, 10, Highlighting::FieldUse)
@@ -1607,18 +1609,10 @@ void tst_CheckSymbols::test_checksymbols_data()
            << Use(3, 3, 7, Highlighting::LocalUse)
            << Use(3, 11, 10, Highlighting::FieldUse));
    //
    // The following "non-latin1" code points are used in the next tests:
    //
    //   U+00FC  - 2 code units in UTF8, 1 in UTF16 - LATIN SMALL LETTER U WITH DIAERESIS
    //   U+4E8C  - 3 code units in UTF8, 1 in UTF16 - CJK UNIFIED IDEOGRAPH-4E8C
    //   U+10302 - 4 code units in UTF8, 2 in UTF16 - OLD ITALIC LETTER KE
    //
    QTest::newRow("unicodeIdentifier1")
-        << _("class My\u00FC\u4E8C\U00010302Type { int \u00FC\u4E8C\U00010302Member; };\n"
+        << _("class My" TEST_UNICODE_IDENTIFIER "Type { int " TEST_UNICODE_IDENTIFIER "Member; };\n"
-             "void f(My\u00FC\u4E8C\U00010302Type var\u00FC\u4E8C\U00010302)\n"
+             "void f(My" TEST_UNICODE_IDENTIFIER "Type var" TEST_UNICODE_IDENTIFIER ")\n"
-             "{ var\u00FC\u4E8C\U00010302.\u00FC\u4E8C\U00010302Member = 0; }\n")
+             "{ var" TEST_UNICODE_IDENTIFIER "." TEST_UNICODE_IDENTIFIER "Member = 0; }\n")
        << (UseList()
            << Use(1, 7, 10, CppHighlightingSupport::TypeUse)
            << Use(1, 24, 10, CppHighlightingSupport::FieldUse)
@@ -1629,15 +1623,15 @@ void tst_CheckSymbols::test_checksymbols_data()
            << Use(3, 11, 10, CppHighlightingSupport::FieldUse));
    QTest::newRow("unicodeIdentifier2")
-        << _("class v\u00FC\u4E8C\U00010302\n"
+        << _("class v" TEST_UNICODE_IDENTIFIER "\n"
             "{\n"
             "public:\n"
-             "    v\u00FC\u4E8C\U00010302();\n"
+             "    v" TEST_UNICODE_IDENTIFIER "();\n"
-             "    ~v\u00FC\u4E8C\U00010302();\n"
+             "    ~v" TEST_UNICODE_IDENTIFIER "();\n"
             "};\n"
             "\n"
-             "v\u00FC\u4E8C\U00010302::v\u00FC\u4E8C\U00010302() {}\n"
+             "v" TEST_UNICODE_IDENTIFIER "::v" TEST_UNICODE_IDENTIFIER "() {}\n"
-             "v\u00FC\u4E8C\U00010302::~v\u00FC\u4E8C\U00010302() {}\n")
+             "v" TEST_UNICODE_IDENTIFIER "::~v" TEST_UNICODE_IDENTIFIER "() {}\n")
        << (UseList()
            << Use(1, 7, 5, Highlighting::TypeUse)
            << Use(4, 5, 5, Highlighting::TypeUse)
@@ -0,0 +1,46 @@
 /****************************************************************************
 **
 ** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies).
 ** Contact: http://www.qt-project.org/legal
 **
 ** This file is part of Qt Creator.
 **
 ** Commercial License Usage
 ** Licensees holding valid commercial Qt licenses may use this file in
 ** accordance with the commercial license agreement provided with the
 ** Software or, alternatively, in accordance with the terms contained in
 ** a written agreement between you and Digia.  For licensing terms and
 ** conditions see http://qt.digia.com/licensing.  For further information
 ** use the contact form at http://qt.digia.com/contact-us.
 **
 ** GNU Lesser General Public License Usage
 ** Alternatively, this file may be used under the terms of the GNU Lesser
 ** General Public License version 2.1 as published by the Free Software
 ** Foundation and appearing in the file LICENSE.LGPL included in the
 ** packaging of this file.  Please review the following information to
 ** ensure the GNU Lesser General Public License version 2.1 requirements
 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
 **
 ** In addition, as a special exception, Digia gives you certain additional
 ** rights.  These rights are described in the Digia Qt LGPL Exception
 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
 **
 ****************************************************************************/
 #ifndef CPLUSPLUS_GLOBAL_H
 #define CPLUSPLUS_GLOBAL_H
 //
 // The following "non-latin1" code points are used in the tests:
 //
 //   U+00FC  - 2 code units in UTF8, 1 in UTF16 - LATIN SMALL LETTER U WITH DIAERESIS
 //   U+4E8C  - 3 code units in UTF8, 1 in UTF16 - CJK UNIFIED IDEOGRAPH-4E8C
 //   U+10302 - 4 code units in UTF8, 2 in UTF16 - OLD ITALIC LETTER KE
 //
 // Each macro expands to a narrow string literal that spells out the UTF-8
 // bytes of one code point with hex escapes, so the bytes in the test input
 // are fixed by this header rather than by how a compiler encodes \u / \U
 // escapes. Use the macros through adjacent string literal concatenation,
 // e.g. "var_" UC_U00FC. Keeping every macro a separate literal also prevents
 // a hex escape from absorbing hex-digit characters that follow it.
 //
 #define UC_U00FC "\xc3\xbc"
 #define UC_U4E8C "\xe4\xba\x8c"
 #define UC_U10302 "\xf0\x90\x8c\x82"
 // All three code points in a row: 9 bytes in UTF-8, 4 code units in UTF-16.
 #define TEST_UNICODE_IDENTIFIER UC_U00FC UC_U4E8C UC_U10302
 #endif // CPLUSPLUS_GLOBAL_H
@@ -1,2 +1,3 @@
 include(../shared/shared.pri)
 SOURCES += tst_findusages.cpp
 HEADERS += ../cplusplus_global.h
@@ -3,5 +3,5 @@ import "../cplusplusautotest.qbs" as CPlusPlusAutotest
 CPlusPlusAutotest {
    name: "CPlusPlus find usages autotest"
-    files: "tst_findusages.cpp"
+    files: [ "tst_findusages.cpp", "../cplusplus_global.h" ]
 }
@@ -27,6 +27,8 @@
 **
 ****************************************************************************/
 #include "../cplusplus_global.h"
 #include <QtTest>
 #include <QObject>
 #include <QList>
@@ -955,17 +957,9 @@ void tst_FindUsages::usingInDifferentNamespace_QTCREATORBUG7978()
 void tst_FindUsages::unicodeIdentifier()
 {
    //
    // The following "non-latin1" code points are used:
    //
    //   U+00FC  - 2 code units in UTF8, 1 in UTF16 - LATIN SMALL LETTER U WITH DIAERESIS
    //   U+4E8C  - 3 code units in UTF8, 1 in UTF16 - CJK UNIFIED IDEOGRAPH-4E8C
    //   U+10302 - 4 code units in UTF8, 2 in UTF16 - OLD ITALIC LETTER KE
    //
    const QByteArray src = "\n"
-            "int var\u00FC\u4E8C\U00010302;\n"
+            "int var" TEST_UNICODE_IDENTIFIER ";\n"
-            "void f() { var\u00FC\u4E8C\U00010302 = 1; }\n";
+            "void f() { var" TEST_UNICODE_IDENTIFIER " = 1; }\n";
            ;
    Document::Ptr doc = Document::create("u");
@@ -1,3 +1,3 @@
 include(../shared/shared.pri)
 SOURCES += tst_lexer.cpp
-
+HEADERS += ../cplusplus_global.h
@@ -3,5 +3,5 @@ import "../cplusplusautotest.qbs" as CPlusPlusAutotest
 CPlusPlusAutotest {
    name: "CPlusPlus lexer autotest"
-    files: "tst_lexer.cpp"
+    files: [ "tst_lexer.cpp", "../cplusplus_global.h" ]
 }
@@ -27,6 +27,8 @@
 **
 ****************************************************************************/
 #include "../cplusplus_global.h"
 #include <cplusplus/Token.h>
 #include <cplusplus/SimpleLexer.h>
@@ -69,14 +71,6 @@ private slots:
    void incremental();
    void incremental_data();
    //
    // The following "non-latin1" code points are used in the tests following this comment:
    //
    //   U+00FC  - 2 code units in UTF8, 1 in UTF16 - LATIN SMALL LETTER U WITH DIAERESIS
    //   U+4E8C  - 3 code units in UTF8, 1 in UTF16 - CJK UNIFIED IDEOGRAPH-4E8C
    //   U+10302 - 4 code units in UTF8, 2 in UTF16 - OLD ITALIC LETTER KE
    //
    void bytes_and_utf16chars();
    void bytes_and_utf16chars_data();
    void offsets();
@@ -291,43 +285,43 @@ void tst_SimpleLexer::bytes_and_utf16chars_data()
    // NON-LATIN1 identifier (code point with 2 UTF8 code units)
    QTest::newRow("non-latin1 identifier (2-byte code unit at start)")
-        << _("\u00FC_var") << createToken(T_IDENTIFIER, 6, 5);
+        << _(UC_U00FC "_var") << createToken(T_IDENTIFIER, 6, 5);
    QTest::newRow("non-latin1 identifier (2-byte code unit in center)")
-        << _("_v\u00FCr_") << createToken(T_IDENTIFIER, 6, 5);
+        << _("_v" UC_U00FC "r_") << createToken(T_IDENTIFIER, 6, 5);
    QTest::newRow("non-latin1 identifier (2-byte code unit at end)")
-        << _("var_\u00FC") << createToken(T_IDENTIFIER, 6, 5);
+        << _("var_" UC_U00FC) << createToken(T_IDENTIFIER, 6, 5);
    QTest::newRow("non-latin1 identifier (2-byte code unit only)")
-        << _("\u00FC") << createToken(T_IDENTIFIER, 2, 1);
+        << _(UC_U00FC) << createToken(T_IDENTIFIER, 2, 1);
    // NON-LATIN1 identifier (code point with 3 UTF8 code units)
    QTest::newRow("non-latin1 identifier (3-byte code unit at start)")
-        << _("\u4E8C_var") << createToken(T_IDENTIFIER, 7, 5);
+        << _(UC_U4E8C "_var") << createToken(T_IDENTIFIER, 7, 5);
    QTest::newRow("non-latin1 identifier (3-byte code unit in center)")
-        << _("_v\u4E8Cr_") << createToken(T_IDENTIFIER, 7, 5);
+        << _("_v" UC_U4E8C "r_") << createToken(T_IDENTIFIER, 7, 5);
    QTest::newRow("non-latin1 identifier (3-byte code unit at end)")
-        << _("var_\u4E8C") << createToken(T_IDENTIFIER, 7, 5);
+        << _("var_" UC_U4E8C) << createToken(T_IDENTIFIER, 7, 5);
    QTest::newRow("non-latin1 identifier (3-byte code unit only)")
-        << _("\u4E8C") << createToken(T_IDENTIFIER, 3, 1);
+        << _(UC_U4E8C) << createToken(T_IDENTIFIER, 3, 1);
    // NON-LATIN1 identifier (code point with 4 UTF8 code units)
    QTest::newRow("non-latin1 identifier (4-byte code unit at start)")
-        << _("\U00010302_var") << createToken(T_IDENTIFIER, 8, 6);
+        << _(UC_U10302 "_var") << createToken(T_IDENTIFIER, 8, 6);
    QTest::newRow("non-latin1 identifier (4-byte code unit in center)")
-        << _("_v\U00010302r_") << createToken(T_IDENTIFIER, 8, 6);
+        << _("_v" UC_U10302 "r_") << createToken(T_IDENTIFIER, 8, 6);
    QTest::newRow("non-latin1 identifier (4-byte code unit at end)")
-        << _("var_\U00010302") << createToken(T_IDENTIFIER, 8, 6);
+        << _("var_" UC_U10302) << createToken(T_IDENTIFIER, 8, 6);
    QTest::newRow("non-latin1 identifier (4-byte code unit only)")
-        << _("\U00010302") << createToken(T_IDENTIFIER, 4, 2);
+        << _(UC_U10302) << createToken(T_IDENTIFIER, 4, 2);
    // NON-LATIN1 identifier (code points with several multi-byte UTF8 code units)
    QTest::newRow("non-latin1 identifier (mixed multi-byte code units at start)")
-        << _("\u00FC\u4E8C\U00010302_var") << createToken(T_IDENTIFIER, 13, 8);
+        << _(UC_U00FC UC_U4E8C UC_U10302 "_var") << createToken(T_IDENTIFIER, 13, 8);
    QTest::newRow("non-latin1 identifier (mixed multi-byte code units in center)")
-        << _("_v\u00FC\u4E8C\U00010302r_") << createToken(T_IDENTIFIER, 13, 8);
+        << _("_v" UC_U00FC UC_U4E8C UC_U10302 "r_") << createToken(T_IDENTIFIER, 13, 8);
    QTest::newRow("non-latin1 identifier (mixed multi-byte code units at end)")
-        << _("var_\u00FC\u4E8C\U00010302") << createToken(T_IDENTIFIER, 13, 8);
+        << _("var_" UC_U00FC UC_U4E8C UC_U10302) << createToken(T_IDENTIFIER, 13, 8);
    QTest::newRow("non-latin1 identifier (mixed multi-byte code units only)")
-        << _("\u00FC\u4E8C\U00010302") << createToken(T_IDENTIFIER, 9, 4);
+        << _(UC_U00FC UC_U4E8C UC_U10302) << createToken(T_IDENTIFIER, 9, 4);
    // Comments
    QTest::newRow("ascii comment /* ... */")
@@ -335,19 +329,19 @@ void tst_SimpleLexer::bytes_and_utf16chars_data()
    QTest::newRow("latin1 comment //")
        << _("// hello world") << createToken(T_CPP_COMMENT, 14, 14);
    QTest::newRow("non-latin1 comment /* ... */ (1)")
-        << _("/* \u00FC\u4E8C\U00010302 */") << createToken(T_COMMENT, 15, 10);
+        << _("/* " UC_U00FC UC_U4E8C UC_U10302 " */") << createToken(T_COMMENT, 15, 10);
    QTest::newRow("non-latin1 comment /* ... */ (2)")
-        << _("/*\u00FC\u4E8C\U00010302*/") << createToken(T_COMMENT, 13, 8);
+        << _("/*" UC_U00FC UC_U4E8C UC_U10302 "*/") << createToken(T_COMMENT, 13, 8);
    QTest::newRow("non-latin1 comment // (1)")
-        << _("// \u00FC\u4E8C\U00010302") << createToken(T_CPP_COMMENT, 12, 7);
+        << _("// " UC_U00FC UC_U4E8C UC_U10302) << createToken(T_CPP_COMMENT, 12, 7);
    QTest::newRow("non-latin1 comment // (2)")
-        << _("//\u00FC\u4E8C\U00010302") << createToken(T_CPP_COMMENT, 11, 6);
+        << _("//" UC_U00FC UC_U4E8C UC_U10302) << createToken(T_CPP_COMMENT, 11, 6);
    // String Literals
    QTest::newRow("latin1 string literal")
        << _("\"hello\"") << createToken(T_STRING_LITERAL, 7, 7);
    QTest::newRow("non-latin1 string literal")
-        << _("\"\u00FC\u4E8C\U00010302\"") << createToken(T_STRING_LITERAL, 11, 6);
+        << _("\"" UC_U00FC UC_U4E8C UC_U10302 "\"") << createToken(T_STRING_LITERAL, 11, 6);
 }
 static Token createToken(unsigned kind, unsigned byteOffset, unsigned bytes,
@@ -392,21 +386,21 @@ void tst_SimpleLexer::offsets_data()
    // NON-LATIN1 identifier
    QTest::newRow("non-latin1 identifiers 1")
-        << _("var_\u00FC var_\u00FC") << (QList<Token>()
+        << _("var_" UC_U00FC " var_" UC_U00FC) << (QList<Token>()
            << createToken(T_IDENTIFIER, 0, 6, 0, 5)
            << createToken(T_IDENTIFIER, 7, 6, 6, 5)
        );
    QTest::newRow("non-latin1 identifiers 2")
-        << _("\u00FC\u4E8C\U00010302 \u00FC\u4E8C\U00010302") << (QList<Token>()
+        << _(UC_U00FC UC_U4E8C UC_U10302 " " UC_U00FC UC_U4E8C UC_U10302) << (QList<Token>()
            << createToken(T_IDENTIFIER, 0, 9, 0, 4)
            << createToken(T_IDENTIFIER, 10, 9, 5, 4)
        );
    QTest::newRow("non-latin1 identifiers 3")   // first code unit on line: <bytes> / <utf16char>
-        << _("class v\u00FC\u4E8C\U00010302\n"  //  0 / 0
+        << _("class v" UC_U00FC UC_U4E8C UC_U10302 "\n"  //  0 / 0
             "{\n"                              // 17 / 12
             "public:\n"                        // 19 / 14
-             "    v\u00FC\u4E8C\U00010302();\n" // 27 / 22
+             "    v" UC_U00FC UC_U4E8C UC_U10302 "();\n" // 27 / 22
             "};\n") << (QList<Token>()         // 45 / 35
            << createToken(T_CLASS, 0, 5, 0, 5)         // class
            << createToken(T_IDENTIFIER, 6, 10, 6, 5)   // non-latin1 id
@@ -1,2 +1,3 @@
 include(../shared/shared.pri)
 SOURCES += tst_translationunit.cpp
 HEADERS += ../cplusplus_global.h
@@ -3,5 +3,5 @@ import "../cplusplusautotest.qbs" as CPlusPlusAutotest
 CPlusPlusAutotest {
    name: "CPlusPlus translation unit autotest"
-    files: "tst_translationunit.cpp"
+    files: [ "tst_translationunit.cpp", "../cplusplus_global.h" ]
 }
@@ -27,6 +27,8 @@
 **
 ****************************************************************************/
 #include "../cplusplus_global.h"
 #include <cplusplus/PreprocessorClient.h>
 #include <cplusplus/PreprocessorEnvironment.h>
 #include <cplusplus/Token.h>
@@ -56,15 +58,6 @@ class tst_TranslationUnit: public QObject
 {
    Q_OBJECT
 private slots:
    //
    // The following "non-latin1" code points are used in the tests following this comment:
    //
    //   U+00FC  - 2 code units in UTF8, 1 in UTF16 - LATIN SMALL LETTER U WITH DIAERESIS
    //   U+4E8C  - 3 code units in UTF8, 1 in UTF16 - CJK UNIFIED IDEOGRAPH-4E8C
    //   U+10302 - 4 code units in UTF8, 2 in UTF16 - OLD ITALIC LETTER KE
    //
    void unicodeIdentifier();
    void unicodeIdentifier_data();
@@ -211,37 +204,31 @@ void tst_TranslationUnit::unicodeIdentifier_data()
    QTest::newRow("latin1 identifier") << _("var");
-    QTest::newRow("non-latin1 identifier 1") << _("prefix\u00FC\u4E8C\U00010302");
+    QTest::newRow("non-latin1 identifier 1") << _("prefix" UC_U00FC UC_U4E8C UC_U10302);
-    QTest::newRow("non-latin1 identifier 2") << _("prefix\U00010302\u00FC\u4E8C");
+    QTest::newRow("non-latin1 identifier 2") << _("prefix" UC_U10302 UC_U00FC UC_U4E8C);
-    QTest::newRow("non-latin1 identifier 3") << _("\U00010302\u00FC\u4E8C");
+    QTest::newRow("non-latin1 identifier 3") << _(UC_U10302 UC_U00FC UC_U4E8C);
-    QTest::newRow("non-latin1 identifier 4") << _("\u4E8C\U00010302\u00FC");
+    QTest::newRow("non-latin1 identifier 4") << _(UC_U4E8C UC_U10302 UC_U00FC);
-    QTest::newRow("non-latin1 identifier 5") << _("\u4E8C\U00010302\u00FCsuffix");
+    QTest::newRow("non-latin1 identifier 5") << _(UC_U4E8C UC_U10302 UC_U00FC "suffix");
-    QTest::newRow("non-latin1 identifier 6") << _("\U00010302\u00FC\u4E8Csuffix");
+    QTest::newRow("non-latin1 identifier 6") << _(UC_U10302 UC_U00FC UC_U4E8C "suffix");
    // Some special cases (different code path inside lexer)
-    QTest::newRow("non-latin1 identifier 7") << _("LR\U00010302\u00FC\u4E8C");
+    QTest::newRow("non-latin1 identifier 7") << _("LR" UC_U10302 UC_U00FC UC_U4E8C);
-    QTest::newRow("non-latin1 identifier 8") << _("u8R\U00010302\u00FC\u4E8C");
+    QTest::newRow("non-latin1 identifier 8") << _("u8R" UC_U10302 UC_U00FC UC_U4E8C);
-    QTest::newRow("non-latin1 identifier 9") << _("u8\U00010302\u00FC\u4E8C");
+    QTest::newRow("non-latin1 identifier 9") << _("u8" UC_U10302 UC_U00FC UC_U4E8C);
-    QTest::newRow("non-latin1 identifier 10") << _("u\U00010302\u00FC\u4E8C");
+    QTest::newRow("non-latin1 identifier 10") << _("u" UC_U10302 UC_U00FC UC_U4E8C);
 }
-static QByteArray stripQuotesFromLiteral(const QByteArray literal)
+static QByteArray stripEncodingPrefixAndQuotationMarks(const QByteArray &literal)
 {
-    QByteArray result = literal;
+    const char quotationMark = '"';
-
+    const int firstQuotationMarkPosition = literal.indexOf(quotationMark);
-    // Strip front
+    const int lastQuotationMarkPosition = literal.lastIndexOf(quotationMark);
-    while (!result.isEmpty() && result[0] != '"')
+    Q_ASSERT(firstQuotationMarkPosition != -1);
-        result = result.mid(1);
+    Q_ASSERT(lastQuotationMarkPosition == literal.size() - 1);
-    if (result.isEmpty())
+    Q_ASSERT(firstQuotationMarkPosition < lastQuotationMarkPosition - 1);
        return QByteArray();
    result = result.mid(1);
    // Strip end
    while (result.size() >= 2
           && (std::isspace(result[result.size() - 1]) || result[result.size()-1] == '"')) {
        result.chop(1);
    }
    QByteArray result = literal.mid(firstQuotationMarkPosition + 1);
    result.chop(1);
    return result;
 }
@@ -254,7 +241,7 @@ void tst_TranslationUnit::unicodeStringLiteral()
    const StringLiteral *actual = document->lastStringLiteral();
    QCOMPARE(QString::fromUtf8(actual->chars(), actual->size()),
-             QString::fromUtf8(stripQuotesFromLiteral(literalText)));
+             QString::fromUtf8(stripEncodingPrefixAndQuotationMarks(literalText)));
 }
 void tst_TranslationUnit::unicodeStringLiteral_data()
@@ -265,17 +252,17 @@ void tst_TranslationUnit::unicodeStringLiteral_data()
    QTest::newRow("latin1 literal") << _("\"var\"");
-    QTest::newRow("non-latin1 literal 1") << _("\"prefix\u00FC\u4E8C\U00010302\"");
+    QTest::newRow("non-latin1 literal 1") << _("\"prefix" UC_U00FC UC_U4E8C UC_U10302 "\"");
-    QTest::newRow("non-latin1 literal 2") << _("\"prefix\U00010302\u00FC\u4E8C\"");
+    QTest::newRow("non-latin1 literal 2") << _("\"prefix" UC_U10302 UC_U00FC UC_U4E8C "\"");
-    QTest::newRow("non-latin1 literal 3") << _("\"\U00010302\u00FC\u4E8C\"");
+    QTest::newRow("non-latin1 literal 3") << _("\"" UC_U10302 UC_U00FC UC_U4E8C "\"");
-    QTest::newRow("non-latin1 literal 4") << _("\"\u4E8C\U00010302\u00FC\"");
+    QTest::newRow("non-latin1 literal 4") << _("\"" UC_U4E8C UC_U10302 UC_U00FC "\"");
-    QTest::newRow("non-latin1 literal 5") << _("\"\u4E8C\U00010302\u00FCsuffix\"");
+    QTest::newRow("non-latin1 literal 5") << _("\"" UC_U4E8C UC_U10302 UC_U00FC "suffix\"");
-    QTest::newRow("non-latin1 literal 6") << _("\"\U00010302\u00FC\u4E8Csuffix\"");
+    QTest::newRow("non-latin1 literal 6") << _("\"" UC_U10302 UC_U00FC UC_U4E8C "suffix\"");
+    // Same code points behind the encoding prefixes L, u8, u and U. The UC_*
+    // macros must stay outside the quotes of the C++ literal, otherwise their
+    // names end up as plain ASCII text in the test input.
-    QTest::newRow("non-latin1 literal 7") << _("L\"\U00010302\u00FC\u4E8C\"");
+    QTest::newRow("non-latin1 literal 7") << _("L\"" UC_U10302 UC_U00FC UC_U4E8C "\"");
-    QTest::newRow("non-latin1 literal 8") << _("u8\"\U00010302\u00FC\u4E8C\"");
+    QTest::newRow("non-latin1 literal 8") << _("u8\"" UC_U10302 UC_U00FC UC_U4E8C "\"");
-    QTest::newRow("non-latin1 literal 9") << _("u\"\U00010302\u00FC\u4E8C\"");
+    QTest::newRow("non-latin1 literal 9") << _("u\"" UC_U10302 UC_U00FC UC_U4E8C "\"");
-    QTest::newRow("non-latin1 literal 10") << _("U\"\U00010302\u00FC\u4E8C\"");
+    QTest::newRow("non-latin1 literal 10") << _("U\"" UC_U10302 UC_U00FC UC_U4E8C "\"");
 }
 void tst_TranslationUnit::locationOfUtf16CharOffset()
@@ -340,7 +327,7 @@ void tst_TranslationUnit::locationOfUtf16CharOffset_data()
        << LineColumnList();
    QTest::newRow("non-latin1 identifier")
-        << _("int \u00FC;")
+        << _("int " UC_U00FC ";")
        << (LineColumnList()
            << LineColumn(0, 0)
            << LineColumn(1, 1) // int
@@ -351,8 +338,8 @@ void tst_TranslationUnit::locationOfUtf16CharOffset_data()
        << LineColumnList();
    QTest::newRow("non-latin1 identifiers 1")
-        << _("int \u00FC;\n"
+        << _("int " UC_U00FC ";\n"
-             "int \u00FC;")
+             "int " UC_U00FC ";")
        << (LineColumnList()
            << LineColumn(0, 0)
            << LineColumn(1, 1) // int 1
@@ -366,9 +353,9 @@ void tst_TranslationUnit::locationOfUtf16CharOffset_data()
        << LineColumnList();
    QTest::newRow("non-latin1 identifiers 2")
-        << _("int \u00FC\u4E8C\U00010302;\n"
+        << _("int " UC_U00FC UC_U4E8C UC_U10302 ";\n"
             "int v;\n"
-             "int \U00010302\u4E8C;")
+             "int " UC_U10302 UC_U4E8C ";")
        << (LineColumnList()
            << LineColumn(0, 0)
            << LineColumn(1, 1) // int 1
@@ -402,7 +389,7 @@ void tst_TranslationUnit::locationOfUtf16CharOffset_data()
        << LineColumnList();
    QTest::newRow("non-latin1 string literal")
-        << _("char t[] = \"i\u00FC\u4E8C\U00010302\";")
+        << _("char t[] = \"i" UC_U00FC UC_U4E8C UC_U10302 "\";")
        << (LineColumnList()
            << LineColumn(0, 0)
            << LineColumn(1, 1)  // char
@@ -417,7 +404,7 @@ void tst_TranslationUnit::locationOfUtf16CharOffset_data()
        << LineColumnList();
    QTest::newRow("non-latin1 string literal multiple lines")
-        << _("char t[] = \"i\u00FC\u4E8C\U00010302 \\\n"
+        << _("char t[] = \"i" UC_U00FC UC_U4E8C UC_U10302 " \\\n"
             "\";")
        << (LineColumnList()
            << LineColumn(0, 0)
@@ -476,7 +463,7 @@ void tst_TranslationUnit::locationOfUtf16CharOffset_data()
           );
    QTest::newRow("non-latin1 c++ comment line")
-        << _("// comment line \u00FC\u4E8C\U00010302\n"
+        << _("// comment line " UC_U00FC UC_U4E8C UC_U10302 "\n"
             "int i;")
        << (LineColumnList()
            << LineColumn(0, 0)