C++: Tests: Fix unicode tests with MSVC

Change-Id: I5575826558bf60982ecc964e4dd85a3f4e920197
Reviewed-by: Christian Stenger <christian.stenger@digia.com>
This commit is contained in:
Nikolai Kosjar
2014-05-30 12:09:05 -04:00
committed by Christian Stenger
parent 2e5ce2a8dc
commit 655470cc0c
15 changed files with 175 additions and 126 deletions
+20 -2
View File
@@ -1784,21 +1784,39 @@ void CppEditorPlugin::test_quickfix_InsertDefFromDecl_unicodeIdentifier()
QByteArray original; QByteArray original;
QByteArray expected; QByteArray expected;
//
// The following "non-latin1" code points are used in the tests:
//
// U+00FC - 2 code units in UTF8, 1 in UTF16 - LATIN SMALL LETTER U WITH DIAERESIS
// U+4E8C - 3 code units in UTF8, 1 in UTF16 - CJK UNIFIED IDEOGRAPH-4E8C
// U+10302 - 4 code units in UTF8, 2 in UTF16 - OLD ITALIC LETTER KE
//
#define UNICODE_U00FC "\xc3\xbc"
#define UNICODE_U4E8C "\xe4\xba\x8c"
#define UNICODE_U10302 "\xf0\x90\x8c\x82"
#define TEST_UNICODE_IDENTIFIER UNICODE_U00FC UNICODE_U4E8C UNICODE_U10302
original = original =
"class Foo {\n" "class Foo {\n"
" void @\u00FC\u4E8C\U00010302();\n" " void @" TEST_UNICODE_IDENTIFIER "();\n"
"};\n"; "};\n";
; ;
expected = original; expected = original;
expected += expected +=
"\n" "\n"
"\n" "\n"
"void Foo::\u00FC\u4E8C\U00010302()\n" "void Foo::" TEST_UNICODE_IDENTIFIER "()\n"
"{\n" "{\n"
"\n" "\n"
"}\n"; "}\n";
testFiles << QuickFixTestDocument::create("file.cpp", original, expected); testFiles << QuickFixTestDocument::create("file.cpp", original, expected);
#undef UNICODE_U00FC
#undef UNICODE_U4E8C
#undef UNICODE_U10302
#undef TEST_UNICODE_IDENTIFIER
InsertDefFromDecl factory; InsertDefFromDecl factory;
QuickFixTestCase(testFiles, &factory); QuickFixTestCase(testFiles, &factory);
} }
@@ -44,6 +44,19 @@
#include <QDir> #include <QDir>
#include <QtTest> #include <QtTest>
//
// String-literal macros for the code points used by the unicode tests in this file.
//
// The following "non-latin1" code points are used in the tests. ("non-latin1" is the
// label used in the test names; strictly speaking U+00FC is part of Latin-1. What
// matters here is that every code point needs more than one UTF-8 code unit.)
//
// U+00FC - 2 code units in UTF8, 1 in UTF16 - LATIN SMALL LETTER U WITH DIAERESIS
// U+4E8C - 3 code units in UTF8, 1 in UTF16 - CJK UNIFIED IDEOGRAPH-4E8C
// U+10302 - 4 code units in UTF8, 2 in UTF16 - OLD ITALIC LETTER KE
//
// Each macro expands to a narrow string literal that spells out the UTF-8 bytes of
// one code point as hex escapes. Use them through adjacent string literal
// concatenation, e.g. "void Foo::@" TEST_UNICODE_IDENTIFIER "() {}". Keep each macro
// a separate literal: a hex escape extends over all hex digits that follow it inside
// the same literal, whereas concatenation happens after escapes have been converted.
//
// NOTE(review): presumably written as raw bytes instead of universal character names
// so that the bytes do not depend on the compiler's execution character set (MSVC) -
// confirm.
//
#define UNICODE_U00FC "\xc3\xbc"
#define UNICODE_U4E8C "\xe4\xba\x8c"
#define UNICODE_U10302 "\xf0\x90\x8c\x82"
// All three code points in ascending order: 2 + 3 + 4 = 9 bytes in UTF-8.
#define TEST_UNICODE_IDENTIFIER UNICODE_U00FC UNICODE_U4E8C UNICODE_U10302
/*! /*!
Tests for Follow Symbol Under Cursor and Switch Between Function Declaration/Definition Tests for Follow Symbol Under Cursor and Switch Between Function Declaration/Definition
@@ -501,8 +514,8 @@ void CppEditorPlugin::test_SwitchMethodDeclarationDefinition_data()
) << _(); ) << _();
QTest::newRow("unicodeIdentifier") << _( QTest::newRow("unicodeIdentifier") << _(
"class Foo { void $\u00FC\u4E8C\U00010302(); };\n" "class Foo { void $" TEST_UNICODE_IDENTIFIER "(); };\n"
"void Foo::@\u00FC\u4E8C\U00010302() {}\n" "void Foo::@" TEST_UNICODE_IDENTIFIER "() {}\n"
) << _(); ) << _();
} }
@@ -921,8 +934,8 @@ void CppEditorPlugin::test_FollowSymbolUnderCursor_data()
); );
QTest::newRow("unicodeIdentifier") << _( QTest::newRow("unicodeIdentifier") << _(
"class Foo { void $\u00FC\u4E8C\U00010302(); };\n" "class Foo { void $" TEST_UNICODE_IDENTIFIER "(); };\n"
"void Foo::@\u00FC\u4E8C\U00010302() {}\n" "void Foo::@" TEST_UNICODE_IDENTIFIER "() {}\n"
); );
} }
@@ -1,2 +1,3 @@
include(../shared/shared.pri) include(../shared/shared.pri)
SOURCES += tst_checksymbols.cpp SOURCES += tst_checksymbols.cpp
HEADERS += ../cplusplus_global.h
@@ -3,5 +3,5 @@ import "../cplusplusautotest.qbs" as CPlusPlusAutotest
CPlusPlusAutotest { CPlusPlusAutotest {
name: "CPlusPlus check symbols autotest" name: "CPlusPlus check symbols autotest"
files: "tst_checksymbols.cpp" files: [ "tst_checksymbols.cpp", "../cplusplus_global.h" ]
} }
@@ -27,6 +27,8 @@
** **
****************************************************************************/ ****************************************************************************/
#include "../cplusplus_global.h"
#include <cplusplus/CppDocument.h> #include <cplusplus/CppDocument.h>
#include <cplusplus/pp.h> #include <cplusplus/pp.h>
@@ -1595,9 +1597,9 @@ void tst_CheckSymbols::test_checksymbols_data()
<< Use(12, 23, 1, Highlighting::LocalUse)); << Use(12, 23, 1, Highlighting::LocalUse));
QTest::newRow("using_inside_different_namespace_QTCREATORBUG7978") QTest::newRow("using_inside_different_namespace_QTCREATORBUG7978")
<< _("class My\u00FC\u4E8C\U00010302Type { int \u00FC\u4E8C\U00010302Member; };\n" << _("class My" TEST_UNICODE_IDENTIFIER "Type { int " TEST_UNICODE_IDENTIFIER "Member; };\n"
"void f(My\u00FC\u4E8C\U00010302Type var\u00FC\u4E8C\U00010302)\n" "void f(My" TEST_UNICODE_IDENTIFIER "Type var" TEST_UNICODE_IDENTIFIER ")\n"
"{ var\u00FC\u4E8C\U00010302.\u00FC\u4E8C\U00010302Member = 0; }\n") "{ var" TEST_UNICODE_IDENTIFIER "." TEST_UNICODE_IDENTIFIER "Member = 0; }\n")
<< (UseList() << (UseList()
<< Use(1, 7, 10, Highlighting::TypeUse) << Use(1, 7, 10, Highlighting::TypeUse)
<< Use(1, 24, 10, Highlighting::FieldUse) << Use(1, 24, 10, Highlighting::FieldUse)
@@ -1607,18 +1609,10 @@ void tst_CheckSymbols::test_checksymbols_data()
<< Use(3, 3, 7, Highlighting::LocalUse) << Use(3, 3, 7, Highlighting::LocalUse)
<< Use(3, 11, 10, Highlighting::FieldUse)); << Use(3, 11, 10, Highlighting::FieldUse));
//
// The following "non-latin1" code points are used in the next tests:
//
// U+00FC - 2 code units in UTF8, 1 in UTF16 - LATIN SMALL LETTER U WITH DIAERESIS
// U+4E8C - 3 code units in UTF8, 1 in UTF16 - CJK UNIFIED IDEOGRAPH-4E8C
// U+10302 - 4 code units in UTF8, 2 in UTF16 - OLD ITALIC LETTER KE
//
QTest::newRow("unicodeIdentifier1") QTest::newRow("unicodeIdentifier1")
<< _("class My\u00FC\u4E8C\U00010302Type { int \u00FC\u4E8C\U00010302Member; };\n" << _("class My" TEST_UNICODE_IDENTIFIER "Type { int " TEST_UNICODE_IDENTIFIER "Member; };\n"
"void f(My\u00FC\u4E8C\U00010302Type var\u00FC\u4E8C\U00010302)\n" "void f(My" TEST_UNICODE_IDENTIFIER "Type var" TEST_UNICODE_IDENTIFIER ")\n"
"{ var\u00FC\u4E8C\U00010302.\u00FC\u4E8C\U00010302Member = 0; }\n") "{ var" TEST_UNICODE_IDENTIFIER "." TEST_UNICODE_IDENTIFIER "Member = 0; }\n")
<< (UseList() << (UseList()
<< Use(1, 7, 10, CppHighlightingSupport::TypeUse) << Use(1, 7, 10, CppHighlightingSupport::TypeUse)
<< Use(1, 24, 10, CppHighlightingSupport::FieldUse) << Use(1, 24, 10, CppHighlightingSupport::FieldUse)
@@ -1629,15 +1623,15 @@ void tst_CheckSymbols::test_checksymbols_data()
<< Use(3, 11, 10, CppHighlightingSupport::FieldUse)); << Use(3, 11, 10, CppHighlightingSupport::FieldUse));
QTest::newRow("unicodeIdentifier2") QTest::newRow("unicodeIdentifier2")
<< _("class v\u00FC\u4E8C\U00010302\n" << _("class v" TEST_UNICODE_IDENTIFIER "\n"
"{\n" "{\n"
"public:\n" "public:\n"
" v\u00FC\u4E8C\U00010302();\n" " v" TEST_UNICODE_IDENTIFIER "();\n"
" ~v\u00FC\u4E8C\U00010302();\n" " ~v" TEST_UNICODE_IDENTIFIER "();\n"
"};\n" "};\n"
"\n" "\n"
"v\u00FC\u4E8C\U00010302::v\u00FC\u4E8C\U00010302() {}\n" "v" TEST_UNICODE_IDENTIFIER "::v" TEST_UNICODE_IDENTIFIER "() {}\n"
"v\u00FC\u4E8C\U00010302::~v\u00FC\u4E8C\U00010302() {}\n") "v" TEST_UNICODE_IDENTIFIER "::~v" TEST_UNICODE_IDENTIFIER "() {}\n")
<< (UseList() << (UseList()
<< Use(1, 7, 5, Highlighting::TypeUse) << Use(1, 7, 5, Highlighting::TypeUse)
<< Use(4, 5, 5, Highlighting::TypeUse) << Use(4, 5, 5, Highlighting::TypeUse)
+46
View File
@@ -0,0 +1,46 @@
/****************************************************************************
**
** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies).
** Contact: http://www.qt-project.org/legal
**
** This file is part of Qt Creator.
**
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and Digia. For licensing terms and
** conditions see http://qt.digia.com/licensing. For further information
** use the contact form at http://qt.digia.com/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 2.1 requirements
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Digia gives you certain additional
** rights. These rights are described in the Digia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
****************************************************************************/
#ifndef CPLUSPLUS_GLOBAL_H
#define CPLUSPLUS_GLOBAL_H
//
// Shared test data: string-literal macros for the code points used by the
// unicode autotests that include this header.
//
// The following "non-latin1" code points are used in the tests. ("non-latin1" is the
// label used in the test row names; strictly speaking U+00FC is part of Latin-1. What
// matters here is that every code point needs more than one UTF-8 code unit.)
//
// U+00FC - 2 code units in UTF8, 1 in UTF16 - LATIN SMALL LETTER U WITH DIAERESIS
// U+4E8C - 3 code units in UTF8, 1 in UTF16 - CJK UNIFIED IDEOGRAPH-4E8C
// U+10302 - 4 code units in UTF8, 2 in UTF16 - OLD ITALIC LETTER KE
//
// Each UC_* macro expands to a narrow string literal that spells out the UTF-8 bytes
// of one code point as hex escapes. Use them through adjacent string literal
// concatenation, e.g. "var_" UC_U00FC or "int " TEST_UNICODE_IDENTIFIER "Member;".
// Keep each macro a separate literal: a hex escape extends over all hex digits that
// follow it inside the same literal, whereas concatenation happens after escapes have
// been converted.
//
// NOTE(review): presumably written as raw bytes instead of universal character names
// so that the bytes do not depend on the compiler's execution character set (MSVC) -
// confirm.
//
#define UC_U00FC "\xc3\xbc"
#define UC_U4E8C "\xe4\xba\x8c"
#define UC_U10302 "\xf0\x90\x8c\x82"
// All three code points in ascending order: 2 + 3 + 4 = 9 bytes in UTF-8.
#define TEST_UNICODE_IDENTIFIER UC_U00FC UC_U4E8C UC_U10302
#endif // CPLUSPLUS_GLOBAL_H
@@ -1,2 +1,3 @@
include(../shared/shared.pri) include(../shared/shared.pri)
SOURCES += tst_findusages.cpp SOURCES += tst_findusages.cpp
HEADERS += ../cplusplus_global.h
@@ -3,5 +3,5 @@ import "../cplusplusautotest.qbs" as CPlusPlusAutotest
CPlusPlusAutotest { CPlusPlusAutotest {
name: "CPlusPlus find usages autotest" name: "CPlusPlus find usages autotest"
files: "tst_findusages.cpp" files: [ "tst_findusages.cpp", "../cplusplus_global.h" ]
} }
@@ -27,6 +27,8 @@
** **
****************************************************************************/ ****************************************************************************/
#include "../cplusplus_global.h"
#include <QtTest> #include <QtTest>
#include <QObject> #include <QObject>
#include <QList> #include <QList>
@@ -955,17 +957,9 @@ void tst_FindUsages::usingInDifferentNamespace_QTCREATORBUG7978()
void tst_FindUsages::unicodeIdentifier() void tst_FindUsages::unicodeIdentifier()
{ {
//
// The following "non-latin1" code points are used:
//
// U+00FC - 2 code units in UTF8, 1 in UTF16 - LATIN SMALL LETTER U WITH DIAERESIS
// U+4E8C - 3 code units in UTF8, 1 in UTF16 - CJK UNIFIED IDEOGRAPH-4E8C
// U+10302 - 4 code units in UTF8, 2 in UTF16 - OLD ITALIC LETTER KE
//
const QByteArray src = "\n" const QByteArray src = "\n"
"int var\u00FC\u4E8C\U00010302;\n" "int var" TEST_UNICODE_IDENTIFIER ";\n"
"void f() { var\u00FC\u4E8C\U00010302 = 1; }\n"; "void f() { var" TEST_UNICODE_IDENTIFIER " = 1; }\n";
; ;
Document::Ptr doc = Document::create("u"); Document::Ptr doc = Document::create("u");
+1 -1
View File
@@ -1,3 +1,3 @@
include(../shared/shared.pri) include(../shared/shared.pri)
SOURCES += tst_lexer.cpp SOURCES += tst_lexer.cpp
HEADERS += ../cplusplus_global.h
+1 -1
View File
@@ -3,5 +3,5 @@ import "../cplusplusautotest.qbs" as CPlusPlusAutotest
CPlusPlusAutotest { CPlusPlusAutotest {
name: "CPlusPlus lexer autotest" name: "CPlusPlus lexer autotest"
files: "tst_lexer.cpp" files: [ "tst_lexer.cpp", "../cplusplus_global.h" ]
} }
+27 -33
View File
@@ -27,6 +27,8 @@
** **
****************************************************************************/ ****************************************************************************/
#include "../cplusplus_global.h"
#include <cplusplus/Token.h> #include <cplusplus/Token.h>
#include <cplusplus/SimpleLexer.h> #include <cplusplus/SimpleLexer.h>
@@ -69,14 +71,6 @@ private slots:
void incremental(); void incremental();
void incremental_data(); void incremental_data();
//
// The following "non-latin1" code points are used in the tests following this comment:
//
// U+00FC - 2 code units in UTF8, 1 in UTF16 - LATIN SMALL LETTER U WITH DIAERESIS
// U+4E8C - 3 code units in UTF8, 1 in UTF16 - CJK UNIFIED IDEOGRAPH-4E8C
// U+10302 - 4 code units in UTF8, 2 in UTF16 - OLD ITALIC LETTER KE
//
void bytes_and_utf16chars(); void bytes_and_utf16chars();
void bytes_and_utf16chars_data(); void bytes_and_utf16chars_data();
void offsets(); void offsets();
@@ -291,43 +285,43 @@ void tst_SimpleLexer::bytes_and_utf16chars_data()
// NON-LATIN1 identifier (code point with 2 UTF8 code units) // NON-LATIN1 identifier (code point with 2 UTF8 code units)
QTest::newRow("non-latin1 identifier (2-byte code unit at start)") QTest::newRow("non-latin1 identifier (2-byte code unit at start)")
<< _("\u00FC_var") << createToken(T_IDENTIFIER, 6, 5); << _(UC_U00FC "_var") << createToken(T_IDENTIFIER, 6, 5);
QTest::newRow("non-latin1 identifier (2-byte code unit in center)") QTest::newRow("non-latin1 identifier (2-byte code unit in center)")
<< _("_v\u00FCr_") << createToken(T_IDENTIFIER, 6, 5); << _("_v" UC_U00FC "r_") << createToken(T_IDENTIFIER, 6, 5);
QTest::newRow("non-latin1 identifier (2-byte code unit at end)") QTest::newRow("non-latin1 identifier (2-byte code unit at end)")
<< _("var_\u00FC") << createToken(T_IDENTIFIER, 6, 5); << _("var_" UC_U00FC) << createToken(T_IDENTIFIER, 6, 5);
QTest::newRow("non-latin1 identifier (2-byte code unit only)") QTest::newRow("non-latin1 identifier (2-byte code unit only)")
<< _("\u00FC") << createToken(T_IDENTIFIER, 2, 1); << _(UC_U00FC) << createToken(T_IDENTIFIER, 2, 1);
// NON-LATIN1 identifier (code point with 3 UTF8 code units) // NON-LATIN1 identifier (code point with 3 UTF8 code units)
QTest::newRow("non-latin1 identifier (3-byte code unit at start)") QTest::newRow("non-latin1 identifier (3-byte code unit at start)")
<< _("\u4E8C_var") << createToken(T_IDENTIFIER, 7, 5); << _(UC_U4E8C "_var") << createToken(T_IDENTIFIER, 7, 5);
QTest::newRow("non-latin1 identifier (3-byte code unit in center)") QTest::newRow("non-latin1 identifier (3-byte code unit in center)")
<< _("_v\u4E8Cr_") << createToken(T_IDENTIFIER, 7, 5); << _("_v" UC_U4E8C "r_") << createToken(T_IDENTIFIER, 7, 5);
QTest::newRow("non-latin1 identifier (3-byte code unit at end)") QTest::newRow("non-latin1 identifier (3-byte code unit at end)")
<< _("var_\u4E8C") << createToken(T_IDENTIFIER, 7, 5); << _("var_" UC_U4E8C) << createToken(T_IDENTIFIER, 7, 5);
QTest::newRow("non-latin1 identifier (3-byte code unit only)") QTest::newRow("non-latin1 identifier (3-byte code unit only)")
<< _("\u4E8C") << createToken(T_IDENTIFIER, 3, 1); << _(UC_U4E8C) << createToken(T_IDENTIFIER, 3, 1);
// NON-LATIN1 identifier (code point with 4 UTF8 code units) // NON-LATIN1 identifier (code point with 4 UTF8 code units)
QTest::newRow("non-latin1 identifier (4-byte code unit at start)") QTest::newRow("non-latin1 identifier (4-byte code unit at start)")
<< _("\U00010302_var") << createToken(T_IDENTIFIER, 8, 6); << _(UC_U10302 "_var") << createToken(T_IDENTIFIER, 8, 6);
QTest::newRow("non-latin1 identifier (4-byte code unit in center)") QTest::newRow("non-latin1 identifier (4-byte code unit in center)")
<< _("_v\U00010302r_") << createToken(T_IDENTIFIER, 8, 6); << _("_v" UC_U10302 "r_") << createToken(T_IDENTIFIER, 8, 6);
QTest::newRow("non-latin1 identifier (4-byte code unit at end)") QTest::newRow("non-latin1 identifier (4-byte code unit at end)")
<< _("var_\U00010302") << createToken(T_IDENTIFIER, 8, 6); << _("var_" UC_U10302) << createToken(T_IDENTIFIER, 8, 6);
QTest::newRow("non-latin1 identifier (4-byte code unit only)") QTest::newRow("non-latin1 identifier (4-byte code unit only)")
<< _("\U00010302") << createToken(T_IDENTIFIER, 4, 2); << _(UC_U10302) << createToken(T_IDENTIFIER, 4, 2);
// NON-LATIN1 identifier (code points with several multi-byte UTF8 code units) // NON-LATIN1 identifier (code points with several multi-byte UTF8 code units)
QTest::newRow("non-latin1 identifier (mixed multi-byte code units at start)") QTest::newRow("non-latin1 identifier (mixed multi-byte code units at start)")
<< _("\u00FC\u4E8C\U00010302_var") << createToken(T_IDENTIFIER, 13, 8); << _(UC_U00FC UC_U4E8C UC_U10302 "_var") << createToken(T_IDENTIFIER, 13, 8);
QTest::newRow("non-latin1 identifier (mixed multi-byte code units in center)") QTest::newRow("non-latin1 identifier (mixed multi-byte code units in center)")
<< _("_v\u00FC\u4E8C\U00010302r_") << createToken(T_IDENTIFIER, 13, 8); << _("_v" UC_U00FC UC_U4E8C UC_U10302 "r_") << createToken(T_IDENTIFIER, 13, 8);
QTest::newRow("non-latin1 identifier (mixed multi-byte code units at end)") QTest::newRow("non-latin1 identifier (mixed multi-byte code units at end)")
<< _("var_\u00FC\u4E8C\U00010302") << createToken(T_IDENTIFIER, 13, 8); << _("var_" UC_U00FC UC_U4E8C UC_U10302) << createToken(T_IDENTIFIER, 13, 8);
QTest::newRow("non-latin1 identifier (mixed multi-byte code units only)") QTest::newRow("non-latin1 identifier (mixed multi-byte code units only)")
<< _("\u00FC\u4E8C\U00010302") << createToken(T_IDENTIFIER, 9, 4); << _(UC_U00FC UC_U4E8C UC_U10302) << createToken(T_IDENTIFIER, 9, 4);
// Comments // Comments
QTest::newRow("ascii comment /* ... */") QTest::newRow("ascii comment /* ... */")
@@ -335,19 +329,19 @@ void tst_SimpleLexer::bytes_and_utf16chars_data()
QTest::newRow("latin1 comment //") QTest::newRow("latin1 comment //")
<< _("// hello world") << createToken(T_CPP_COMMENT, 14, 14); << _("// hello world") << createToken(T_CPP_COMMENT, 14, 14);
QTest::newRow("non-latin1 comment /* ... */ (1)") QTest::newRow("non-latin1 comment /* ... */ (1)")
<< _("/* \u00FC\u4E8C\U00010302 */") << createToken(T_COMMENT, 15, 10); << _("/* " UC_U00FC UC_U4E8C UC_U10302 " */") << createToken(T_COMMENT, 15, 10);
QTest::newRow("non-latin1 comment /* ... */ (2)") QTest::newRow("non-latin1 comment /* ... */ (2)")
<< _("/*\u00FC\u4E8C\U00010302*/") << createToken(T_COMMENT, 13, 8); << _("/*" UC_U00FC UC_U4E8C UC_U10302 "*/") << createToken(T_COMMENT, 13, 8);
QTest::newRow("non-latin1 comment // (1)") QTest::newRow("non-latin1 comment // (1)")
<< _("// \u00FC\u4E8C\U00010302") << createToken(T_CPP_COMMENT, 12, 7); << _("// " UC_U00FC UC_U4E8C UC_U10302) << createToken(T_CPP_COMMENT, 12, 7);
QTest::newRow("non-latin1 comment // (2)") QTest::newRow("non-latin1 comment // (2)")
<< _("//\u00FC\u4E8C\U00010302") << createToken(T_CPP_COMMENT, 11, 6); << _("//" UC_U00FC UC_U4E8C UC_U10302) << createToken(T_CPP_COMMENT, 11, 6);
// String Literals // String Literals
QTest::newRow("latin1 string literal") QTest::newRow("latin1 string literal")
<< _("\"hello\"") << createToken(T_STRING_LITERAL, 7, 7); << _("\"hello\"") << createToken(T_STRING_LITERAL, 7, 7);
QTest::newRow("non-latin1 string literal") QTest::newRow("non-latin1 string literal")
<< _("\"\u00FC\u4E8C\U00010302\"") << createToken(T_STRING_LITERAL, 11, 6); << _("\"" UC_U00FC UC_U4E8C UC_U10302 "\"") << createToken(T_STRING_LITERAL, 11, 6);
} }
static Token createToken(unsigned kind, unsigned byteOffset, unsigned bytes, static Token createToken(unsigned kind, unsigned byteOffset, unsigned bytes,
@@ -392,21 +386,21 @@ void tst_SimpleLexer::offsets_data()
// NON-LATIN1 identifier // NON-LATIN1 identifier
QTest::newRow("non-latin1 identifiers 1") QTest::newRow("non-latin1 identifiers 1")
<< _("var_\u00FC var_\u00FC") << (QList<Token>() << _("var_" UC_U00FC " var_" UC_U00FC) << (QList<Token>()
<< createToken(T_IDENTIFIER, 0, 6, 0, 5) << createToken(T_IDENTIFIER, 0, 6, 0, 5)
<< createToken(T_IDENTIFIER, 7, 6, 6, 5) << createToken(T_IDENTIFIER, 7, 6, 6, 5)
); );
QTest::newRow("non-latin1 identifiers 2") QTest::newRow("non-latin1 identifiers 2")
<< _("\u00FC\u4E8C\U00010302 \u00FC\u4E8C\U00010302") << (QList<Token>() << _(UC_U00FC UC_U4E8C UC_U10302 " " UC_U00FC UC_U4E8C UC_U10302) << (QList<Token>()
<< createToken(T_IDENTIFIER, 0, 9, 0, 4) << createToken(T_IDENTIFIER, 0, 9, 0, 4)
<< createToken(T_IDENTIFIER, 10, 9, 5, 4) << createToken(T_IDENTIFIER, 10, 9, 5, 4)
); );
QTest::newRow("non-latin1 identifiers 3") // first code unit on line: <bytes> / <utf16char> QTest::newRow("non-latin1 identifiers 3") // first code unit on line: <bytes> / <utf16char>
<< _("class v\u00FC\u4E8C\U00010302\n" // 0 / 0 << _("class v" UC_U00FC UC_U4E8C UC_U10302 "\n" // 0 / 0
"{\n" // 17 / 12 "{\n" // 17 / 12
"public:\n" // 19 / 14 "public:\n" // 19 / 14
" v\u00FC\u4E8C\U00010302();\n" // 27 / 22 " v" UC_U00FC UC_U4E8C UC_U10302 "();\n" // 27 / 22
"};\n") << (QList<Token>() // 45 / 35 "};\n") << (QList<Token>() // 45 / 35
<< createToken(T_CLASS, 0, 5, 0, 5) // class << createToken(T_CLASS, 0, 5, 0, 5) // class
<< createToken(T_IDENTIFIER, 6, 10, 6, 5) // non-latin1 id << createToken(T_IDENTIFIER, 6, 10, 6, 5) // non-latin1 id
@@ -1,2 +1,3 @@
include(../shared/shared.pri) include(../shared/shared.pri)
SOURCES += tst_translationunit.cpp SOURCES += tst_translationunit.cpp
HEADERS += ../cplusplus_global.h
@@ -3,5 +3,5 @@ import "../cplusplusautotest.qbs" as CPlusPlusAutotest
CPlusPlusAutotest { CPlusPlusAutotest {
name: "CPlusPlus translation unit autotest" name: "CPlusPlus translation unit autotest"
files: "tst_translationunit.cpp" files: [ "tst_translationunit.cpp", "../cplusplus_global.h" ]
} }
@@ -27,6 +27,8 @@
** **
****************************************************************************/ ****************************************************************************/
#include "../cplusplus_global.h"
#include <cplusplus/PreprocessorClient.h> #include <cplusplus/PreprocessorClient.h>
#include <cplusplus/PreprocessorEnvironment.h> #include <cplusplus/PreprocessorEnvironment.h>
#include <cplusplus/Token.h> #include <cplusplus/Token.h>
@@ -56,15 +58,6 @@ class tst_TranslationUnit: public QObject
{ {
Q_OBJECT Q_OBJECT
private slots: private slots:
//
// The following "non-latin1" code points are used in the tests following this comment:
//
// U+00FC - 2 code units in UTF8, 1 in UTF16 - LATIN SMALL LETTER U WITH DIAERESIS
// U+4E8C - 3 code units in UTF8, 1 in UTF16 - CJK UNIFIED IDEOGRAPH-4E8C
// U+10302 - 4 code units in UTF8, 2 in UTF16 - OLD ITALIC LETTER KE
//
void unicodeIdentifier(); void unicodeIdentifier();
void unicodeIdentifier_data(); void unicodeIdentifier_data();
@@ -211,37 +204,31 @@ void tst_TranslationUnit::unicodeIdentifier_data()
QTest::newRow("latin1 identifier") << _("var"); QTest::newRow("latin1 identifier") << _("var");
QTest::newRow("non-latin1 identifier 1") << _("prefix\u00FC\u4E8C\U00010302"); QTest::newRow("non-latin1 identifier 1") << _("prefix" UC_U00FC UC_U4E8C UC_U10302);
QTest::newRow("non-latin1 identifier 2") << _("prefix\U00010302\u00FC\u4E8C"); QTest::newRow("non-latin1 identifier 2") << _("prefix" UC_U10302 UC_U00FC UC_U4E8C);
QTest::newRow("non-latin1 identifier 3") << _("\U00010302\u00FC\u4E8C"); QTest::newRow("non-latin1 identifier 3") << _(UC_U10302 UC_U00FC UC_U4E8C);
QTest::newRow("non-latin1 identifier 4") << _("\u4E8C\U00010302\u00FC"); QTest::newRow("non-latin1 identifier 4") << _(UC_U4E8C UC_U10302 UC_U00FC);
QTest::newRow("non-latin1 identifier 5") << _("\u4E8C\U00010302\u00FCsuffix"); QTest::newRow("non-latin1 identifier 5") << _(UC_U4E8C UC_U10302 UC_U00FC "suffix");
QTest::newRow("non-latin1 identifier 6") << _("\U00010302\u00FC\u4E8Csuffix"); QTest::newRow("non-latin1 identifier 6") << _(UC_U10302 UC_U00FC UC_U4E8C "suffix");
// Some special cases (different code path inside lexer) // Some special cases (different code path inside lexer)
QTest::newRow("non-latin1 identifier 7") << _("LR\U00010302\u00FC\u4E8C"); QTest::newRow("non-latin1 identifier 7") << _("LR" UC_U10302 UC_U00FC UC_U4E8C);
QTest::newRow("non-latin1 identifier 8") << _("u8R\U00010302\u00FC\u4E8C"); QTest::newRow("non-latin1 identifier 8") << _("u8R" UC_U10302 UC_U00FC UC_U4E8C);
QTest::newRow("non-latin1 identifier 9") << _("u8\U00010302\u00FC\u4E8C"); QTest::newRow("non-latin1 identifier 9") << _("u8" UC_U10302 UC_U00FC UC_U4E8C);
QTest::newRow("non-latin1 identifier 10") << _("u\U00010302\u00FC\u4E8C"); QTest::newRow("non-latin1 identifier 10") << _("u" UC_U10302 UC_U00FC UC_U4E8C);
} }
static QByteArray stripQuotesFromLiteral(const QByteArray literal) static QByteArray stripEncodingPrefixAndQuotationMarks(const QByteArray &literal)
{ {
QByteArray result = literal; const char quotationMark = '"';
const int firstQuotationMarkPosition = literal.indexOf(quotationMark);
// Strip front const int lastQuotationMarkPosition = literal.lastIndexOf(quotationMark);
while (!result.isEmpty() && result[0] != '"') Q_ASSERT(firstQuotationMarkPosition != -1);
result = result.mid(1); Q_ASSERT(lastQuotationMarkPosition == literal.size() - 1);
if (result.isEmpty()) Q_ASSERT(firstQuotationMarkPosition < lastQuotationMarkPosition - 1);
return QByteArray();
result = result.mid(1);
// Strip end
while (result.size() >= 2
&& (std::isspace(result[result.size() - 1]) || result[result.size()-1] == '"')) {
result.chop(1);
}
QByteArray result = literal.mid(firstQuotationMarkPosition + 1);
result.chop(1);
return result; return result;
} }
@@ -254,7 +241,7 @@ void tst_TranslationUnit::unicodeStringLiteral()
const StringLiteral *actual = document->lastStringLiteral(); const StringLiteral *actual = document->lastStringLiteral();
QCOMPARE(QString::fromUtf8(actual->chars(), actual->size()), QCOMPARE(QString::fromUtf8(actual->chars(), actual->size()),
QString::fromUtf8(stripQuotesFromLiteral(literalText))); QString::fromUtf8(stripEncodingPrefixAndQuotationMarks(literalText)));
} }
void tst_TranslationUnit::unicodeStringLiteral_data() void tst_TranslationUnit::unicodeStringLiteral_data()
@@ -265,17 +252,17 @@ void tst_TranslationUnit::unicodeStringLiteral_data()
QTest::newRow("latin1 literal") << _("\"var\""); QTest::newRow("latin1 literal") << _("\"var\"");
QTest::newRow("non-latin1 literal 1") << _("\"prefix\u00FC\u4E8C\U00010302\""); QTest::newRow("non-latin1 literal 1") << _("\"prefix" UC_U00FC UC_U4E8C UC_U10302 "\"");
QTest::newRow("non-latin1 literal 2") << _("\"prefix\U00010302\u00FC\u4E8C\""); QTest::newRow("non-latin1 literal 2") << _("\"prefix" UC_U10302 UC_U00FC UC_U4E8C"\"");
QTest::newRow("non-latin1 literal 3") << _("\"\U00010302\u00FC\u4E8C\""); QTest::newRow("non-latin1 literal 3") << _("\"" UC_U10302 UC_U00FC UC_U4E8C "\"");
QTest::newRow("non-latin1 literal 4") << _("\"\u4E8C\U00010302\u00FC\""); QTest::newRow("non-latin1 literal 4") << _("\"" UC_U4E8C UC_U10302 UC_U00FC "\"");
QTest::newRow("non-latin1 literal 5") << _("\"\u4E8C\U00010302\u00FCsuffix\""); QTest::newRow("non-latin1 literal 5") << _("\"" UC_U4E8C UC_U10302 UC_U00FC "suffix\"");
QTest::newRow("non-latin1 literal 6") << _("\"\U00010302\u00FC\u4E8Csuffix\""); QTest::newRow("non-latin1 literal 6") << _("\"" UC_U10302 UC_U00FC UC_U4E8C "suffix\"");
// Rows 7-10 repeat the multi-byte sequence behind each encoding prefix (L, u8, u, U); the post-change side must splice in the UC_* macros, not their names as plain text.
QTest::newRow("non-latin1 literal 7") << _("L\"\U00010302\u00FC\u4E8C\""); QTest::newRow("non-latin1 literal 7") << _("L\"" UC_U10302 UC_U00FC UC_U4E8C "\"");
QTest::newRow("non-latin1 literal 8") << _("u8\"\U00010302\u00FC\u4E8C\""); QTest::newRow("non-latin1 literal 8") << _("u8\"" UC_U10302 UC_U00FC UC_U4E8C "\"");
QTest::newRow("non-latin1 literal 9") << _("u\"\U00010302\u00FC\u4E8C\""); QTest::newRow("non-latin1 literal 9") << _("u\"" UC_U10302 UC_U00FC UC_U4E8C "\"");
QTest::newRow("non-latin1 literal 10") << _("U\"\U00010302\u00FC\u4E8C\""); QTest::newRow("non-latin1 literal 10") << _("U\"" UC_U10302 UC_U00FC UC_U4E8C "\"");
} }
void tst_TranslationUnit::locationOfUtf16CharOffset() void tst_TranslationUnit::locationOfUtf16CharOffset()
@@ -340,7 +327,7 @@ void tst_TranslationUnit::locationOfUtf16CharOffset_data()
<< LineColumnList(); << LineColumnList();
QTest::newRow("non-latin1 identifier") QTest::newRow("non-latin1 identifier")
<< _("int \u00FC;") << _("int " UC_U00FC ";")
<< (LineColumnList() << (LineColumnList()
<< LineColumn(0, 0) << LineColumn(0, 0)
<< LineColumn(1, 1) // int << LineColumn(1, 1) // int
@@ -351,8 +338,8 @@ void tst_TranslationUnit::locationOfUtf16CharOffset_data()
<< LineColumnList(); << LineColumnList();
QTest::newRow("non-latin1 identifiers 1") QTest::newRow("non-latin1 identifiers 1")
<< _("int \u00FC;\n" << _("int " UC_U00FC ";\n"
"int \u00FC;") "int " UC_U00FC ";")
<< (LineColumnList() << (LineColumnList()
<< LineColumn(0, 0) << LineColumn(0, 0)
<< LineColumn(1, 1) // int 1 << LineColumn(1, 1) // int 1
@@ -366,9 +353,9 @@ void tst_TranslationUnit::locationOfUtf16CharOffset_data()
<< LineColumnList(); << LineColumnList();
QTest::newRow("non-latin1 identifiers 2") QTest::newRow("non-latin1 identifiers 2")
<< _("int \u00FC\u4E8C\U00010302;\n" << _("int " UC_U00FC UC_U4E8C UC_U10302 ";\n"
"int v;\n" "int v;\n"
"int \U00010302\u4E8C;") "int " UC_U10302 UC_U4E8C ";")
<< (LineColumnList() << (LineColumnList()
<< LineColumn(0, 0) << LineColumn(0, 0)
<< LineColumn(1, 1) // int 1 << LineColumn(1, 1) // int 1
@@ -402,7 +389,7 @@ void tst_TranslationUnit::locationOfUtf16CharOffset_data()
<< LineColumnList(); << LineColumnList();
QTest::newRow("non-latin1 string literal") QTest::newRow("non-latin1 string literal")
<< _("char t[] = \"i\u00FC\u4E8C\U00010302\";") << _("char t[] = \"i" UC_U00FC UC_U4E8C UC_U10302 "\";")
<< (LineColumnList() << (LineColumnList()
<< LineColumn(0, 0) << LineColumn(0, 0)
<< LineColumn(1, 1) // char << LineColumn(1, 1) // char
@@ -417,7 +404,7 @@ void tst_TranslationUnit::locationOfUtf16CharOffset_data()
<< LineColumnList(); << LineColumnList();
QTest::newRow("non-latin1 string literal multiple lines") QTest::newRow("non-latin1 string literal multiple lines")
<< _("char t[] = \"i\u00FC\u4E8C\U00010302 \\\n" << _("char t[] = \"i" UC_U00FC UC_U4E8C UC_U10302 " \\\n"
"\";") "\";")
<< (LineColumnList() << (LineColumnList()
<< LineColumn(0, 0) << LineColumn(0, 0)
@@ -476,7 +463,7 @@ void tst_TranslationUnit::locationOfUtf16CharOffset_data()
); );
QTest::newRow("non-latin1 c++ comment line") QTest::newRow("non-latin1 c++ comment line")
<< _("// comment line \u00FC\u4E8C\U00010302\n" << _("// comment line " UC_U00FC UC_U4E8C UC_U10302 "\n"
"int i;") "int i;")
<< (LineColumnList() << (LineColumnList()
<< LineColumn(0, 0) << LineColumn(0, 0)