From 17c1325cc45eb563333d28725a51c04e33cd01f2 Mon Sep 17 00:00:00 2001 From: Nikolai Kosjar Date: Fri, 29 Apr 2016 11:35:42 +0200 Subject: [PATCH] Clang: Fix dot to arrow correction The position of the dot character was determined on an outdated translation unit. We queried the translation unit for the source location of the dot character, but apparently clang_codeCompleteAt() does not update the source locations for the translation unit. And we do not want to reparse since this is expensive. Thus, determine the byte position manually by scanning over the document until the right line/column is reached. Change-Id: I25e256bb81a83bb71c7e46a0fb3e927bf4031b16 Reviewed-by: Erik Verbruggen Reviewed-by: Eike Ziller --- .../ipcsource/clangbackendclangipc-source.pri | 6 +- .../clangbackend/ipcsource/codecompleter.cpp | 3 +- .../clangbackend/ipcsource/unsavedfile.cpp | 12 ++ .../clangbackend/ipcsource/unsavedfile.h | 2 + .../ipcsource/utf8positionfromlinecolumn.cpp | 107 +++++++++++ .../ipcsource/utf8positionfromlinecolumn.h | 54 ++++++ tests/unit/unittest/codecompletiontest.cpp | 20 +++ ...ithDotArrowCorrectionForPointerInitial.cpp | 6 + ...ithDotArrowCorrectionForPointerUpdated.cpp | 6 + tests/unit/unittest/unittest.pro | 3 +- tests/unit/unittest/unsavedfiletest.cpp | 7 + .../utf8positionfromlinecolumntest.cpp | 166 ++++++++++++++++++ 12 files changed, 387 insertions(+), 5 deletions(-) create mode 100644 src/tools/clangbackend/ipcsource/utf8positionfromlinecolumn.cpp create mode 100644 src/tools/clangbackend/ipcsource/utf8positionfromlinecolumn.h create mode 100644 tests/unit/unittest/data/complete_withDotArrowCorrectionForPointerInitial.cpp create mode 100644 tests/unit/unittest/data/complete_withDotArrowCorrectionForPointerUpdated.cpp create mode 100644 tests/unit/unittest/utf8positionfromlinecolumntest.cpp diff --git a/src/tools/clangbackend/ipcsource/clangbackendclangipc-source.pri b/src/tools/clangbackend/ipcsource/clangbackendclangipc-source.pri index 6140f1ca888..e5ecc0cb2a7 100644 --- a/src/tools/clangbackend/ipcsource/clangbackendclangipc-source.pri +++ b/src/tools/clangbackend/ipcsource/clangbackendclangipc-source.pri @@ -33,7 +33,8 @@ HEADERS += $$PWD/clangipcserver.h \ $$PWD/highlightinginformationsiterator.h \ $$PWD/skippedsourceranges.h \ $$PWD/clangtranslationunit.h \ - $$PWD/clangtype.h + $$PWD/clangtype.h \ + $$PWD/utf8positionfromlinecolumn.h SOURCES += $$PWD/clangipcserver.cpp \ $$PWD/codecompleter.cpp \ @@ -66,4 +67,5 @@ SOURCES += $$PWD/clangipcserver.cpp \ $$PWD/highlightinginformation.cpp \ $$PWD/skippedsourceranges.cpp \ $$PWD/clangtranslationunit.cpp \ - $$PWD/clangtype.cpp + $$PWD/clangtype.cpp \ + $$PWD/utf8positionfromlinecolumn.cpp diff --git a/src/tools/clangbackend/ipcsource/codecompleter.cpp b/src/tools/clangbackend/ipcsource/codecompleter.cpp index 7381ac2d27f..436c6680524 100644 --- a/src/tools/clangbackend/ipcsource/codecompleter.cpp +++ b/src/tools/clangbackend/ipcsource/codecompleter.cpp @@ -96,9 +96,8 @@ ClangCodeCompleteResults CodeCompleter::complete(uint line, bool CodeCompleter::hasDotAt(uint line, uint column) const { const UnsavedFile &unsavedFile = translationUnit.unsavedFile(); - const SourceLocation location = translationUnit.sourceLocationAtWithoutReparsing(line, column); - return unsavedFile.hasCharacterAt(location.offset(), '.'); + return unsavedFile.hasCharacterAt(line, column, '.'); } uint CodeCompleter::defaultOptions() const diff --git a/src/tools/clangbackend/ipcsource/unsavedfile.cpp b/src/tools/clangbackend/ipcsource/unsavedfile.cpp index 2cb3419545e..a4d6f0fb82d 100644 --- a/src/tools/clangbackend/ipcsource/unsavedfile.cpp +++ b/src/tools/clangbackend/ipcsource/unsavedfile.cpp @@ -26,6 +26,7 @@ #include "unsavedfile.h" #include "utf8string.h" +#include "utf8positionfromlinecolumn.h" #include #include @@ -70,6 +71,17 @@ const char *UnsavedFile::filePath() const return cxUnsavedFile.Filename; } +bool UnsavedFile::hasCharacterAt(uint line, uint column, char character) const +{ + Utf8PositionFromLineColumn converter(cxUnsavedFile.Contents); + if (converter.find(line, column)) { + const uint utf8Position = converter.position(); + return hasCharacterAt(utf8Position, character); + } + + return false; +} + bool UnsavedFile::hasCharacterAt(uint position, char character) const { if (position < cxUnsavedFile.Length) diff --git a/src/tools/clangbackend/ipcsource/unsavedfile.h b/src/tools/clangbackend/ipcsource/unsavedfile.h index 51b66c1b393..3433093ae15 100644 --- a/src/tools/clangbackend/ipcsource/unsavedfile.h +++ b/src/tools/clangbackend/ipcsource/unsavedfile.h @@ -54,6 +54,8 @@ public: const char *filePath() const; + // 1-based line and column + bool hasCharacterAt(uint line, uint column, char character) const; bool hasCharacterAt(uint position, char character) const; bool replaceAt(uint position, uint length, const Utf8String &replacement); diff --git a/src/tools/clangbackend/ipcsource/utf8positionfromlinecolumn.cpp b/src/tools/clangbackend/ipcsource/utf8positionfromlinecolumn.cpp new file mode 100644 index 00000000000..1d7ea1a88f0 --- /dev/null +++ b/src/tools/clangbackend/ipcsource/utf8positionfromlinecolumn.cpp @@ -0,0 +1,107 @@ +/**************************************************************************** +** +** Copyright (C) 2016 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of Qt Creator. +** +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3 as published by the Free Software +** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-3.0.html. +** +****************************************************************************/ + +#include "utf8positionfromlinecolumn.h" + +#include + +namespace ClangBackEnd { + +Utf8PositionFromLineColumn::Utf8PositionFromLineColumn(const char *utf8Text) + : m_utf8Text(utf8Text) + , m_currentByte(utf8Text) +{ +} + +bool Utf8PositionFromLineColumn::find(uint line, uint column) +{ + if (!m_utf8Text || *m_utf8Text == '\0' || line == 0 || column == 0) + return false; + + return advanceToLine(line) + && advanceToColumn(column); +} + +uint Utf8PositionFromLineColumn::position() const +{ + return m_previousByte - m_utf8Text; +} + +bool Utf8PositionFromLineColumn::advanceToLine(uint line) +{ + if (line == 1) + return true; + + uint currentLine = 1; + do { + if (*m_currentByte == '\n' && ++currentLine == line) { + advanceCodePoint(); + return true; + } + } while (advanceCodePoint()); + + return false; +} + +bool Utf8PositionFromLineColumn::advanceToColumn(uint column) +{ + while (column) { + if (advanceCodePoint(/*stopOnNewLine=*/ true)) + --column; + else + break; + } + + return column == 0; +} + +static bool isByteOfMultiByteCodePoint(unsigned char byte) +{ + return byte & 0x80; // Check if most significant bit is set +} + +bool Utf8PositionFromLineColumn::advanceCodePoint(bool stopOnNewLine) +{ + if (Q_UNLIKELY(*m_currentByte == '\0') || (stopOnNewLine && *m_currentByte == '\n')) + return false; + + m_previousByte = m_currentByte; + + // Process multi-byte UTF-8 code point (non-latin1) + if (Q_UNLIKELY(isByteOfMultiByteCodePoint(*m_currentByte))) { + unsigned trailingBytesCurrentCodePoint = 1; + for (unsigned char c = (*m_currentByte) << 2; isByteOfMultiByteCodePoint(c); c <<= 1) + ++trailingBytesCurrentCodePoint; + m_currentByte += trailingBytesCurrentCodePoint + 1; + + // Process single-byte UTF-8 code point (latin1) + } else { + ++m_currentByte; + } + + return true; +} + +} // namespace ClangBackEnd diff --git a/src/tools/clangbackend/ipcsource/utf8positionfromlinecolumn.h b/src/tools/clangbackend/ipcsource/utf8positionfromlinecolumn.h new file mode 100644 index 00000000000..facb1527728 --- /dev/null +++ b/src/tools/clangbackend/ipcsource/utf8positionfromlinecolumn.h @@ -0,0 +1,54 @@ +/**************************************************************************** +** +** Copyright (C) 2016 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of Qt Creator. +** +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3 as published by the Free Software +** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-3.0.html. +** +****************************************************************************/ + +#pragma once + +namespace ClangBackEnd { + +using uint = unsigned; + +class Utf8PositionFromLineColumn +{ +public: + Utf8PositionFromLineColumn(const char *utf8Text); + + // 1-based line and column + bool find(uint line, uint column); + + uint position() const; + +private: + bool advanceToLine(uint line); + bool advanceToColumn(uint column); + bool advanceCodePoint(bool stopOnNewLine = false); + +private: + const char * const m_utf8Text = 0; + + const char *m_previousByte = 0; + const char *m_currentByte = 0; +}; + +} // namespace ClangBackEnd diff --git a/tests/unit/unittest/codecompletiontest.cpp b/tests/unit/unittest/codecompletiontest.cpp index 8ace4a8606a..a370f935867 100644 --- a/tests/unit/unittest/codecompletiontest.cpp +++ b/tests/unit/unittest/codecompletiontest.cpp @@ -112,6 +112,18 @@ protected: readFileContent(QStringLiteral("/complete_withDotArrowCorrectionForPointer.cpp")), true }; + ClangBackEnd::FileContainer dotArrowCorrectionForPointerFileContainerInitial{ + Utf8StringLiteral(TESTDATA_DIR"/complete_withDotArrowCorrectionForPointer.cpp"), + projectPart.projectPartId(), + readFileContent(QStringLiteral("/complete_withDotArrowCorrectionForPointerInitial.cpp")), + true + }; + ClangBackEnd::FileContainer dotArrowCorrectionForPointerFileContainerUpdated{ + Utf8StringLiteral(TESTDATA_DIR"/complete_withDotArrowCorrectionForPointer.cpp"), + projectPart.projectPartId(), + readFileContent(QStringLiteral("/complete_withDotArrowCorrectionForPointerUpdated.cpp")), + true + }; ClangBackEnd::FileContainer noDotArrowCorrectionForObjectFileContainer{ Utf8StringLiteral(TESTDATA_DIR"/complete_withNoDotArrowCorrectionForObject.cpp"), projectPart.projectPartId(), @@ -308,6 +320,14 @@ TEST_F(CodeCompleter, HasDotAt) ASSERT_TRUE(myCompleter.hasDotAt(5, 8)); } +TEST_F(CodeCompleter, HasDotAtWithUpdatedUnsavedFile) +{ + auto myCompleter = setupCompleter(dotArrowCorrectionForPointerFileContainerInitial); + unsavedFiles.createOrUpdate({dotArrowCorrectionForPointerFileContainerUpdated}); + + ASSERT_TRUE(myCompleter.hasDotAt(5, 8)); +} + TEST_F(CodeCompleter, HasNoDotAtDueToMissingUnsavedFile) { const ClangBackEnd::FileContainer fileContainer = dotArrowCorrectionForPointerFileContainer; diff --git a/tests/unit/unittest/data/complete_withDotArrowCorrectionForPointerInitial.cpp b/tests/unit/unittest/data/complete_withDotArrowCorrectionForPointerInitial.cpp new file mode 100644 index 00000000000..0472cfacaa4 --- /dev/null +++ b/tests/unit/unittest/data/complete_withDotArrowCorrectionForPointerInitial.cpp @@ -0,0 +1,6 @@ +struct Foo { int member; }; + +void g(Foo *foo) +{ + +} diff --git a/tests/unit/unittest/data/complete_withDotArrowCorrectionForPointerUpdated.cpp b/tests/unit/unittest/data/complete_withDotArrowCorrectionForPointerUpdated.cpp new file mode 100644 index 00000000000..9f8d3645b2e --- /dev/null +++ b/tests/unit/unittest/data/complete_withDotArrowCorrectionForPointerUpdated.cpp @@ -0,0 +1,6 @@ +struct Foo { int member; }; + +void g(Foo *foo) +{ + foo. +} diff --git a/tests/unit/unittest/unittest.pro b/tests/unit/unittest/unittest.pro index 55e35621ed6..dab05e794fd 100644 --- a/tests/unit/unittest/unittest.pro +++ b/tests/unit/unittest/unittest.pro @@ -68,7 +68,8 @@ SOURCES += \ unsavedfiletest.cpp \ clangisdiagnosticrelatedtolocationtest.cpp \ smallstringtest.cpp \ - sizedarraytest.cpp + sizedarraytest.cpp \ + utf8positionfromlinecolumntest.cpp exists($$GOOGLEBENCHMARK_DIR) { SOURCES += \ diff --git a/tests/unit/unittest/unsavedfiletest.cpp b/tests/unit/unittest/unsavedfiletest.cpp index 80f22ded354..3f0ae6d90b1 100644 --- a/tests/unit/unittest/unsavedfiletest.cpp +++ b/tests/unit/unittest/unsavedfiletest.cpp @@ -171,4 +171,11 @@ TEST_F(UnsavedFile, HasCharacterForCorrectOffset) ASSERT_TRUE(unsavedFile.hasCharacterAt(0, 'c')); } +TEST_F(UnsavedFile, HasCharacterForLastLineColumn) +{ + ::UnsavedFile unsavedFile(filePath, fileContent); + + ASSERT_TRUE(unsavedFile.hasCharacterAt(1, 7, 't')); +} + } // anonymous namespace diff --git a/tests/unit/unittest/utf8positionfromlinecolumntest.cpp b/tests/unit/unittest/utf8positionfromlinecolumntest.cpp new file mode 100644 index 00000000000..683483ddab7 --- /dev/null +++ b/tests/unit/unittest/utf8positionfromlinecolumntest.cpp @@ -0,0 +1,166 @@ +/**************************************************************************** +** +** Copyright (C) 2016 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of Qt Creator. +** +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3 as published by the Free Software +** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-3.0.html. +** +****************************************************************************/ + +#include "gtest/gtest.h" +#include "gmock/gmock-matchers.h" +#include "gmock/gmock.h" +#include "gtest-qt-printing.h" + +#include +#include + +using ClangBackEnd::Utf8PositionFromLineColumn; +using ::testing::PrintToString; + +namespace { + +class Utf8PositionFromLineColumn : public ::testing::Test +{ +protected: + Utf8String empty; + Utf8String asciiWord = Utf8StringLiteral("FOO"); + Utf8String asciiMultiLine = Utf8StringLiteral("FOO\nBAR"); + Utf8String asciiEmptyMultiLine = Utf8StringLiteral("\n\n"); + // U+00FC - 2 code units in UTF8, 1 in UTF16 - LATIN SMALL LETTER U WITH DIAERESIS + // U+4E8C - 3 code units in UTF8, 1 in UTF16 - CJK UNIFIED IDEOGRAPH-4E8C + // U+10302 - 4 code units in UTF8, 2 in UTF16 - OLD ITALIC LETTER KE + Utf8String nonAsciiMultiLine = Utf8StringLiteral("\xc3\xbc" "\n" + "\xe4\xba\x8c" "\n" + "\xf0\x90\x8c\x82" "X"); +}; + +TEST_F(Utf8PositionFromLineColumn, FindsNothingForEmptyContents) +{ + ::Utf8PositionFromLineColumn converter(empty.constData()); + + ASSERT_FALSE(converter.find(1, 1)); +} + +TEST_F(Utf8PositionFromLineColumn, FindsNothingForNoContents) +{ + ::Utf8PositionFromLineColumn converter(0); + + ASSERT_FALSE(converter.find(1, 1)); +} + +TEST_F(Utf8PositionFromLineColumn, FindsNothingForLineZero) +{ + ::Utf8PositionFromLineColumn converter(asciiWord.constData()); + + ASSERT_FALSE(converter.find(0, 1)); +} + +TEST_F(Utf8PositionFromLineColumn, FindsNothingForColumnZero) +{ + ::Utf8PositionFromLineColumn converter(asciiWord.constData()); + + ASSERT_FALSE(converter.find(1, 0)); +} + +TEST_F(Utf8PositionFromLineColumn, FindsFirstForAsciiWord) +{ + ::Utf8PositionFromLineColumn converter(asciiWord.constData()); + + ASSERT_TRUE(converter.find(1, 1)); + ASSERT_THAT(converter.position(), 0); +} + +TEST_F(Utf8PositionFromLineColumn, FindsLastForAsciiWord) +{ + ::Utf8PositionFromLineColumn converter(asciiWord.constData()); + + ASSERT_TRUE(converter.find(1, 3)); + ASSERT_THAT(converter.position(), 2); +} + +TEST_F(Utf8PositionFromLineColumn, FindsFirstForSecondLineAscii) +{ + ::Utf8PositionFromLineColumn converter(asciiMultiLine.constData()); + + ASSERT_TRUE(converter.find(2, 1)); + ASSERT_THAT(converter.position(), 4); +} + +TEST_F(Utf8PositionFromLineColumn, FindsLastForAsciiMultiLine) +{ + ::Utf8PositionFromLineColumn converter(asciiMultiLine.constData()); + + ASSERT_TRUE(converter.find(2, 3)); + ASSERT_THAT(converter.position(), 6); +} + +TEST_F(Utf8PositionFromLineColumn, FindsLastInFirstLine) +{ + ::Utf8PositionFromLineColumn converter(asciiMultiLine.constData()); + + ASSERT_TRUE(converter.find(1, 3)); + ASSERT_THAT(converter.position(), 2); +} + +TEST_F(Utf8PositionFromLineColumn, FindsNothingBeyondMaxColumn) +{ + ::Utf8PositionFromLineColumn converter(asciiWord.constData()); + + ASSERT_FALSE(converter.find(1, 4)); +} + +TEST_F(Utf8PositionFromLineColumn, FindsNothingBeyondMaxColumnForMultiLine) +{ + ::Utf8PositionFromLineColumn converter(asciiMultiLine.constData()); + + ASSERT_FALSE(converter.find(1, 4)); +} + +TEST_F(Utf8PositionFromLineColumn, FindsNothingBeyondMaxLine) +{ + ::Utf8PositionFromLineColumn converter(asciiWord.constData()); + + ASSERT_FALSE(converter.find(2, 1)); +} + +TEST_F(Utf8PositionFromLineColumn, FindsNothingForLineWithoutContent) +{ + ::Utf8PositionFromLineColumn converter(asciiEmptyMultiLine.constData()); + + ASSERT_FALSE(converter.find(1, 1)); +} + +TEST_F(Utf8PositionFromLineColumn, FindsFirstOnNonAsciiMultiLine) +{ + ::Utf8PositionFromLineColumn converter(nonAsciiMultiLine.constData()); + + ASSERT_TRUE(converter.find(1, 1)); + ASSERT_THAT(converter.position(), 0); +} + +TEST_F(Utf8PositionFromLineColumn, FindsLastOnNonAsciiMultiLine) +{ + ::Utf8PositionFromLineColumn converter(nonAsciiMultiLine.constData()); + + ASSERT_TRUE(converter.find(3, 2)); + ASSERT_THAT(converter.position(), 11); +} + +} // anonymous namespace