C++/CppTools: Pass UTF-8 encoded source to Lexer

The Lexer can now handle UTF-8 encoded input, so the source no longer needs to be converted to Latin-1 before it is passed on.

Task-number: QTCREATORBUG-7356
Change-Id: I8c4b03a247656e013d44c3cedca4835e133d4036
Reviewed-by: Erik Verbruggen <erik.verbruggen@digia.com>
This commit is contained in:
Nikolai Kosjar
2013-12-12 21:36:01 +01:00
parent 587eb49c4e
commit 57ff992961
6 changed files with 6 additions and 34 deletions

View File

@@ -41,18 +41,6 @@ FastPreprocessor::FastPreprocessor(const Snapshot &snapshot)
, _preproc(this, &_env) , _preproc(this, &_env)
{ } { }
// Temporary workaround for non-ASCII characters. It can be dropped once the lexer copes with
// multi-byte characters.
//
// Walks the bytes of \a contents up to the first NUL byte. As soon as a byte with its high bit
// set is seen, the whole buffer is decoded as UTF-8 and returned re-encoded as Latin-1.
// If no such byte is found, \a contents is returned unchanged.
static QByteArray convertToLatin1(const QByteArray &contents)
{
    for (const char *cursor = contents.constData(); *cursor != '\0'; ++cursor) {
        const bool highBitSet = (*cursor & 0x80) != 0;
        if (highBitSet)
            return QString::fromUtf8(contents).toLatin1();
    }
    return contents;
}
QByteArray FastPreprocessor::run(Document::Ptr newDoc, const QByteArray &source) QByteArray FastPreprocessor::run(Document::Ptr newDoc, const QByteArray &source)
{ {
std::swap(newDoc, _currentDoc); std::swap(newDoc, _currentDoc);
@@ -72,9 +60,7 @@ QByteArray FastPreprocessor::run(Document::Ptr newDoc, const QByteArray &source)
mergeEnvironment(i.resolvedFileName()); mergeEnvironment(i.resolvedFileName());
} }
QByteArray src = convertToLatin1(source); const QByteArray preprocessed = _preproc.run(fileName, source);
const QByteArray preprocessed = _preproc.run(fileName, src);
// qDebug("FastPreprocessor::run for %s produced [[%s]]", fileName.toUtf8().constData(), preprocessed.constData()); // qDebug("FastPreprocessor::run for %s produced [[%s]]", fileName.toUtf8().constData(), preprocessed.constData());
std::swap(newDoc, _currentDoc); std::swap(newDoc, _currentDoc);
return preprocessed; return preprocessed;

View File

@@ -61,11 +61,11 @@ bool SimpleLexer::endedJoined() const
return _endedJoined; return _endedJoined;
} }
QList<Token> SimpleLexer::operator()(const QString &text, int state, bool convertToUtf8) QList<Token> SimpleLexer::operator()(const QString &text, int state)
{ {
QList<Token> tokens; QList<Token> tokens;
const QByteArray bytes = convertToUtf8 ? text.toUtf8() : text.toLatin1(); const QByteArray bytes = text.toUtf8();
const char *firstChar = bytes.constData(); const char *firstChar = bytes.constData();
const char *lastChar = firstChar + bytes.size(); const char *lastChar = firstChar + bytes.size();

View File

@@ -54,7 +54,7 @@ public:
bool endedJoined() const; bool endedJoined() const;
QList<Token> operator()(const QString &text, int state = 0, bool convertToUtf8 = false); QList<Token> operator()(const QString &text, int state = 0);
int state() const int state() const
{ return _lastState; } { return _lastState; }

View File

@@ -749,7 +749,7 @@ Preprocessor::Preprocessor(Client *client, Environment *env)
QByteArray Preprocessor::run(const QString &fileName, const QString &source) QByteArray Preprocessor::run(const QString &fileName, const QString &source)
{ {
return run(fileName, source.toLatin1()); return run(fileName, source.toUtf8());
} }
QByteArray Preprocessor::run(const QString &fileName, QByteArray Preprocessor::run(const QString &fileName,

View File

@@ -378,18 +378,6 @@ void CppPreprocessor::stopSkippingBlocks(unsigned offset)
m_currentDoc->stopSkippingBlocks(offset); m_currentDoc->stopSkippingBlocks(offset);
} }
// Temporary workaround for non-ASCII characters. It can be dropped once the lexer copes with
// multi-byte characters.
//
// Walks the bytes of \a contents up to the first NUL byte. As soon as a byte with its high bit
// set is seen, the whole buffer is decoded as UTF-8 and returned re-encoded as Latin-1.
// If no such byte is found, \a contents is returned unchanged.
static QByteArray convertToLatin1(const QByteArray &contents)
{
    for (const char *cursor = contents.constData(); *cursor != '\0'; ++cursor) {
        const bool highBitSet = (*cursor & 0x80) != 0;
        if (highBitSet)
            return QString::fromUtf8(contents).toLatin1();
    }
    return contents;
}
void CppPreprocessor::sourceNeeded(unsigned line, const QString &fileName, IncludeType type) void CppPreprocessor::sourceNeeded(unsigned line, const QString &fileName, IncludeType type)
{ {
typedef Document::DiagnosticMessage Message; typedef Document::DiagnosticMessage Message;
@@ -424,7 +412,6 @@ void CppPreprocessor::sourceNeeded(unsigned line, const QString &fileName, Inclu
unsigned editorRevision = 0; unsigned editorRevision = 0;
QByteArray contents; QByteArray contents;
const bool gotFileContents = getFileContents(absoluteFileName, &contents, &editorRevision); const bool gotFileContents = getFileContents(absoluteFileName, &contents, &editorRevision);
contents = convertToLatin1(contents);
if (m_currentDoc && !gotFileContents) { if (m_currentDoc && !gotFileContents) {
const QString text = QCoreApplication::translate( const QString text = QCoreApplication::translate(
"CppPreprocessor", "%1: Could not get file contents").arg(fileName); "CppPreprocessor", "%1: Could not get file contents").arg(fileName);

View File

@@ -114,8 +114,7 @@ void tst_SimpleLexer::run(const QByteArray &source,
QVERIFY(compareFlags); QVERIFY(compareFlags);
SimpleLexer lexer; SimpleLexer lexer;
const QList<Token> tokenList = lexer(source, preserveState ? _state : 0, const QList<Token> tokenList = lexer(source, preserveState ? _state : 0);
/*convertToUtf8=*/ true);
if (preserveState) if (preserveState)
_state = lexer.state(); _state = lexer.state();