Mimetypes v3: Improve performance

When looking up mimetypes by glob or magic, we must not add a mimetype if
it is overridden by an earlier provider, even when that provider did not
find a match.

The patch that implemented this was very inefficient, because it built a
huge list of "handled mimetypes" that had to be searched every time a
mimetype was determined.

Instead, inform each provider about its overridden mimetypes once, after
loading the mimetypes.

Amends 98b1e82d2b

Fixes: QTCREATORBUG-27319
Change-Id: I409856c272c334798d7a71ce590ff2d4f69c71a1
Reviewed-by: Jarek Kobus <jaroslaw.kobus@qt.io>
This commit is contained in:
Eike Ziller
2022-04-25 15:50:15 +02:00
parent 74b1623acd
commit c865fe50f2
5 changed files with 63 additions and 113 deletions

View File

@@ -47,6 +47,8 @@
#include "mimetype_p.h"
#include "mimeutils.h"
#include "algorithm.h"
#include <QtCore/QFile>
#include <QtCore/QFileInfo>
#include <QtCore/QStandardPaths>
@@ -99,6 +101,27 @@ bool MimeDatabasePrivate::shouldCheck()
# define QT_USE_MMAP
#endif
static void updateOverriddenMimeTypes(std::vector<std::unique_ptr<MimeProviderBase>> &providers)
{
// If a provider earlier in the list already defines a mimetype, it should override the
// mimetype definition of following providers. Go through everything once here, telling each
// provider which mimetypes are overridden by earlier providers.
QList<MimeProviderBase *> handledProviders;
for (std::unique_ptr<MimeProviderBase> &provider : providers) {
provider->m_overriddenMimeTypes.clear();
const QStringList ownMimetypes = provider->allMimeTypeNames();
for (MimeProviderBase *other : handledProviders) {
const QStringList overridden = Utils::filtered(ownMimetypes,
[other](const QString &type) {
return other->hasMimeTypeForName(
type);
});
provider->m_overriddenMimeTypes.unite(QSet(overridden.cbegin(), overridden.cend()));
}
handledProviders.append(provider.get());
}
}
void MimeDatabasePrivate::loadProviders()
{
#if 0
@@ -184,6 +207,8 @@ void MimeDatabasePrivate::loadProviders()
m_providers.push_back(std::move(*it));
}
}
updateOverriddenMimeTypes(m_providers);
}
const MimeDatabasePrivate::Providers &MimeDatabasePrivate::providers()
@@ -244,9 +269,8 @@ MimeGlobMatchResult MimeDatabasePrivate::findByFileName(const QString &fileName)
{
MimeGlobMatchResult result;
const QString fileNameExcludingPath = QFileInfo(fileName).fileName();
QList<QString> checkedMimeTypes;
for (const auto &provider : providers())
provider->addFileNameMatches(fileNameExcludingPath, result, checkedMimeTypes);
provider->addFileNameMatches(fileNameExcludingPath, result);
return result;
}
@@ -387,9 +411,8 @@ MimeType MimeDatabasePrivate::findByData(const QByteArray &data, int *accuracyPt
*accuracyPtr = 0;
MimeType candidate;
QList<QString> checkedMimeTypes;
for (const auto &provider : providers())
provider->findByMagic(data, accuracyPtr, candidate, checkedMimeTypes);
provider->findByMagic(data, accuracyPtr, candidate);
if (candidate.isValid())
return candidate;

View File

@@ -256,7 +256,7 @@ void MimeAllGlobPatterns::removeMimeType(const QString &mimeType)
void MimeGlobPatternList::match(MimeGlobMatchResult &result,
const QString &fileName,
const QList<QString> &ignoreMimeTypes) const
const QSet<QString> &ignoreMimeTypes) const
{
MimeGlobPatternList::const_iterator it = this->constBegin();
@@ -275,7 +275,7 @@ void MimeGlobPatternList::match(MimeGlobMatchResult &result,
void MimeAllGlobPatterns::matchingGlobs(const QString &fileName,
MimeGlobMatchResult &result,
const QList<QString> &ignoreMimeTypes) const
const QSet<QString> &ignoreMimeTypes) const
{
// First try the high weight matches (>50), if any.
m_highWeightGlobs.match(result, fileName, ignoreMimeTypes);

View File

@@ -142,7 +142,7 @@ public:
void match(MimeGlobMatchResult &result,
const QString &fileName,
const QList<QString> &ignoreMimeTypes) const;
const QSet<QString> &ignoreMimeTypes) const;
};
/*!
@@ -161,7 +161,7 @@ public:
void removeMimeType(const QString &mimeType);
void matchingGlobs(const QString &fileName,
MimeGlobMatchResult &result,
const QList<QString> &ignoreMimeTypes) const;
const QSet<QString> &ignoreMimeTypes) const;
void clear();
PatternsMap m_fastPatterns; // example: "doc" -> "application/msword", "text/plain"

View File

@@ -239,9 +239,7 @@ MimeType MimeBinaryProvider::mimeTypeForName(const QString &name)
return mimeTypeForNameUnchecked(name);
}
void MimeBinaryProvider::addFileNameMatches(const QString &fileName,
MimeGlobMatchResult &result,
QList<QString> &checkedMimeTypes)
void MimeBinaryProvider::addFileNameMatches(const QString &fileName, MimeGlobMatchResult &result)
{
// TODO checkedMimeTypes
if (fileName.isEmpty())
@@ -249,11 +247,7 @@ void MimeBinaryProvider::addFileNameMatches(const QString &fileName,
Q_ASSERT(m_cacheFile);
const QString lowerFileName = fileName.toLower();
// Check literals (e.g. "Makefile")
matchGlobList(result,
m_cacheFile,
m_cacheFile->getUint32(PosLiteralListOffset),
fileName,
checkedMimeTypes);
matchGlobList(result, m_cacheFile, m_cacheFile->getUint32(PosLiteralListOffset), fileName);
// Check the very common *.txt cases with the suffix tree
if (result.m_matchingMimeTypes.isEmpty()) {
const int reverseSuffixTreeOffset = m_cacheFile->getUint32(PosReverseSuffixTreeOffset);
@@ -265,8 +259,7 @@ void MimeBinaryProvider::addFileNameMatches(const QString &fileName,
firstRootOffset,
lowerFileName,
lowerFileName.length() - 1,
false,
checkedMimeTypes);
false);
if (result.m_matchingMimeTypes.isEmpty())
matchSuffixTree(result,
m_cacheFile,
@@ -274,27 +267,17 @@ void MimeBinaryProvider::addFileNameMatches(const QString &fileName,
firstRootOffset,
fileName,
fileName.length() - 1,
true,
checkedMimeTypes);
true);
}
// Check complex globs (e.g. "callgrind.out[0-9]*" or "README*")
if (result.m_matchingMimeTypes.isEmpty())
matchGlobList(result,
m_cacheFile,
m_cacheFile->getUint32(PosGlobListOffset),
fileName,
checkedMimeTypes);
// add all mime types from this provider to checkedMimeTypes, so they
// don't get checked again by another provider (if this provider overrides
// a mime type from another provider)
addAllMimeTypeNames(checkedMimeTypes);
matchGlobList(result, m_cacheFile, m_cacheFile->getUint32(PosGlobListOffset), fileName);
}
void MimeBinaryProvider::matchGlobList(MimeGlobMatchResult &result,
CacheFile *cacheFile,
int off,
const QString &fileName,
const QList<QString> &ignoreMimeTypes)
const QString &fileName)
{
const int numGlobs = cacheFile->getUint32(off);
//qDebug() << "Loading" << numGlobs << "globs from" << cacheFile->file.fileName() << "at offset" << cacheFile->globListOffset;
@@ -308,7 +291,7 @@ void MimeBinaryProvider::matchGlobList(MimeGlobMatchResult &result,
const QString pattern = QLatin1String(cacheFile->getCharStar(globOffset));
const char *mimeType = cacheFile->getCharStar(mimeTypeOffset);
if (ignoreMimeTypes.contains(QLatin1String(mimeType)))
if (m_overriddenMimeTypes.contains(QLatin1String(mimeType)))
continue;
//qDebug() << pattern << mimeType << weight << caseSensitive;
MimeGlobPattern glob(pattern, QString() /*unused*/, weight, qtCaseSensitive);
@@ -324,8 +307,7 @@ bool MimeBinaryProvider::matchSuffixTree(MimeGlobMatchResult &result,
int firstOffset,
const QString &fileName,
int charPos,
bool caseSensitiveCheck,
const QList<QString> &ignoreMimeTypes)
bool caseSensitiveCheck)
{
QChar fileChar = fileName[charPos];
int min = 0;
@@ -350,8 +332,7 @@ bool MimeBinaryProvider::matchSuffixTree(MimeGlobMatchResult &result,
childrenOffset,
fileName,
charPos,
caseSensitiveCheck,
ignoreMimeTypes);
caseSensitiveCheck);
if (!success) {
for (int i = 0; i < numChildren; ++i) {
const int childOff = childrenOffset + 12 * i;
@@ -360,7 +341,7 @@ bool MimeBinaryProvider::matchSuffixTree(MimeGlobMatchResult &result,
break;
const int mimeTypeOffset = cacheFile->getUint32(childOff + 4);
const char *mimeType = cacheFile->getCharStar(mimeTypeOffset);
if (ignoreMimeTypes.contains(QLatin1String(mimeType)))
if (m_overriddenMimeTypes.contains(QLatin1String(mimeType)))
continue;
const int flagsAndWeight = cacheFile->getUint32(childOff + 8);
const int weight = flagsAndWeight & 0xff;
@@ -406,10 +387,7 @@ bool MimeBinaryProvider::matchMagicRule(MimeBinaryProvider::CacheFile *cacheFile
return false;
}
void MimeBinaryProvider::findByMagic(const QByteArray &data,
int *accuracyPtr,
MimeType &candidate,
QList<QString> &checkedMimeTypes)
void MimeBinaryProvider::findByMagic(const QByteArray &data, int *accuracyPtr, MimeType &candidate)
{
const int magicListOffset = m_cacheFile->getUint32(PosMagicListOffset);
const int numMatches = m_cacheFile->getUint32(magicListOffset);
@@ -423,7 +401,7 @@ void MimeBinaryProvider::findByMagic(const QByteArray &data,
if (matchMagicRule(m_cacheFile, numMatchlets, firstMatchletOffset, data)) {
const int mimeTypeOffset = m_cacheFile->getUint32(off + 4);
const char *mimeType = m_cacheFile->getCharStar(mimeTypeOffset);
if (checkedMimeTypes.contains(QLatin1String(mimeType)))
if (m_overriddenMimeTypes.contains(QLatin1String(mimeType)))
continue;
*accuracyPtr = m_cacheFile->getUint32(off);
// Return the first match. We have no rules for conflicting magic data...
@@ -432,10 +410,6 @@ void MimeBinaryProvider::findByMagic(const QByteArray &data,
return;
}
}
// add all mime types from this provider to checkedMimeTypes, so they
// don't get checked again by another provider (if this provider overrides
// a mime type from another provider)
addAllMimeTypeNames(checkedMimeTypes);
}
void MimeBinaryProvider::addParents(const QString &mime, QStringList &result)
@@ -773,26 +747,17 @@ MimeType MimeXMLProvider::mimeTypeForName(const QString &name)
return m_nameMimeTypeMap.value(name);
}
void MimeXMLProvider::addFileNameMatches(const QString &fileName,
MimeGlobMatchResult &result,
QList<QString> &checkedMimeTypes)
void MimeXMLProvider::addFileNameMatches(const QString &fileName, MimeGlobMatchResult &result)
{
m_mimeTypeGlobs.matchingGlobs(fileName, result, checkedMimeTypes);
// add all mime types from this provider to checkedMimeTypes, so they
// don't get checked again by another provider (if this provider overrides
// a mime type from another provider)
addAllMimeTypeNames(checkedMimeTypes);
m_mimeTypeGlobs.matchingGlobs(fileName, result, m_overriddenMimeTypes);
}
void MimeXMLProvider::findByMagic(const QByteArray &data,
int *accuracyPtr,
MimeType &candidate,
QList<QString> &checkedMimeTypes)
void MimeXMLProvider::findByMagic(const QByteArray &data, int *accuracyPtr, MimeType &candidate)
{
QString candidateName;
bool foundOne = false;
for (const MimeMagicRuleMatcher &matcher : qAsConst(m_magicMatchers)) {
if (checkedMimeTypes.contains(matcher.mimetype()))
if (m_overriddenMimeTypes.contains(matcher.mimetype()))
continue;
if (matcher.matches(data)) {
const int priority = matcher.priority();
@@ -805,10 +770,6 @@ void MimeXMLProvider::findByMagic(const QByteArray &data,
}
if (foundOne)
candidate = mimeTypeForName(candidateName);
// add all mime types from this provider to checkedMimeTypes, so they
// don't get checked again by another provider (if this provider overrides
// a mime type from another provider)
addAllMimeTypeNames(checkedMimeTypes);
}
void MimeXMLProvider::ensureLoaded()
@@ -965,17 +926,11 @@ bool MimeBinaryProvider::hasMimeTypeForName(const QString &name)
return m_mimetypeNames.contains(name);
}
void MimeBinaryProvider::addAllMimeTypeNames(QList<QString> &result)
QStringList MimeBinaryProvider::allMimeTypeNames()
{
// similar to addAllMimeTypes
loadMimeTypeList();
if (result.isEmpty()) { // fast path
result = QList(m_mimetypeNames.cbegin(), m_mimetypeNames.cend());
} else {
for (const QString &name : qAsConst(m_mimetypeNames))
if (!result.contains(name))
result.append(name);
}
return QList(m_mimetypeNames.cbegin(), m_mimetypeNames.cend());
}
bool MimeXMLProvider::hasMimeTypeForName(const QString &name)
@@ -983,20 +938,10 @@ bool MimeXMLProvider::hasMimeTypeForName(const QString &name)
return m_nameMimeTypeMap.contains(name);
}
void MimeXMLProvider::addAllMimeTypeNames(QList<QString> &result)
QStringList MimeXMLProvider::allMimeTypeNames()
{
// similar to addAllMimeTypes
if (result.isEmpty()) { // fast path
result = m_nameMimeTypeMap.keys();
} else {
for (auto it = m_nameMimeTypeMap.constBegin(), end = m_nameMimeTypeMap.constEnd();
it != end;
++it) {
const QString newMime = it.key();
if (!result.contains(newMime))
result.append(newMime);
}
}
return m_nameMimeTypeMap.keys();
}
QMap<int, QList<MimeMagicRule>> MimeBinaryProvider::magicRulesForMimeType(const MimeType &mimeType) const

View File

@@ -70,18 +70,11 @@ public:
virtual bool isValid() = 0;
virtual bool isInternalDatabase() const = 0;
virtual MimeType mimeTypeForName(const QString &name) = 0;
virtual void addFileNameMatches(const QString &fileName,
MimeGlobMatchResult &result,
QList<QString> &checkedMimeTypes)
= 0;
virtual void addFileNameMatches(const QString &fileName, MimeGlobMatchResult &result) = 0;
virtual void addParents(const QString &mime, QStringList &result) = 0;
virtual QString resolveAlias(const QString &name) = 0;
virtual void addAliases(const QString &name, QStringList &result) = 0;
virtual void findByMagic(const QByteArray &data,
int *accuracyPtr,
MimeType &candidate,
QList<QString> &checkedMimeTypes)
= 0;
virtual void findByMagic(const QByteArray &data, int *accuracyPtr, MimeType &candidate) = 0;
virtual void addAllMimeTypes(QList<MimeType> &result) = 0;
virtual bool loadMimeTypePrivate(MimeTypePrivate &) { return false; }
virtual void loadIcon(MimeTypePrivate &) {}
@@ -92,7 +85,7 @@ public:
// added for Qt Creator
virtual bool hasMimeTypeForName(const QString &name) = 0;
virtual void addAllMimeTypeNames(QList<QString> &result) = 0;
virtual QStringList allMimeTypeNames() = 0;
virtual QMap<int, QList<MimeMagicRule>> magicRulesForMimeType(const MimeType &mimeType) const = 0;
virtual void setMagicRulesForMimeType(const MimeType &mimeType,
const QMap<int, QList<MimeMagicRule>> &rules) = 0;
@@ -101,6 +94,7 @@ public:
MimeDatabasePrivate *m_db;
QString m_directory;
QSet<QString> m_overriddenMimeTypes;
};
/*
@@ -115,16 +109,11 @@ public:
bool isValid() override;
bool isInternalDatabase() const override;
MimeType mimeTypeForName(const QString &name) override;
void addFileNameMatches(const QString &fileName,
MimeGlobMatchResult &result,
QList<QString> &checkedMimeTypes) override;
void addFileNameMatches(const QString &fileName, MimeGlobMatchResult &result) override;
void addParents(const QString &mime, QStringList &result) override;
QString resolveAlias(const QString &name) override;
void addAliases(const QString &name, QStringList &result) override;
void findByMagic(const QByteArray &data,
int *accuracyPtr,
MimeType &candidate,
QList<QString> &checkedMimeTypes) override;
void findByMagic(const QByteArray &data, int *accuracyPtr, MimeType &candidate) override;
void addAllMimeTypes(QList<MimeType> &result) override;
bool loadMimeTypePrivate(MimeTypePrivate &) override;
void loadIcon(MimeTypePrivate &) override;
@@ -133,7 +122,7 @@ public:
// added for Qt Creator
bool hasMimeTypeForName(const QString &name) override;
void addAllMimeTypeNames(QList<QString> &result) override;
QStringList allMimeTypeNames() override;
QMap<int, QList<MimeMagicRule>> magicRulesForMimeType(const MimeType &mimeType) const override;
void setMagicRulesForMimeType(const MimeType &mimeType,
const QMap<int, QList<MimeMagicRule>> &rules) override;
@@ -145,16 +134,14 @@ private:
void matchGlobList(MimeGlobMatchResult &result,
CacheFile *cacheFile,
int offset,
const QString &fileName,
const QList<QString> &ignoreMimeTypes);
const QString &fileName);
bool matchSuffixTree(MimeGlobMatchResult &result,
CacheFile *cacheFile,
int numEntries,
int firstOffset,
const QString &fileName,
int charPos,
bool caseSensitiveCheck,
const QList<QString> &ignoreMimeTypes);
bool caseSensitiveCheck);
bool matchMagicRule(CacheFile *cacheFile, int numMatchlets, int firstOffset, const QByteArray &data);
QLatin1String iconForMime(CacheFile *cacheFile, int posListOffset, const QByteArray &inputMime);
void loadMimeTypeList();
@@ -193,16 +180,11 @@ public:
bool isValid() override;
bool isInternalDatabase() const override;
MimeType mimeTypeForName(const QString &name) override;
void addFileNameMatches(const QString &fileName,
MimeGlobMatchResult &result,
QList<QString> &checkedMimeTypes) override;
void addFileNameMatches(const QString &fileName, MimeGlobMatchResult &result) override;
void addParents(const QString &mime, QStringList &result) override;
QString resolveAlias(const QString &name) override;
void addAliases(const QString &name, QStringList &result) override;
void findByMagic(const QByteArray &data,
int *accuracyPtr,
MimeType &candidate,
QList<QString> &checkedMimeTypes) override;
void findByMagic(const QByteArray &data, int *accuracyPtr, MimeType &candidate) override;
void addAllMimeTypes(QList<MimeType> &result) override;
void ensureLoaded() override;
@@ -217,7 +199,7 @@ public:
// added for Qt Creator
bool hasMimeTypeForName(const QString &name) override;
void addAllMimeTypeNames(QList<QString> &result) override;
QStringList allMimeTypeNames() override;
QMap<int, QList<MimeMagicRule>> magicRulesForMimeType(const MimeType &mimeType) const override;
void setMagicRulesForMimeType(const MimeType &mimeType,
const QMap<int, QList<MimeMagicRule>> &rules) override;