From 29811f81dcee8d9c2f0d608cb9d30ba42d7d0454 Mon Sep 17 00:00:00 2001 From: Jarek Kobus Date: Tue, 23 May 2023 17:34:59 +0200 Subject: [PATCH] FileSearch: Introduce FileContainer That's going to replace FileIterator. Benefits over FileIterator: 1. The FileContainer is a value type which may be freely copied. 2. Much faster iterating, especially for SubDirFileContainer with a big number of dirs / files (over 500000). The FileContainer has begin() and end() functions returning FileContainerIterator. The FileContainerIterator, when it's not an end() iterator, contains the Item's value itself. Whenever the iterator advances, the Item's value changes, while the reference to Item's value remains unchanged inside the iterator. The SubDirFileContainer doesn't keep the references to all visited Items anymore, contrary to the corresponding SubDirFileIterator. The values of the individually visited Items are kept only inside FileContainerIterator instances. The progress information is available through the FileContainerIterator's progressValue() / progressMaximum(). 
Task-number: QTCREATORBUG-28892 Change-Id: If89ac1121c21fa2fb7355aa12438dce3e85e465d Reviewed-by: Qt CI Bot Reviewed-by: Reviewed-by: Marcus Tillmanns Reviewed-by: Eike Ziller --- src/libs/utils/filesearch.cpp | 202 +++++++++++++++++- src/libs/utils/filesearch.h | 97 +++++++++ .../tst_subdirfileiterator.cpp | 19 ++ 3 files changed, 310 insertions(+), 8 deletions(-) diff --git a/src/libs/utils/filesearch.cpp b/src/libs/utils/filesearch.cpp index 662448a7de3..7dd8951b91c 100644 --- a/src/libs/utils/filesearch.cpp +++ b/src/libs/utils/filesearch.cpp @@ -550,6 +550,196 @@ FileIterator::const_iterator FileIterator::end() const // #pragma mark -- FileListIterator +void FileContainerIterator::operator++() +{ + QTC_ASSERT(m_data.m_container, return); + QTC_ASSERT(m_data.m_index >= 0, return); + QTC_ASSERT(m_data.m_advancer, return); + m_data.m_advancer(&m_data); +} + +int FileContainerIterator::progressMaximum() const +{ + return m_data.m_container ? m_data.m_container->progressMaximum() : 0; +} + +static QList toFileListCache(const FilePaths &fileList, + const QList &encodings) +{ + QList items; + items.reserve(fileList.size()); + QTextCodec *defaultEncoding = QTextCodec::codecForLocale(); + for (int i = 0; i < fileList.size(); ++i) + items.append({fileList.at(i), encodings.value(i, defaultEncoding)}); + return items; +} + +static FileContainerIterator::Advancer fileListAdvancer( + const QList &items) +{ + return [items](FileContainerIterator::Data *iterator) { + ++iterator->m_index; + if (iterator->m_index >= items.size() || iterator->m_index < 0) { + iterator->m_value = {}; + iterator->m_index = -1; + iterator->m_progressValue = items.size(); + return; + } + iterator->m_value = items.at(iterator->m_index); + iterator->m_progressValue = iterator->m_index; + }; +} + +static FileContainer::AdvancerProvider fileListAdvancerProvider(const FilePaths &fileList, + const QList &encodings) +{ + const auto initialCache = toFileListCache(fileList, encodings); + return [=] { return 
fileListAdvancer(initialCache); }; +} + +FileListContainer::FileListContainer(const FilePaths &fileList, + const QList &encodings) + : FileContainer(fileListAdvancerProvider(fileList, encodings), fileList.size()) {} + +const int s_progressMaximum = 1000; + +struct SubDirCache +{ + SubDirCache(const FilePaths &directories, const QStringList &filters, + const QStringList &exclusionFilters, QTextCodec *encoding); + + std::optional updateCache(int advanceIntoIndex, + const SubDirCache &initialCache); + + std::function m_filterFiles; + QTextCodec *m_encoding = nullptr; + QStack m_dirs; + QSet m_knownDirs; + QStack m_progressValues; + QStack m_processedValues; + qreal m_progress = 0; + QList m_items; + // When forward iterating, we construct some results for the future iterations + // and keep them in m_items cache. Later, when we iterated over all from the cache, + // we don't want to keep the cache anymore, so we are clearing it. + // In order to match the iterator's index with the position inside m_items cache, + // we need to remember how many items were removed from the cache and subtract + // this value from the iterator's index when a new advance comes. + int m_removedItemsCount = 0; +}; + +SubDirCache::SubDirCache(const FilePaths &directories, const QStringList &filters, + const QStringList &exclusionFilters, QTextCodec *encoding) + : m_filterFiles(filterFilesFunction(filters, exclusionFilters)) + , m_encoding(encoding == nullptr ? 
QTextCodec::codecForLocale() : encoding) +{ + const qreal maxPer = qreal(s_progressMaximum) / directories.count(); + for (const FilePath &directoryEntry : directories) { + if (!directoryEntry.isEmpty()) { + const FilePath canonicalPath = directoryEntry.canonicalPath(); + if (!canonicalPath.isEmpty() && directoryEntry.exists()) { + m_dirs.push(directoryEntry); + m_knownDirs.insert(canonicalPath); + m_progressValues.push(maxPer); + m_processedValues.push(false); + } + } + } +} + +std::optional SubDirCache::updateCache(int advanceIntoIndex, + const SubDirCache &initialCache) +{ + QTC_ASSERT(advanceIntoIndex >= 0, return {}); + if (advanceIntoIndex < m_removedItemsCount) + *this = initialCache; // Regenerate the cache from scratch + const int currentIndex = advanceIntoIndex - m_removedItemsCount; + if (currentIndex < m_items.size()) + return m_items.at(currentIndex); + + m_removedItemsCount += m_items.size(); + m_items.clear(); + const int newCurrentIndex = advanceIntoIndex - m_removedItemsCount; + + while (!m_dirs.isEmpty() && newCurrentIndex >= m_items.size()) { + const FilePath dir = m_dirs.pop(); + const qreal dirProgressMax = m_progressValues.pop(); + const bool processed = m_processedValues.pop(); + if (dir.exists()) { + using Dir = FilePath; + using CanonicalDir = FilePath; + std::vector> subDirs; + if (!processed) { + const FilePaths entries = dir.dirEntries(QDir::Dirs | QDir::Hidden + | QDir::NoDotAndDotDot); + for (const FilePath &entry : entries) { + const FilePath canonicalDir = entry.canonicalPath(); + if (!m_knownDirs.contains(canonicalDir)) + subDirs.emplace_back(entry, canonicalDir); + } + } + if (subDirs.empty()) { + const FilePaths allFilePaths = dir.dirEntries(QDir::Files | QDir::Hidden); + const FilePaths filePaths = m_filterFiles(allFilePaths); + m_items.reserve(m_items.size() + filePaths.size()); + Utils::reverseForeach(filePaths, [this](const FilePath &file) { + m_items.append({file, m_encoding}); + }); + m_progress += dirProgressMax; + } else { 
+ const qreal subProgress = dirProgressMax / (subDirs.size() + 1); + m_dirs.push(dir); + m_progressValues.push(subProgress); + m_processedValues.push(true); + Utils::reverseForeach(subDirs, + [this, subProgress](const std::pair &dir) { + m_dirs.push(dir.first); + m_knownDirs.insert(dir.second); + m_progressValues.push(subProgress); + m_processedValues.push(false); + }); + } + } else { + m_progress += dirProgressMax; + } + } + if (newCurrentIndex < m_items.size()) + return m_items.at(newCurrentIndex); + + m_progress = s_progressMaximum; + return {}; +} + +static FileContainerIterator::Advancer subDirAdvancer(const SubDirCache &initialCache) +{ + const std::shared_ptr sharedCache(new SubDirCache(initialCache)); + return [=](FileContainerIterator::Data *iterator) { + ++iterator->m_index; + const std::optional item + = sharedCache->updateCache(iterator->m_index, initialCache); + if (!item) { + iterator->m_value = {}; + iterator->m_index = -1; + iterator->m_progressValue = s_progressMaximum; + return; + } + iterator->m_value = *item; + iterator->m_progressValue = qMin(qRound(sharedCache->m_progress), s_progressMaximum); + }; +} + +static FileContainer::AdvancerProvider subDirAdvancerProvider(const FilePaths &directories, + const QStringList &filters, const QStringList &exclusionFilters, QTextCodec *encoding) +{ + const SubDirCache initialCache(directories, filters, exclusionFilters, encoding); + return [=] { return subDirAdvancer(initialCache); }; +} + +SubDirFileContainer::SubDirFileContainer(const FilePaths &directories, const QStringList &filters, + const QStringList &exclusionFilters, QTextCodec *encoding) + : FileContainer(subDirAdvancerProvider(directories, filters, exclusionFilters, encoding), + s_progressMaximum) {} + QList constructItems(const FilePaths &fileList, const QList &encodings) { @@ -594,10 +784,6 @@ int FileListIterator::currentProgress() const // #pragma mark -- SubDirFileIterator -namespace { - const int MAX_PROGRESS = 1000; -} - 
SubDirFileIterator::SubDirFileIterator(const FilePaths &directories, const QStringList &filters, const QStringList &exclusionFilters, @@ -606,7 +792,7 @@ SubDirFileIterator::SubDirFileIterator(const FilePaths &directories, , m_progress(0) { m_encoding = (encoding == nullptr ? QTextCodec::codecForLocale() : encoding); - qreal maxPer = qreal(MAX_PROGRESS) / directories.count(); + qreal maxPer = qreal(s_progressMaximum) / directories.count(); for (const FilePath &directoryEntry : directories) { if (!directoryEntry.isEmpty()) { const FilePath canonicalPath = directoryEntry.canonicalPath(); @@ -672,7 +858,7 @@ void SubDirFileIterator::update(int index) } } if (index >= m_items.size()) - m_progress = MAX_PROGRESS; + m_progress = s_progressMaximum; } int SubDirFileIterator::currentFileCount() const @@ -687,12 +873,12 @@ const FileIterator::Item &SubDirFileIterator::itemAt(int index) const int SubDirFileIterator::maxProgress() const { - return MAX_PROGRESS; + return s_progressMaximum; } int SubDirFileIterator::currentProgress() const { - return qMin(qRound(m_progress), MAX_PROGRESS); + return qMin(qRound(m_progress), s_progressMaximum); } } diff --git a/src/libs/utils/filesearch.h b/src/libs/utils/filesearch.h index b0bd01c3362..81c5e2b8cd1 100644 --- a/src/libs/utils/filesearch.h +++ b/src/libs/utils/filesearch.h @@ -61,6 +61,103 @@ enum class InclusionType { QTCREATOR_UTILS_EXPORT QString msgFilePatternToolTip(InclusionType inclusionType = InclusionType::Included); +class FileContainer; + +class QTCREATOR_UTILS_EXPORT FileContainerIterator +{ +public: + class Item + { + public: + FilePath filePath {}; + QTextCodec *encoding = nullptr; + }; + + class Data; + using Advancer = std::function; + + class Data + { + public: + const FileContainer *m_container = nullptr; + int m_progressValue = 0; + Advancer m_advancer = {}; + int m_index = -1; // end iterator + Item m_value = {}; + }; + + using value_type = Item; + using pointer = const value_type *; + using reference = const 
value_type &; + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + + FileContainerIterator() = default; + + reference operator*() const { return m_data.m_value; } + pointer operator->() const { return &m_data.m_value; } + void operator++(); + + bool operator==(const FileContainerIterator &other) const { + return m_data.m_container == other.m_data.m_container + && m_data.m_index == other.m_data.m_index; + } + bool operator!=(const FileContainerIterator &other) const { return !operator==(other); } + int progressValue() const { return m_data.m_progressValue; } + int progressMaximum() const; + +private: + friend class FileContainer; + FileContainerIterator(const Data &data) : m_data(data) {} + Data m_data; +}; + +class QTCREATOR_UTILS_EXPORT FileContainer +{ +public: + using AdvancerProvider = std::function; + + FileContainer() = default; + + FileContainerIterator begin() const { + if (!m_provider) + return end(); + const FileContainerIterator::Advancer advancer = m_provider(); + if (!advancer) + return end(); + FileContainerIterator iterator({this, 0, advancer}); + advancer(&iterator.m_data); + return iterator; + } + FileContainerIterator end() const { return FileContainerIterator({this, m_progressMaximum}); } + int progressMaximum() const { return m_progressMaximum; } + +protected: + FileContainer(const AdvancerProvider &provider, int progressMaximum) + : m_provider(provider) + , m_progressMaximum(progressMaximum) {} + +private: + friend class FileContainerIterator; + AdvancerProvider m_provider; + int m_progressMaximum = 0; +}; + +class QTCREATOR_UTILS_EXPORT FileListContainer : public FileContainer +{ +public: + FileListContainer(const FilePaths &fileList, const QList &encodings); +}; + +class QTCREATOR_UTILS_EXPORT SubDirFileContainer : public FileContainer +{ +public: + SubDirFileContainer(const FilePaths &directories, + const QStringList &filters, + const QStringList &exclusionFilters, + QTextCodec *encoding = 
nullptr); +}; + class QTCREATOR_UTILS_EXPORT FileIterator { public: diff --git a/tests/manual/subdirfileiterator/tst_subdirfileiterator.cpp b/tests/manual/subdirfileiterator/tst_subdirfileiterator.cpp index b00d3aa7f65..55312208e8a 100644 --- a/tests/manual/subdirfileiterator/tst_subdirfileiterator.cpp +++ b/tests/manual/subdirfileiterator/tst_subdirfileiterator.cpp @@ -204,6 +204,25 @@ private slots: QCOMPARE(filesCount, m_filesCount); } + void testSubDirFileContainer() + { + QTC_SCOPED_TIMER("ITERATING with FileContainer"); + int filesCount = 0; + { + const FilePath root(FilePath::fromString(m_tempDir->path())); + FileContainer container = SubDirFileContainer({root}, {}, {}); + auto it = container.begin(); + const auto end = container.end(); + while (it != end) { + ++filesCount; + ++it; + if (filesCount % 100000 == 0) + qDebug() << filesCount << '/' << m_filesCount << "files visited so far..."; + } + } + QCOMPARE(filesCount, m_filesCount); + } + void testManualIterator() { QTC_SCOPED_TIMER("ITERATING with manual iterator");