FuzzyMatcher: add option to match text with spaces

Change-Id: I0480b57d27f3625933005ca2233e9612df53072d
Reviewed-by: Orgad Shaneh <orgads@gmail.com>
Reviewed-by: Eike Ziller <eike.ziller@qt.io>
Reviewed-by: André Hartmann <aha_1980@gmx.de>
Reviewed-by: <github-actions-qt-creator@cristianadam.eu>
Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org>
This commit is contained in:
David Schulz
2022-09-19 13:04:25 +02:00
parent c767f193ce
commit 85c5edcb6b
5 changed files with 134 additions and 23 deletions

View File

@@ -17,7 +17,7 @@
* \return the regexp * \return the regexp
*/ */
QRegularExpression FuzzyMatcher::createRegExp( QRegularExpression FuzzyMatcher::createRegExp(
const QString &pattern, FuzzyMatcher::CaseSensitivity caseSensitivity) const QString &pattern, FuzzyMatcher::CaseSensitivity caseSensitivity, bool multiWord)
{ {
if (pattern.isEmpty()) if (pattern.isEmpty())
return QRegularExpression(); return QRegularExpression();
@@ -34,9 +34,10 @@ QRegularExpression FuzzyMatcher::createRegExp(
* upper-case character. And any sequence of lower-case or upper case characters - * upper-case character. And any sequence of lower-case or upper case characters -
* followed by an underscore can precede a lower-case character. * followed by an underscore can precede a lower-case character.
* *
* Examples: (case sensitive mode) * Examples:
* gAC matches getActionController * gAC matches getActionController (case sensitive mode)
* gac matches get_action_controller * gac matches get_action_controller (case sensitive mode)
* gac matches Get Action Container (case insensitive multi word mode)
* *
* It also implements the fully and first-letter-only case sensitivity. * It also implements the fully and first-letter-only case sensitivity.
*/ */
@@ -51,6 +52,10 @@ QRegularExpression FuzzyMatcher::createRegExp(
const QLatin1String uppercaseWordContinuation("[a-z0-9_]*"); const QLatin1String uppercaseWordContinuation("[a-z0-9_]*");
const QLatin1String lowercaseWordContinuation("(?:[a-zA-Z0-9]*_)?"); const QLatin1String lowercaseWordContinuation("(?:[a-zA-Z0-9]*_)?");
const QLatin1String upperSnakeWordContinuation("[A-Z0-9]*_?"); const QLatin1String upperSnakeWordContinuation("[A-Z0-9]*_?");
const QLatin1String multiWordFirst("\\b");
const QLatin1String multiWordContinuation("(?:.*?\\b)*?");
keyRegExp += "(?:"; keyRegExp += "(?:";
for (const QChar &c : pattern) { for (const QChar &c : pattern) {
if (!c.isLetterOrNumber()) { if (!c.isLetterOrNumber()) {
@@ -60,6 +65,9 @@ QRegularExpression FuzzyMatcher::createRegExp(
} else if (c == asterisk) { } else if (c == asterisk) {
keyRegExp += ".*"; keyRegExp += ".*";
plainRegExp += ").*("; plainRegExp += ").*(";
} else if (multiWord && c == QChar::Space) {
// ignore spaces in keyRegExp
plainRegExp += QRegularExpression::escape(c);
} else { } else {
const QString escaped = QRegularExpression::escape(c); const QString escaped = QRegularExpression::escape(c);
keyRegExp += '(' + escaped + ')'; keyRegExp += '(' + escaped + ')';
@@ -67,26 +75,34 @@ QRegularExpression FuzzyMatcher::createRegExp(
} }
} else if (caseSensitivity == CaseSensitivity::CaseInsensitive || } else if (caseSensitivity == CaseSensitivity::CaseInsensitive ||
(caseSensitivity == CaseSensitivity::FirstLetterCaseSensitive && !first)) { (caseSensitivity == CaseSensitivity::FirstLetterCaseSensitive && !first)) {
const QString upper = QRegularExpression::escape(c.toUpper()); const QString upper = QRegularExpression::escape(c.toUpper());
const QString lower = QRegularExpression::escape(c.toLower()); const QString lower = QRegularExpression::escape(c.toLower());
keyRegExp += "(?:"; if (multiWord) {
keyRegExp += first ? uppercaseWordFirst : uppercaseWordContinuation; keyRegExp += first ? multiWordFirst : multiWordContinuation;
keyRegExp += '(' + upper + ')'; keyRegExp += '(' + upper + '|' + lower + ')';
if (first) {
keyRegExp += '|' + lowercaseWordFirst + '(' + lower + ')';
} else { } else {
keyRegExp += '|' + lowercaseWordContinuation + '(' + lower + ')'; keyRegExp += "(?:";
keyRegExp += '|' + upperSnakeWordContinuation + '(' + upper + ')'; keyRegExp += first ? uppercaseWordFirst : uppercaseWordContinuation;
keyRegExp += '(' + upper + ')';
if (first) {
keyRegExp += '|' + lowercaseWordFirst + '(' + lower + ')';
} else {
keyRegExp += '|' + lowercaseWordContinuation + '(' + lower + ')';
keyRegExp += '|' + upperSnakeWordContinuation + '(' + upper + ')';
}
keyRegExp += ')';
} }
keyRegExp += ')';
plainRegExp += '[' + upper + lower + ']'; plainRegExp += '[' + upper + lower + ']';
} else { } else {
if (!first) { if (!first) {
if (multiWord)
keyRegExp += multiWordContinuation;
if (c.isUpper()) if (c.isUpper())
keyRegExp += uppercaseWordContinuation; keyRegExp += uppercaseWordContinuation;
else else
keyRegExp += lowercaseWordContinuation; keyRegExp += lowercaseWordContinuation;
} else if (multiWord) {
keyRegExp += multiWordFirst;
} }
const QString escaped = QRegularExpression::escape(c); const QString escaped = QRegularExpression::escape(c);
keyRegExp += escaped; keyRegExp += escaped;
@@ -106,13 +122,14 @@ QRegularExpression FuzzyMatcher::createRegExp(
Qt::CaseSensitivity. Qt::CaseSensitivity.
*/ */
QRegularExpression FuzzyMatcher::createRegExp(const QString &pattern, QRegularExpression FuzzyMatcher::createRegExp(const QString &pattern,
Qt::CaseSensitivity caseSensitivity) Qt::CaseSensitivity caseSensitivity,
bool multiWord)
{ {
const CaseSensitivity sensitivity = (caseSensitivity == Qt::CaseSensitive) const CaseSensitivity sensitivity = (caseSensitivity == Qt::CaseSensitive)
? CaseSensitivity::CaseSensitive ? CaseSensitivity::CaseSensitive
: CaseSensitivity::CaseInsensitive; : CaseSensitivity::CaseInsensitive;
return createRegExp(pattern, sensitivity); return createRegExp(pattern, sensitivity, multiWord);
} }
/*! /*!

View File

@@ -30,9 +30,12 @@ public:
QVector<int> lengths; QVector<int> lengths;
}; };
static QRegularExpression createRegExp(
const QString &pattern,
CaseSensitivity caseSensitivity = CaseSensitivity::CaseInsensitive,
bool multiWord = false);
static QRegularExpression createRegExp(const QString &pattern, static QRegularExpression createRegExp(const QString &pattern,
CaseSensitivity caseSensitivity = CaseSensitivity::CaseInsensitive); Qt::CaseSensitivity caseSensitivity,
static QRegularExpression createRegExp(const QString &pattern, bool multiWord);
Qt::CaseSensitivity caseSensitivity);
static HighlightingPositions highlightingPositions(const QRegularExpressionMatch &match); static HighlightingPositions highlightingPositions(const QRegularExpressionMatch &match);
}; };

View File

@@ -224,11 +224,14 @@ Qt::CaseSensitivity ILocatorFilter::caseSensitivity(const QString &str)
/*! /*!
Creates the search term \a text as a regular expression with case Creates the search term \a text as a regular expression with case
sensitivity set to \a caseSensitivity. sensitivity set to \a caseSensitivity. Pass true to \a multiWord if the pattern is
expected to contain spaces.
*/ */
QRegularExpression ILocatorFilter::createRegExp(const QString &text, Qt::CaseSensitivity caseSensitivity) QRegularExpression ILocatorFilter::createRegExp(const QString &text,
Qt::CaseSensitivity caseSensitivity,
bool multiWord)
{ {
return FuzzyMatcher::createRegExp(text, caseSensitivity); return FuzzyMatcher::createRegExp(text, caseSensitivity, multiWord);
} }
/*! /*!

View File

@@ -168,7 +168,8 @@ public:
static Qt::CaseSensitivity caseSensitivity(const QString &str); static Qt::CaseSensitivity caseSensitivity(const QString &str);
static QRegularExpression createRegExp(const QString &text, static QRegularExpression createRegExp(const QString &text,
Qt::CaseSensitivity caseSensitivity = Qt::CaseInsensitive); Qt::CaseSensitivity caseSensitivity = Qt::CaseInsensitive,
bool multiWord = false);
static LocatorFilterEntry::HighlightInfo highlightInfo(const QRegularExpressionMatch &match, static LocatorFilterEntry::HighlightInfo highlightInfo(const QRegularExpressionMatch &match,
LocatorFilterEntry::HighlightInfo::DataType dataType = LocatorFilterEntry::HighlightInfo::DisplayName); LocatorFilterEntry::HighlightInfo::DataType dataType = LocatorFilterEntry::HighlightInfo::DisplayName);

View File

@@ -13,8 +13,12 @@ class tst_FuzzyMatcher : public QObject
private slots: private slots:
void fuzzyMatcher(); void fuzzyMatcher();
void fuzzyMatcher_data(); void fuzzyMatcher_data();
void fuzzyMatcherMultiWord();
void fuzzyMatcherMultiWord_data();
void highlighting(); void highlighting();
void highlighting_data(); void highlighting_data();
void highlightingMultiWord();
void highlightingMultiWord_data();
}; };
void tst_FuzzyMatcher::fuzzyMatcher() void tst_FuzzyMatcher::fuzzyMatcher()
@@ -64,7 +68,38 @@ void tst_FuzzyMatcher::fuzzyMatcher_data()
QTest::newRow("middle-no-hump") << "window" << "mainwindow.cpp" << 4; QTest::newRow("middle-no-hump") << "window" << "mainwindow.cpp" << 4;
QTest::newRow("case-insensitive") << "window" << "MAINWINDOW.cpp" << 4; QTest::newRow("case-insensitive") << "window" << "MAINWINDOW.cpp" << 4;
QTest::newRow("case-insensitive-2") << "wINDow" << "MainwiNdow.cpp" << 4; QTest::newRow("case-insensitive-2") << "wINDow" << "MainwiNdow.cpp" << 4;
QTest::newRow("uppercase-word-and-humps") << "htvideoele" << "HTMLVideoElement" << 0; }
void tst_FuzzyMatcher::fuzzyMatcherMultiWord()
{
QFETCH(QString, pattern);
QFETCH(QString, candidate);
QFETCH(int, expectedIndex);
const QRegularExpression regExp
= FuzzyMatcher::createRegExp(pattern, FuzzyMatcher::CaseSensitivity::CaseInsensitive, true);
const QRegularExpressionMatch match = regExp.match(candidate);
QCOMPARE(match.capturedStart(), expectedIndex);
}
// Supplies the rows for fuzzyMatcherMultiWord(). Each row holds a search
// pattern, a candidate string, and the index that the test compares against
// match.capturedStart(). Rows named as mismatches ("one_word_mismatch",
// "wrong_order", "no_space", "inword_*") use -1 as the expected index.
void tst_FuzzyMatcher::fuzzyMatcherMultiWord_data()
{
QTest::addColumn<QString>("pattern");
QTest::addColumn<QString>("candidate");
QTest::addColumn<int>("expectedIndex");
// Single-word candidates.
QTest::newRow("one_word") << "fo" << "foo" << 0;
QTest::newRow("one_word_complete") << "foo" << "foo" << 0;
QTest::newRow("one_word_mismatch") << "bar" << "foo" << -1;
// Two-word candidates: pattern characters address the starts of the words,
// in either letter case.
QTest::newRow("two_words") << "fb" << "foo bar" << 0;
QTest::newRow("two_wordslU") << "fb" << "Foo Bar" << 0;
QTest::newRow("two_wordsUl") << "FB" << "foo bar" << 0;
QTest::newRow("two_words_one_match") << "ba" << "foo bar" << 4;
QTest::newRow("two_words_complete_match") << "foo bar" << "foo bar" << 0;
// Patterns that are expected not to match at all.
QTest::newRow("wrong_order") << "bf" << "foo bar" << -1;
QTest::newRow("no_space") << "fb" << "foobar" << -1;
QTest::newRow("inword_first_match") << "oob" << "foo bar" << -1;
QTest::newRow("inword_second_match") << "foar" << "foo bar" << -1;
} }
typedef QVector<QPair<int, int>> Matches; typedef QVector<QPair<int, int>> Matches;
@@ -142,5 +177,57 @@ void tst_FuzzyMatcher::highlighting_data()
<< Matches{{0, 2}, {4, 8}}; << Matches{{0, 2}, {4, 8}};
} }
void tst_FuzzyMatcher::highlightingMultiWord()
{
QFETCH(QString, pattern);
QFETCH(QString, candidate);
QFETCH(Matches, matches);
const QRegularExpression regExp
= FuzzyMatcher::createRegExp(pattern, FuzzyMatcher::CaseSensitivity::CaseInsensitive, true);
const QRegularExpressionMatch match = regExp.match(candidate);
const FuzzyMatcher::HighlightingPositions positions
= FuzzyMatcher::highlightingPositions(match);
QCOMPARE(positions.starts.size(), matches.size());
for (int i = 0; i < positions.starts.size(); ++i) {
const QPair<int, int> &match = matches.at(i);
QCOMPARE(positions.starts.at(i), match.first);
QCOMPARE(positions.lengths.at(i), match.second);
}
}
void tst_FuzzyMatcher::highlightingMultiWord_data()
{
QTest::addColumn<QString>("pattern");
QTest::addColumn<QString>("candidate");
QTest::addColumn<Matches>("matches");
QTest::newRow("one_word") << "fo"
<< "foo" << Matches{{0, 2}};
QTest::newRow("one_word_complete") << "foo"
<< "foo" << Matches{{0, 3}};
QTest::newRow("one_word_mismatch") << "bar"
<< "foo" << Matches{};
QTest::newRow("two_words") << "fb"
<< "foo bar" << Matches{{0, 1}, {4, 1}};
QTest::newRow("two_wordslU") << "fb"
<< "Foo Bar" << Matches{{0, 1}, {4, 1}};
QTest::newRow("two_wordsUl") << "FB"
<< "foo bar" << Matches{{0, 1}, {4, 1}};
QTest::newRow("two_words_one_match") << "ba"
<< "foo bar" << Matches{{4, 2}};
QTest::newRow("two_words_complete_match") << "foo bar"
<< "foo bar" << Matches{{0, 7}};
QTest::newRow("wrong_order") << "bf"
<< "foo bar" << Matches{};
QTest::newRow("no_space") << "fb"
<< "foobar" << Matches{};
QTest::newRow("inword_first_match") << "oob"
<< "foo bar" << Matches{};
QTest::newRow("inword_second_match") << "foar"
<< "foo bar" << Matches{};
}
QTEST_APPLESS_MAIN(tst_FuzzyMatcher) QTEST_APPLESS_MAIN(tst_FuzzyMatcher)
#include "tst_fuzzymatcher.moc" #include "tst_fuzzymatcher.moc"