/************************************************************************** ** ** This file is part of Qt Creator ** ** Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). ** ** Contact: Nokia Corporation (qt-info@nokia.com) ** ** Commercial Usage ** ** Licensees holding valid Qt Commercial licenses may use this file in ** accordance with the Qt Commercial License Agreement provided with the ** Software or, alternatively, in accordance with the terms contained in ** a written agreement between you and Nokia. ** ** GNU Lesser General Public License Usage ** ** Alternatively, this file may be used under the terms of the GNU Lesser ** General Public License version 2.1 as published by the Free Software ** Foundation and appearing in the file LICENSE.LGPL included in the ** packaging of this file. Please review the following information to ** ensure the GNU Lesser General Public License version 2.1 requirements ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. ** ** If you are unsure which license is appropriate for your use, please ** contact the sales department at http://qt.nokia.com/contact. 
** **************************************************************************/ #include "htmldocextractor.h" #include #include #include #include using namespace Utils; /* NOTE(review): this copy of the file has lost its line structure, the include directives have no header names, and many string literals are empty or split across blank lines. Confirm every literal against version control before changing any of them. */ namespace { /* Builds a QRegExp from pattern with minimal matching switched on through setMinimal(true). */ QRegExp createMinimalExp(const QString &pattern) { QRegExp exp(pattern); exp.setMinimal(true); return exp; } } /* Initial state: length reference -1, no truncation at paragraph, formatting on, extended extraction off. */ HtmlDocExtractor::HtmlDocExtractor() : m_lengthReference(-1), m_truncateAtParagraph(false), m_formatContents(true), m_extendedExtraction(false) {} /* Switches extended extraction off; processOutput() and getClassOrNamespaceDescription() read this flag. */ void HtmlDocExtractor::extractFirstParagraphOnly() { m_extendedExtraction = false; } /* Switches extended extraction on and stores the length reference and the truncate-at-paragraph flag that processOutput() reads when it shortens the text. */ void HtmlDocExtractor::extractExtendedContents(const int length, const bool truncateAtParagraph) { m_lengthReference = length; m_truncateAtParagraph = truncateAtParagraph; m_extendedExtraction = true; } /* Stores the flag that decides whether the formatting steps in the getters and in processOutput() run. */ void HtmlDocExtractor::applyFormatting(const bool format) { m_formatContents = format; } /* Fetches the text between the start mark (mark followed by -brief) and the end mark (mark). With formatting on and a non-empty result, the More... text is removed and one literal is prepended and one appended (both literals are empty in this copy; NOTE(review): confirm). The result always goes through processOutput(). */ QString HtmlDocExtractor::getClassOrNamespaceBrief(const QString &html, const QString &mark) const { QString contents = getContentsByMarks(html, mark + QLatin1String("-brief"), mark); if (!contents.isEmpty() && m_formatContents) { contents.remove(QLatin1String("More...")); contents.prepend(QLatin1String("")); contents.append(QLatin1String("")); } processOutput(&contents); return contents; } /* Without extended extraction this defers to getClassOrNamespaceBrief(). Otherwise it fetches the text between the start mark (mark followed by -description) and the end mark (mark), removes the Detailed Description text when formatting is on, and runs processOutput(). */ QString HtmlDocExtractor::getClassOrNamespaceDescription(const QString &html, const QString &mark) const { if (!m_extendedExtraction) return getClassOrNamespaceBrief(html, mark); QString contents = getContentsByMarks(html, mark + QLatin1String("-description"), mark); if (!contents.isEmpty() && m_formatContents) contents.remove(QLatin1String("Detailed Description")); processOutput(&contents); return contents; } /* Enum lookup: forwards to getClassOrNamespaceMemberDescription() with mark as both start and end mark. */ QString HtmlDocExtractor::getEnumDescription(const QString &html, const QString &mark) const { return getClassOrNamespaceMemberDescription(html, mark, mark); } /* Typedef lookup: forwards to getClassOrNamespaceMemberDescription() with mark as both start and end mark. */ QString HtmlDocExtractor::getTypedefDescription(const QString &html, const QString &mark) const { return getClassOrNamespaceMemberDescription(html, mark, mark); } /* Macro lookup: forwards to getClassOrNamespaceMemberDescription() with mark as both start and end mark. */ QString HtmlDocExtractor::getMacroDescription(const 
QString &html, const QString &mark) const { return getClassOrNamespaceMemberDescription(html, mark, mark); } /* Looks up a function description. When mark contains an opening parenthesis, the text before it becomes the end mark; the start mark is that text followed by [overload1] for the main overload, or followed by the rest of mark with parentheses, commas and spaces removed otherwise. When that lookup returns nothing, html is searched with a minimal-match pattern built from the clean mark and, on a match, capture 1 is looked up again with the -prop suffix as start mark. */ QString HtmlDocExtractor::getFunctionDescription(const QString &html, const QString &mark, const bool mainOverload) const { QString cleanMark = mark; QString startMark = mark; const int parenthesis = mark.indexOf(QLatin1Char('(')); if (parenthesis != -1) { startMark = mark.left(parenthesis); cleanMark = startMark; if (mainOverload) { startMark.append(QLatin1String("[overload1]")); } else { QString complement = mark.right(mark.length() - parenthesis); complement.remove(QRegExp(QLatin1String("[\\(\\), ]"))); startMark.append(complement); } } QString contents = getClassOrNamespaceMemberDescription(html, startMark, cleanMark); if (contents.isEmpty()) { // Maybe this is a property function, which is documented differently. Besides // setX/isX/hasX there are other (not so usual) names for them. A few examples of those: // - toPlainText / Prop. plainText from QPlainTextEdit. // - resize / Prop. size from QWidget. // - move / Prop. pos from QWidget (nothing similar in the names in this case). // So I try to find the link to this property in the list of properties, extract its // anchor and then follow by the name found. 
const QString &pattern = QString(QLatin1String("%1")).arg(cleanMark); QRegExp exp = createMinimalExp(pattern); if (exp.indexIn(html) != -1) { const QString &prop = exp.cap(1); contents = getClassOrNamespaceMemberDescription(html, prop + QLatin1String("-prop"), prop); } } return contents; } /* Fetches the text between startMark and endMark through getContentsByMarks() and passes it through processOutput() before returning it. */ QString HtmlDocExtractor::getClassOrNamespaceMemberDescription(const QString &html, const QString &startMark, const QString &endMark) const { QString contents = getContentsByMarks(html, startMark, endMark); processOutput(&contents); return contents; } /* Prefixes startMark with $$$, searches html for endMark from the start position, and returns the slice between them after advancing start by 3. NOTE(review): the body is incomplete in this copy (start and contents are used without a visible declaration, and the braces do not balance); restore it from version control. */ QString HtmlDocExtractor::getContentsByMarks(const QString &html, QString startMark, QString endMark) const { startMark.prepend(QLatin1String("$$$")); endMark.prepend(QLatin1String(""), start); if (start != -1) { int end = html.indexOf(endMark, start); if (end != -1) { start += 3; contents = html.mid(start, end - start); } } } return contents; } /* Post-processes the extracted text in place. Empty input is returned untouched. Without extended extraction the text is truncated 4 characters past the first match of the searched literal, or cleared when there is no match. With formatting on, the strip and replace helpers defined later in this file run in a fixed order, and extra literals are inserted when the searched literal occurs within the first 140 characters. With extended extraction, a length reference above -1 and text longer than that reference, the text is truncated (at a matched boundary when m_truncateAtParagraph is set) and, with formatting on, a literal containing ... is appended. */ void HtmlDocExtractor::processOutput(QString *html) const { if (html->isEmpty()) return; if (!m_extendedExtraction) { int paragraph = html->indexOf(QLatin1String("

")); if (paragraph != -1) { paragraph += 4; html->truncate(paragraph); } else { // Some enumerations don't have paragraphs and just a table with the items. In such // cases the the html is cleared to avoid showing more that desired. html->clear(); return; } } if (m_formatContents) { stripBold(html); replaceNonStyledHeadingsForBold(html); replaceTablesForSimpleLines(html); replaceListsForSimpleLines(html); stripLinks(html); stripHorizontalLines(html); stripDivs(html); stripTagsStyles(html); stripHeadings(html); stripImagens(html); stripEmptyParagraphs(html); if (!html->startsWith(QLatin1String(""))) { if (!m_extendedExtraction) { if (!html->endsWith(QLatin1String(".

"))) { //

For paragraphs similar to this. Example:

const int lastDot = html->lastIndexOf(QLatin1Char('.')); if (lastDot != -1) { html->truncate(lastDot); html->append(QLatin1String(".

")); } } } const int noBreakLimit = 140; const int paragraph = html->indexOf(QLatin1String("

")); if (paragraph > 0 && paragraph <= noBreakLimit) { html->insert(paragraph, QLatin1String("")); html->prepend(QLatin1String("")); } } } if (m_extendedExtraction && m_lengthReference > -1 && html->length() > m_lengthReference) { if (m_truncateAtParagraph) { const int nextBegin = html->indexOf(QLatin1String("

"), m_lengthReference); QRegExp exp = createMinimalExp(QLatin1String("

|
")); const int previousEnd = html->lastIndexOf(exp, m_lengthReference); if (nextBegin != -1 && previousEnd != -1) html->truncate(qMin(nextBegin, previousEnd + exp.matchedLength())); else if (nextBegin != -1 || previousEnd != -1) html->truncate((nextBegin != -1? nextBegin : previousEnd + exp.matchedLength())); } else { html->truncate(m_lengthReference); } if (m_formatContents) { if (html->endsWith(QLatin1String("
"))) html->chop(6); html->append(QLatin1String("

...

")); } } } void HtmlDocExtractor::stripAllHtml(QString *html) { html->remove(createMinimalExp(QLatin1String("<.*>"))); } void HtmlDocExtractor::stripHeadings(QString *html) { html->remove(createMinimalExp(QLatin1String("|"))); } void HtmlDocExtractor::stripLinks(QString *html) { html->remove(createMinimalExp(QLatin1String("|"))); } void HtmlDocExtractor::stripHorizontalLines(QString *html) { html->remove(createMinimalExp(QLatin1String(""))); } void HtmlDocExtractor::stripDivs(QString *html) { html->remove(createMinimalExp(QLatin1String("||"))); } void HtmlDocExtractor::stripTagsStyles(QString *html) { const QRegExp &exp = createMinimalExp(QLatin1String("<(.*\\s+)class=\".*\">")); html->replace(exp, QLatin1String("<\\1>")); } void HtmlDocExtractor::stripTeletypes(QString *html) { html->remove(QLatin1String("")); html->remove(QLatin1String("")); } void HtmlDocExtractor::stripImagens(QString *html) { html->remove(createMinimalExp(QLatin1String(""))); } void HtmlDocExtractor::stripBold(QString *html) { html->remove(QLatin1String("")); html->remove(QLatin1String("")); } void HtmlDocExtractor::stripEmptyParagraphs(QString *html) { html->remove(QLatin1String("

")); } void HtmlDocExtractor::replaceNonStyledHeadingsForBold(QString *html) { const QRegExp &hStart = createMinimalExp(QLatin1String("")); const QRegExp &hEnd = createMinimalExp(QLatin1String("")); html->replace(hStart, QLatin1String("

")); html->replace(hEnd, QLatin1String("

")); } void HtmlDocExtractor::replaceTablesForSimpleLines(QString *html) { html->replace(createMinimalExp(QLatin1String("(?:

)?")), QLatin1String("

")); html->replace(QLatin1String(""), QLatin1String("

")); html->remove(createMinimalExp(QLatin1String(""))); html->remove(QLatin1String("")); html->remove(createMinimalExp(QLatin1String(""))); html->remove(QLatin1String("")); html->remove(createMinimalExp(QLatin1String(".*"))); html->replace(QLatin1String(" remove(createMinimalExp(QLatin1String("

"))); html->remove(createMinimalExp(QLatin1String(""))); html->remove(createMinimalExp(QLatin1String("(?:

)?"))); html->replace(createMinimalExp(QLatin1String("")), QLatin1String("    ")); html->replace(QLatin1String(""), QLatin1String("
")); } void HtmlDocExtractor::replaceListsForSimpleLines(QString *html) { html->remove(createMinimalExp(QLatin1String("<(?:ul|ol).*>"))); html->remove(createMinimalExp(QLatin1String(""))); html->replace(QLatin1String("
  • "), QLatin1String("    ")); html->replace(QLatin1String("
  • "), QLatin1String("
    ")); }