CPaster: Fix fetching list of pastebin.ca

The page now uses websockets internally to update the respective
content asynchronously. The initial GET of the page only fetches
an empty list - work around this by using the pre-constructed JSON
array that is contained within the initial content.

Change-Id: Id8619450ecc758057fce23079e7eed6de8abe5dc
Reviewed-by: Friedemann Kleint <Friedemann.Kleint@qt.io>
This commit is contained in:
Christian Stenger
2017-04-04 11:15:26 +02:00
parent 49e7f07526
commit f28e6dd029

View File

@@ -28,9 +28,11 @@
#include <utils/qtcassert.h> #include <utils/qtcassert.h>
#include <QNetworkReply> #include <QNetworkReply>
#include <QXmlStreamReader>
#include <QXmlStreamAttributes>
#include <QStringList> #include <QStringList>
#include <QJsonArray>
#include <QJsonDocument>
#include <QJsonValue>
#include <QJsonObject>
static const char urlC[] = "http://pastebin.ca/"; static const char urlC[] = "http://pastebin.ca/";
static const char internalUrlC[] = "http://pbin.ca/"; static const char internalUrlC[] = "http://pbin.ca/";
@@ -180,58 +182,39 @@ bool PasteBinDotCaProtocol::checkConfiguration(QString *errorMessage)
return ok; return ok;
} }
/* Quick & dirty: Parse the <div>-elements with the "Recent Posts" listing /* Quick & dirty: Parsing the page no longer works due to internal JavaScript/websocket magic - so,
* out of the page. * search for the _initial_ JSON array containing the most recently added pastes.
\code \code
<div class="menutitle"><h2>Recent Posts</h2></div> <script type="text/javascript">var pHistoryInitial = [{"id":3791300,"ts":1491288268,"name":"try",
<div class="items" id="idmenurecent-collapse"> "expires":1491374668},
<div class='recentlink'>
<a href="/[id]" class="rjt" rel="/preview.php?id=[id]">[nameTitle]</a>
<div class='recentdetail'>[time spec]</div>
</div>
...<h2>Create a New Pastebin Post</h2>
\endcode */ \endcode */
static inline QStringList parseLists(QIODevice *io) static inline QStringList parseLists(QIODevice *io)
{ {
enum State { OutsideRecentLink, InsideRecentLink };
QStringList rc; QStringList rc;
const QString classAttribute = QLatin1String("class");
const QString divElement = QLatin1String("div");
const QString anchorElement = QLatin1String("a");
// Start parsing at the 'recent posts' entry as the HTML above is not well-formed
// as of 8.4.2010. This will then terminate with an error.
QByteArray data = io->readAll(); QByteArray data = io->readAll();
const QByteArray recentPosts("<h2>Recent Posts</h2></div>"); const QByteArray history("<script type=\"text/javascript\">var pHistoryInitial = ");
const int recentPostsPos = data.indexOf(recentPosts); int pos = data.indexOf(history);
if (recentPostsPos == -1) if (pos == -1)
return rc; return rc;
data.remove(0, recentPostsPos + recentPosts.size()); data.remove(0, pos + history.size());
QXmlStreamReader reader(data); pos = data.indexOf(";</script>");
State state = OutsideRecentLink; if (pos == -1)
while (!reader.atEnd()) { return rc;
switch (reader.readNext()) { data.truncate(pos);
case QXmlStreamReader::StartElement: QJsonParseError error;
// Inside a <div> of an entry: Anchor or description const QJsonDocument doc = QJsonDocument::fromJson(data, &error);
if (state == InsideRecentLink && reader.name() == anchorElement) { // Anchor if (error.error != QJsonParseError::NoError)
// Strip host from link return rc;
QString link = reader.attributes().value(QLatin1String("href")).toString(); QJsonArray array = doc.array();
if (link.startsWith(QLatin1Char('/'))) for (const QJsonValue &val : array) {
link.remove(0, 1); const QJsonObject obj = val.toObject();
const QString nameTitle = reader.readElementText(); const QJsonValue id = obj.value("id");
rc.push_back(link + QLatin1Char(' ') + nameTitle); const QJsonValue name = obj.value("name");
} else if (state == OutsideRecentLink && reader.name() == divElement) { // "<div>" state switching if (!id.isUndefined())
if (reader.attributes().value(classAttribute) == QLatin1String("recentlink")) rc.append(QString::number(id.toInt()) + ' ' + name.toString());
state = InsideRecentLink; }
} // divElement
break;
default:
break;
} // switch reader
} // while reader.atEnd()
return rc; return rc;
} }