Switch to news from registry

This commit is contained in:
Ivan Kravets
2019-06-26 00:29:49 +03:00
parent 6daf387c90
commit 89b951e7d5
4 changed files with 9 additions and 152 deletions

View File

@ -1,3 +1,3 @@
[settings]
line_length=79
known_third_party=bottle,click,pytest,requests,SCons,semantic_version,serial,twisted,autobahn,bs4,jsonrpc
known_third_party=bottle,click,pytest,requests,SCons,semantic_version,serial,twisted,autobahn,jsonrpc

2
docs

Submodule docs updated: b439090186...f660192cb1

View File

@ -13,14 +13,11 @@
# limitations under the License.
import json
import re
import time
from bs4 import BeautifulSoup # pylint: disable=import-error
from twisted.internet import defer, reactor # pylint: disable=import-error
from platformio import app
from platformio.commands.home import helpers
from platformio.commands.home.rpc.handlers.os import OSRPC
@ -42,9 +39,12 @@ class MiscRPC(object):
result = self._preload_latest_tweets(username, cache_key, cache_valid)
return result
@staticmethod
@defer.inlineCallbacks
def _preload_latest_tweets(self, username, cache_key, cache_valid):
result = yield self._fetch_tweets(username)
def _preload_latest_tweets(username, cache_key, cache_valid):
result = yield OSRPC.fetch_content(
"https://api.platformio.org/tweets/" + username)
result = json.loads(result)
with app.ContentCache() as cc:
cc.set(cache_key,
json.dumps({
@ -52,142 +52,3 @@ class MiscRPC(object):
"result": result
}), cache_valid)
defer.returnValue(result)
@defer.inlineCallbacks
def _fetch_tweets(self, username):
    """Fetch the timeline of *username* and parse every tweet found in it.

    The timeline endpoint answers with JSON whose ``items_html`` entry
    holds the rendered tweets. Each ``div.tweet`` element carrying a
    ``data-tweet-id`` attribute is handed to ``_parse_tweet_node``.

    Fires with the list of results from the tweets that parsed
    successfully; failed parses are consumed and left out.
    """
    timeline_url = (
        "https://twitter.com/i/profiles/show/%s/timeline/tweets?"
        "include_available_features=1&include_entities=1&"
        "include_new_items_bar=true") % username
    if helpers.is_twitter_blocked():
        # route the request through the proxy URI instead
        timeline_url = self._get_proxed_uri(timeline_url)

    raw = yield OSRPC.fetch_content(
        timeline_url, headers=self._get_twitter_headers(username))
    payload = json.loads(raw)
    assert "items_html" in payload

    markup = BeautifulSoup(payload['items_html'], "html.parser")
    wanted_attrs = {"class": "tweet", "data-tweet-id": True}
    pending = [
        self._parse_tweet_node(tweet_node, username)
        for tweet_node in markup.find_all("div", attrs=wanted_attrs)
    ]

    # every outcome is a (success, value) pair; keep successful values only
    outcomes = yield defer.DeferredList(pending, consumeErrors=True)
    defer.returnValue([value for succeeded, value in outcomes if succeeded])
@defer.inlineCallbacks
def _parse_tweet_node(self, tweet, username):
    """Convert one tweet element into a plain dict.

    ``tweet`` is the parsed HTML element of a single tweet; ``username``
    is forwarded to ``_fetch_iframe_card`` when card data is needed.

    Fires with a dict holding the keys ``tweetId``, ``tweetUrl``,
    ``author``, ``time``, ``timeFormatted``, ``text``, ``entries``
    (``urls`` and ``photos``) and ``isPinned``.

    A tweet lacking a ``data-time`` span, any text node, or (when it has
    no photos) an avatar image still raises, exactly as before.
    """
    # remove non-visible items
    for node in tweet.find_all(class_=["invisible", "u-hidden"]):
        node.decompose()

    twitter_url = "https://twitter.com"
    time_node = tweet.find("span", attrs={"data-time": True})
    text_node = tweet.find(class_="tweet-text")
    quote_text_node = tweet.find(class_="QuoteTweet-text")
    # Fall back to the quoted text when the tweet has no text of its own.
    # ``text_node`` may be missing altogether, so test for None before
    # calling ``get_text()`` on it.
    if quote_text_node and (text_node is None
                            or not text_node.get_text().strip()):
        text_node = quote_text_node

    photos = [
        node.get("data-image-url") for node in tweet.find_all(class_=[
            "AdaptiveMedia-photoContainer", "QuoteMedia-photoContainer"
        ])
    ]
    urls = [
        node.get("data-expanded-url")
        for node in (quote_text_node or text_node).find_all(
            class_="twitter-timeline-link",
            attrs={"data-expanded-url": True}
        )
    ]  # yapf: disable

    # fetch data from iframe card
    if (not photos or not urls) and tweet.get("data-card2-type"):
        iframe_node = tweet.find("div",
                                 attrs={"data-full-card-iframe-url": True})
        if iframe_node:
            iframe_card = yield self._fetch_iframe_card(
                twitter_url + iframe_node.get("data-full-card-iframe-url"),
                username)
            if not photos and iframe_card['photo']:
                photos.append(iframe_card['photo'])
            if not urls and iframe_card['url']:
                urls.append(iframe_card['url'])
            if iframe_card['text_node']:
                text_node = iframe_card['text_node']

    # last resort: use the author's avatar as the picture
    if not photos:
        photos.append(tweet.find("img", class_="avatar").get("src"))

    def _fetch_text(text_node):
        text = text_node.decode_contents(formatter="html").strip()
        # make site-relative links absolute
        text = re.sub(r'href="/', 'href="%s/' % twitter_url, text)
        if "</p>" not in text and "<br" not in text:
            text = re.sub(r"\n+", "<br />", text)
        return text

    # the answer does not depend on the photo, so ask only once
    if helpers.is_twitter_blocked():
        photos = [self._get_proxed_uri(uri) for uri in photos]

    defer.returnValue({
        "tweetId": tweet.get("data-tweet-id"),
        "tweetUrl": twitter_url + tweet.get("data-permalink-path"),
        "author": tweet.get("data-name"),
        "time": int(time_node.get("data-time")),
        "timeFormatted": time_node.string,
        "text": _fetch_text(text_node),
        "entries": {
            "urls": urls,
            "photos": photos
        },
        "isPinned": "user-pinned" in tweet.get("class")
    })
@defer.inlineCallbacks
def _fetch_iframe_card(self, url, username):
    """Download a tweet's iframe card and extract its photo, link and text.

    ``url`` is the card address; it is rewritten through
    ``_get_proxed_uri`` when ``helpers.is_twitter_blocked()`` is true.
    The content is requested with ``cache_valid="7d"``.

    Fires with a dict:
      * ``photo``     - ``data-src`` of the first such ``img``, or None
      * ``text_node`` - the ``div.SummaryCard-content`` element, or None
      * ``url``       - ``href`` of ``a.TwitterCard-container``, or None
    """
    if helpers.is_twitter_blocked():
        url = self._get_proxed_uri(url)
    html = yield OSRPC.fetch_content(
        url, headers=self._get_twitter_headers(username), cache_valid="7d")
    soup = BeautifulSoup(html, "html.parser")
    photo_node = soup.find("img", attrs={"data-src": True})
    url_node = soup.find("a", class_="TwitterCard-container")
    text_node = soup.find("div", class_="SummaryCard-content")
    if text_node:
        # Strip the destination label from the card text. ``find`` gives
        # None when the card has no such span, so guard before removing.
        destination_node = text_node.find(
            "span", class_="SummaryCard-destination")
        if destination_node is not None:
            destination_node.decompose()
    defer.returnValue({
        "photo": photo_node.get("data-src") if photo_node else None,
        "text_node": text_node,
        "url": url_node.get("href") if url_node else None
    })
@staticmethod
def _get_proxed_uri(uri):
index = uri.index("://")
return "https://dl.platformio.org/__prx__/" + uri[index + 3:]
@staticmethod
def _get_twitter_headers(username):
return {
"Accept":
"application/json, text/javascript, */*; q=0.01",
"Referer":
"https://twitter.com/%s" % username,
"User-Agent":
("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit"
"/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8"),
"X-Twitter-Active-User":
"yes",
"X-Requested-With":
"XMLHttpRequest"
}

View File

@ -33,7 +33,6 @@ class OSRPC(object):
@staticmethod
@defer.inlineCallbacks
def fetch_content(uri, data=None, headers=None, cache_valid=None):
timeout = 2
if not headers:
headers = {
"User-Agent":
@ -54,12 +53,9 @@ class OSRPC(object):
session = helpers.requests_session()
if data:
r = yield session.post(uri,
data=data,
headers=headers,
timeout=timeout)
r = yield session.post(uri, data=data, headers=headers)
else:
r = yield session.get(uri, headers=headers, timeout=timeout)
r = yield session.get(uri, headers=headers)
r.raise_for_status()
result = r.text