Add support to linkify links and mentions on twitter

This converts links in tweets to `<a>` tags. We also replace the displayed
text of each Twitter short URL with its target URL. Mentions are linked to
the user's Twitter page.

(imported from commit 192d5546a7eea82759f9ae30d82c102aed15ff71)
This commit is contained in:
Jason Michalski
2014-01-08 16:56:48 -05:00
committed by Waseem Daher
parent 45d3bb4f04
commit 1352dccea5
3 changed files with 201 additions and 14 deletions

View File

@@ -12,6 +12,8 @@ import platform
import time
import HTMLParser
import httplib2
import itertools
import urllib
import hashlib
@@ -189,6 +191,74 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
return None
return "https://i.ytimg.com/vi/%s/default.jpg" % (match.group(2),)
def twitter_text(self, text, urls, user_mentions):
    """
    Use data from the twitter API to turn links and mentions into A tags.

    text is the tweet text. urls maps each short URL that appears in the
    text to the full URL that should be displayed for it. user_mentions is
    a list of dicts, each carrying a 'screen_name' key.

    Returns a P element containing the text with one A child per link.

    The first step is finding the locations of the URLs and mentions in the
    text. For each match we build a dictionary with the start location, end
    location, the URL to link to, and the text to show in the link.

    Next we sort the matches by start location (longest match first when
    several start at the same place). For each one we add the text from the
    end of the last link to the start of the current link to the output.
    That text needs to be added to the text attribute of the first node
    (the P tag) or to the tail of the last link created.

    Finally we add any remaining text to the last node.
    """
    to_linkify = []

    # Build dicts for URLs: link to the short URL, display the full URL.
    for short_url, full_url in urls.items():
        for match in re.finditer(re.escape(short_url), text):
            to_linkify.append({
                'start': match.start(),
                'end': match.end(),
                'url': short_url,
                'text': full_url,
            })

    # Build dicts for mentions
    for user_mention in user_mentions:
        screen_name = user_mention['screen_name']
        mention_string = '@' + screen_name
        # Screen names are made of letters, digits and underscores, so a
        # match followed by one of those is only the prefix of a different,
        # longer handle (e.g. @foo inside @foobar) and must not be linked.
        mention_re = re.escape(mention_string) + r'(?![A-Za-z0-9_])'
        for match in re.finditer(mention_re, text):
            to_linkify.append({
                'start': match.start(),
                'end': match.end(),
                'url': 'https://twitter.com/' + urllib.quote(screen_name),
                'text': mention_string,
            })

    def set_text(text):
        """
        Helper to set the text or the tail of the current_node
        """
        # Text directly inside the P tag lives in its .text; text that
        # follows a link lives in that link's .tail.
        if current_node is p:
            current_node.text = text
        else:
            current_node.tail = text

    # Order by position; when two matches start at the same index prefer
    # the longer one so a short URL that is a prefix of another does not
    # shadow it.
    to_linkify.sort(key=lambda x: (x['start'], -x['end']))
    p = current_node = markdown.util.etree.Element('p')
    current_index = 0
    for link in to_linkify:
        # The text we want to link starts inside already linked text, so
        # skip it.
        if link['start'] < current_index:
            continue
        # Add text from the end of last link to the start of the current
        # link
        set_text(text[current_index:link['start']])
        current_index = link['end']
        current_node = a = url_to_a(link['url'], link['text'])
        p.append(a)

    # Add any unused text
    set_text(text[current_index:])
    return p
def twitter_link(self, url):
tweet_id = get_tweet_id(url)
@@ -214,10 +284,15 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
# back gracefully.
image_url = user.get('profile_image_url_https', user['profile_image_url'])
profile_img.set('src', image_url)
p = markdown.util.etree.SubElement(tweet, 'p')
## TODO: unescape is an internal function, so we should
## use something else if we can find it
p.text = HTMLParser.HTMLParser().unescape(res['text'])
text = HTMLParser.HTMLParser().unescape(res['text'])
urls = res.get('urls', {})
user_mentions = res.get('user_mentions', [])
p = self.twitter_text(text, urls, user_mentions)
tweet.append(p)
span = markdown.util.etree.SubElement(tweet, 'span')
span.text = "- %s (@%s)" % (user['name'], user['screen_name'])

View File

@@ -2,10 +2,8 @@ from __future__ import absolute_import
import ujson
def twitter(tweet_id):
if tweet_id not in ["112652479837110273", "287977969287315456", "287977969287315457"]:
return None
return ujson.loads("""{
NORMAL_TWEET = """{
"coordinates": null,
"created_at": "Sat Sep 10 22:23:38 +0000 2011",
"truncated": false,
@@ -62,5 +60,109 @@ def twitter(tweet_id):
"show_all_inline_media": false
},
"in_reply_to_screen_name": "twitter",
"in_reply_to_status_id": null
}""")
"in_reply_to_status_id": null,
"user_mentions": [
{
"screen_name": "twitter",
"name": "Twitter",
"id": 1
},
{
"screen_name": "seepicturely",
"name": "Seepicturely",
"id": 2
},
{
"screen_name": "boscomonkey",
"name": "Bosco So",
"id": 3
},
{
"screen_name": "episod",
"name": "Taylor Singletary",
"id": 4
}
],
"urls": {
"http://t.co/6J2EgYM": "http://instagram.com/p/MuW67/"
}
}"""
# Canned tweet JSON whose "text" is a single short URL that itself contains
# "@foo", while "foo" is also listed in "user_mentions" and the short URL is
# listed in "urls". Everything else mirrors an ordinary tweet payload.
MENTION_IN_LINK_TWEET = """{
"coordinates": null,
"created_at": "Sat Sep 10 22:23:38 +0000 2011",
"truncated": false,
"favorited": false,
"id_str": "112652479837110273",
"in_reply_to_user_id_str": "783214",
"text": "http://t.co/@foo",
"contributors": null,
"id": 112652479837110273,
"retweet_count": 0,
"in_reply_to_status_id_str": null,
"geo": null,
"retweeted": false,
"possibly_sensitive": false,
"in_reply_to_user_id": 783214,
"user": {
"profile_sidebar_border_color": "eeeeee",
"profile_background_tile": true,
"profile_sidebar_fill_color": "efefef",
"name": "Eoin McMillan ",
"profile_image_url": "http://a1.twimg.com/profile_images/1380912173/Screen_shot_2011-06-03_at_7.35.36_PM_normal.png",
"created_at": "Mon May 16 20:07:59 +0000 2011",
"location": "Twitter",
"profile_link_color": "009999",
"follow_request_sent": null,
"is_translator": false,
"id_str": "299862462",
"favourites_count": 0,
"default_profile": false,
"url": "http://www.eoin.me",
"contributors_enabled": false,
"id": 299862462,
"utc_offset": null,
"profile_image_url_https": "https://si0.twimg.com/profile_images/1380912173/Screen_shot_2011-06-03_at_7.35.36_PM_normal.png",
"profile_use_background_image": true,
"listed_count": 0,
"followers_count": 9,
"lang": "en",
"profile_text_color": "333333",
"protected": false,
"profile_background_image_url_https": "https://si0.twimg.com/images/themes/theme14/bg.gif",
"description": "Eoin's photography account. See @mceoin for tweets.",
"geo_enabled": false,
"verified": false,
"profile_background_color": "131516",
"time_zone": null,
"notifications": null,
"statuses_count": 255,
"friends_count": 0,
"default_profile_image": false,
"profile_background_image_url": "http://a1.twimg.com/images/themes/theme14/bg.gif",
"screen_name": "imeoin",
"following": null,
"show_all_inline_media": false
},
"in_reply_to_screen_name": "twitter",
"in_reply_to_status_id": null,
"user_mentions": [
{
"screen_name": "foo",
"name": "Foo",
"id": 1
}
],
"urls": {
"http://t.co/@foo": "http://foo.com"
}
}"""
def twitter(tweet_id):
    """
    Return the parsed canned tweet for a known tweet id, or None.

    Three ids map to NORMAL_TWEET and one maps to MENTION_IN_LINK_TWEET;
    any other id yields None.
    """
    normal_tweet_ids = ["112652479837110273", "287977969287315456", "287977969287315457"]
    if tweet_id in normal_tweet_ids:
        fixture = NORMAL_TWEET
    elif tweet_id == "287977969287315458":
        fixture = MENTION_IN_LINK_TWEET
    else:
        # Unknown tweet: nothing to parse.
        return None
    return ujson.loads(fixture)

View File

@@ -4284,9 +4284,13 @@ xxxxxxx</strong></p>\n<p>xxxxxxx xxxxx xxxx xxxxx:<br>\n<code>xxxxxx</code>: xxx
def make_link(url):
    """Return the expected A tag for a bare url: href, title and text are all url."""
    anchor_template = '<a href="%s" target="_blank" title="%s">%s</a>'
    # The same value fills all three slots of the template.
    return anchor_template % ((url,) * 3)
def make_inline_twitter_preview(url):
# Expected inner HTML of the tweet paragraph for the normal mocked tweet:
# every mention is an A tag to the user's twitter page and the short URL is
# an A tag whose visible text is the full URL.
normal_tweet_html = """<a href="https://twitter.com/twitter" target="_blank" title="https://twitter.com/twitter">@twitter</a> meets <a href="https://twitter.com/seepicturely" target="_blank" title="https://twitter.com/seepicturely">@seepicturely</a> at #tcdisrupt cc.<a href="https://twitter.com/boscomonkey" target="_blank" title="https://twitter.com/boscomonkey">@boscomonkey</a> <a href="https://twitter.com/episod" target="_blank" title="https://twitter.com/episod">@episod</a> <a href="http://t.co/6J2EgYM" target="_blank" title="http://t.co/6J2EgYM">http://instagram.com/p/MuW67/</a>"""
# Expected inner HTML when the mention sits inside the short URL: a single A
# tag for the URL and no separate link for the mention.
mention_in_link_tweet_html = """<a href="http://t.co/@foo" target="_blank" title="http://t.co/@foo">http://foo.com</a>"""
def make_inline_twitter_preview(url, tweet_html):
    """
    Return the expected inline preview HTML for a mocked tweet.

    url is the tweet link placed on the avatar anchor; tweet_html is the
    already-rendered inner HTML of the tweet's P tag.
    """
    ## The avatar and author are the same for every mocked tweet; only the
    ## link target and the tweet body vary.
    return """<div class="inline-preview-twitter"><div class="twitter-tweet"><a href="%s" target="_blank"><img class="twitter-avatar" src="https://si0.twimg.com/profile_images/1380912173/Screen_shot_2011-06-03_at_7.35.36_PM_normal.png"></a><p>%s</p><span>- Eoin McMillan (@imeoin)</span></div></div>""" % (url, tweet_html)
msg = 'http://www.twitter.com'
converted = bugdown_convert(msg)
@@ -4313,25 +4317,31 @@ xxxxxxx</strong></p>\n<p>xxxxxxx xxxxx xxxx xxxxx:<br>\n<code>xxxxxx</code>: xxx
msg = 'http://www.twitter.com/wdaher/status/287977969287315456'
converted = bugdown_convert(msg)
self.assertEqual(converted, '<p>%s</p>\n%s' % (make_link('http://www.twitter.com/wdaher/status/287977969287315456'),
make_inline_twitter_preview('http://www.twitter.com/wdaher/status/287977969287315456')))
make_inline_twitter_preview('http://www.twitter.com/wdaher/status/287977969287315456', normal_tweet_html)))
msg = 'https://www.twitter.com/wdaher/status/287977969287315456'
converted = bugdown_convert(msg)
self.assertEqual(converted, '<p>%s</p>\n%s' % (make_link('https://www.twitter.com/wdaher/status/287977969287315456'),
make_inline_twitter_preview('https://www.twitter.com/wdaher/status/287977969287315456')))
make_inline_twitter_preview('https://www.twitter.com/wdaher/status/287977969287315456', normal_tweet_html)))
msg = 'http://twitter.com/wdaher/status/287977969287315456'
converted = bugdown_convert(msg)
self.assertEqual(converted, '<p>%s</p>\n%s' % (make_link('http://twitter.com/wdaher/status/287977969287315456'),
make_inline_twitter_preview('http://twitter.com/wdaher/status/287977969287315456')))
make_inline_twitter_preview('http://twitter.com/wdaher/status/287977969287315456', normal_tweet_html)))
# Only one should get converted
msg = 'http://twitter.com/wdaher/status/287977969287315456 http://twitter.com/wdaher/status/287977969287315457'
converted = bugdown_convert(msg)
self.assertEqual(converted, '<p>%s %s</p>\n%s' % (make_link('http://twitter.com/wdaher/status/287977969287315456'),
make_link('http://twitter.com/wdaher/status/287977969287315457'),
make_inline_twitter_preview('http://twitter.com/wdaher/status/287977969287315456')))
make_inline_twitter_preview('http://twitter.com/wdaher/status/287977969287315456', normal_tweet_html)))
# Tweet has a mention in a URL, only the URL is linked
msg = 'http://twitter.com/wdaher/status/287977969287315458'
converted = bugdown_convert(msg)
self.assertEqual(converted, '<p>%s</p>\n%s' % (make_link('http://twitter.com/wdaher/status/287977969287315458'),
make_inline_twitter_preview('http://twitter.com/wdaher/status/287977969287315458', mention_in_link_tweet_html)))
def test_emoji(self):
def emoji_img(name, filename=None):