diff --git a/zerver/lib/bugdown/__init__.py b/zerver/lib/bugdown/__init__.py index f82ee1d83a..b30b6e4c61 100644 --- a/zerver/lib/bugdown/__init__.py +++ b/zerver/lib/bugdown/__init__.py @@ -12,6 +12,8 @@ import platform import time import HTMLParser import httplib2 +import itertools +import urllib import hashlib @@ -189,6 +191,74 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor): return None return "https://i.ytimg.com/vi/%s/default.jpg" % (match.group(2),) + def twitter_text(self, text, urls, user_mentions): + """ + Use data from the twitter API to turn links and mentions into A tags. + + This works by using the urls and user_mentions data from the twitter + API. + + The first step is finding the locations of the URLs and mentions in the + text. For each match we build a dictionary with the start location, end + location, the URL to link to, and the text to show in the link. + + Next we sort the matches by start location. And for each we add the + text from the end of the last link to the start of the current link to + the output. The text needs to be added to the text attribute of the first + node (the P tag) or the tail of the last link created. + + Finally we add any remaining text to the last node. 
+ """ + + to_linkify = [] + # Build dicts for URLs + for short_url, full_url in urls.items(): + for match in re.finditer(re.escape(short_url), text): + to_linkify.append({ + 'start': match.start(), + 'end': match.end(), + 'url': short_url, + 'text': full_url, + }) + # Build dicts for mentions + for user_mention in user_mentions: + screen_name = user_mention['screen_name'] + mention_string = '@' + screen_name + for match in re.finditer(re.escape(mention_string), text): + to_linkify.append({ + 'start': match.start(), + 'end': match.end(), + 'url': 'https://twitter.com/' + urllib.quote(screen_name), + 'text': mention_string, + }) + + def set_text(text): + """ + Helper to set the text or the tail of the current_node + """ + if current_node == p: + current_node.text = text + else: + current_node.tail = text + + to_linkify.sort(key=lambda x: x['start']) + p = current_node = markdown.util.etree.Element('p') + current_index = 0 + for link in to_linkify: + # The text we want to link starts inside already-linked text, so skip it + if link['start'] < current_index: + continue + # Add text from the end of the last link to the start of the current + # link + set_text(text[current_index:link['start']]) + current_index = link['end'] + current_node = a = url_to_a(link['url'], link['text']) + p.append(a) + + # Add any unused text + set_text(text[current_index:]) + return p + def twitter_link(self, url): tweet_id = get_tweet_id(url) @@ -214,10 +284,15 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor): # back gracefully. 
image_url = user.get('profile_image_url_https', user['profile_image_url']) profile_img.set('src', image_url) - p = markdown.util.etree.SubElement(tweet, 'p') + ## TODO: unescape is an internal function, so we should ## use something else if we can find it - p.text = HTMLParser.HTMLParser().unescape(res['text']) + text = HTMLParser.HTMLParser().unescape(res['text']) + urls = res.get('urls', {}) + user_mentions = res.get('user_mentions', []) + p = self.twitter_text(text, urls, user_mentions) + tweet.append(p) + span = markdown.util.etree.SubElement(tweet, 'span') span.text = "- %s (@%s)" % (user['name'], user['screen_name']) diff --git a/zerver/lib/bugdown/testing_mocks.py b/zerver/lib/bugdown/testing_mocks.py index 5b9b796dd6..2172399f4a 100644 --- a/zerver/lib/bugdown/testing_mocks.py +++ b/zerver/lib/bugdown/testing_mocks.py @@ -2,10 +2,8 @@ from __future__ import absolute_import import ujson -def twitter(tweet_id): - if tweet_id not in ["112652479837110273", "287977969287315456", "287977969287315457"]: - return None - return ujson.loads("""{ + +NORMAL_TWEET = """{ "coordinates": null, "created_at": "Sat Sep 10 22:23:38 +0000 2011", "truncated": false, @@ -62,5 +60,109 @@ def twitter(tweet_id): "show_all_inline_media": false }, "in_reply_to_screen_name": "twitter", - "in_reply_to_status_id": null -}""") + "in_reply_to_status_id": null, + "user_mentions": [ + { + "screen_name": "twitter", + "name": "Twitter", + "id": 1 + }, + { + "screen_name": "seepicturely", + "name": "Seepicturely", + "id": 2 + }, + { + "screen_name": "boscomonkey", + "name": "Bosco So", + "id": 3 + }, + { + "screen_name": "episod", + "name": "Taylor Singletary", + "id": 4 + } + ], + "urls": { + "http://t.co/6J2EgYM": "http://instagram.com/p/MuW67/" + } +}""" + +MENTION_IN_LINK_TWEET = """{ + "coordinates": null, + "created_at": "Sat Sep 10 22:23:38 +0000 2011", + "truncated": false, + "favorited": false, + "id_str": "112652479837110273", + "in_reply_to_user_id_str": "783214", + "text": 
"http://t.co/@foo", + "contributors": null, + "id": 112652479837110273, + "retweet_count": 0, + "in_reply_to_status_id_str": null, + "geo": null, + "retweeted": false, + "possibly_sensitive": false, + "in_reply_to_user_id": 783214, + "user": { + "profile_sidebar_border_color": "eeeeee", + "profile_background_tile": true, + "profile_sidebar_fill_color": "efefef", + "name": "Eoin McMillan ", + "profile_image_url": "http://a1.twimg.com/profile_images/1380912173/Screen_shot_2011-06-03_at_7.35.36_PM_normal.png", + "created_at": "Mon May 16 20:07:59 +0000 2011", + "location": "Twitter", + "profile_link_color": "009999", + "follow_request_sent": null, + "is_translator": false, + "id_str": "299862462", + "favourites_count": 0, + "default_profile": false, + "url": "http://www.eoin.me", + "contributors_enabled": false, + "id": 299862462, + "utc_offset": null, + "profile_image_url_https": "https://si0.twimg.com/profile_images/1380912173/Screen_shot_2011-06-03_at_7.35.36_PM_normal.png", + "profile_use_background_image": true, + "listed_count": 0, + "followers_count": 9, + "lang": "en", + "profile_text_color": "333333", + "protected": false, + "profile_background_image_url_https": "https://si0.twimg.com/images/themes/theme14/bg.gif", + "description": "Eoin's photography account. 
See @mceoin for tweets.", + "geo_enabled": false, + "verified": false, + "profile_background_color": "131516", + "time_zone": null, + "notifications": null, + "statuses_count": 255, + "friends_count": 0, + "default_profile_image": false, + "profile_background_image_url": "http://a1.twimg.com/images/themes/theme14/bg.gif", + "screen_name": "imeoin", + "following": null, + "show_all_inline_media": false + }, + "in_reply_to_screen_name": "twitter", + "in_reply_to_status_id": null, + "user_mentions": [ + { + "screen_name": "foo", + "name": "Foo", + "id": 1 + } + ], + "urls": { + "http://t.co/@foo": "http://foo.com" + } +}""" + + +def twitter(tweet_id): + if tweet_id in ["112652479837110273", "287977969287315456", "287977969287315457"]: + return ujson.loads(NORMAL_TWEET) + elif tweet_id == "287977969287315458": + return ujson.loads(MENTION_IN_LINK_TWEET) + else: + return None diff --git a/zerver/tests.py b/zerver/tests.py index 56ae1341e8..43cd30dc93 100644 --- a/zerver/tests.py +++ b/zerver/tests.py @@ -4284,9 +4284,13 @@ xxxxxxx

\n

xxxxxxx xxxxx xxxx xxxxx:
\nxxxxxx: xxx def make_link(url): return '%s' % (url, url, url) - def make_inline_twitter_preview(url): + normal_tweet_html = """@twitter meets @seepicturely at #tcdisrupt cc.@boscomonkey @episod http://instagram.com/p/MuW67/""" + + mention_in_link_tweet_html = """http://foo.com""" + + def make_inline_twitter_preview(url, tweet_html): ## As of right now, all previews are mocked to be the exact same tweet - return """

@twitter meets @seepicturely at #tcdisrupt cc.@boscomonkey @episod http://t.co/6J2EgYM

- Eoin McMillan (@imeoin)
""" % (url, ) + return """

%s

- Eoin McMillan (@imeoin)
""" % (url, tweet_html) msg = 'http://www.twitter.com' converted = bugdown_convert(msg) @@ -4313,25 +4317,31 @@ xxxxxxx

\n

xxxxxxx xxxxx xxxx xxxxx:
\nxxxxxx: xxx msg = 'http://www.twitter.com/wdaher/status/287977969287315456' converted = bugdown_convert(msg) self.assertEqual(converted, '

%s

\n%s' % (make_link('http://www.twitter.com/wdaher/status/287977969287315456'), - make_inline_twitter_preview('http://www.twitter.com/wdaher/status/287977969287315456'))) + make_inline_twitter_preview('http://www.twitter.com/wdaher/status/287977969287315456', normal_tweet_html))) msg = 'https://www.twitter.com/wdaher/status/287977969287315456' converted = bugdown_convert(msg) self.assertEqual(converted, '

%s

\n%s' % (make_link('https://www.twitter.com/wdaher/status/287977969287315456'), - make_inline_twitter_preview('https://www.twitter.com/wdaher/status/287977969287315456'))) + make_inline_twitter_preview('https://www.twitter.com/wdaher/status/287977969287315456', normal_tweet_html))) msg = 'http://twitter.com/wdaher/status/287977969287315456' converted = bugdown_convert(msg) self.assertEqual(converted, '

%s

\n%s' % (make_link('http://twitter.com/wdaher/status/287977969287315456'), - make_inline_twitter_preview('http://twitter.com/wdaher/status/287977969287315456'))) + make_inline_twitter_preview('http://twitter.com/wdaher/status/287977969287315456', normal_tweet_html))) # Only one should get converted msg = 'http://twitter.com/wdaher/status/287977969287315456 http://twitter.com/wdaher/status/287977969287315457' converted = bugdown_convert(msg) self.assertEqual(converted, '

%s %s

\n%s' % (make_link('http://twitter.com/wdaher/status/287977969287315456'), make_link('http://twitter.com/wdaher/status/287977969287315457'), - make_inline_twitter_preview('http://twitter.com/wdaher/status/287977969287315456'))) + make_inline_twitter_preview('http://twitter.com/wdaher/status/287977969287315456', normal_tweet_html))) + # Tweet has a mention in a URL, only the URL is linked + msg = 'http://twitter.com/wdaher/status/287977969287315458' + + converted = bugdown_convert(msg) + self.assertEqual(converted, '

%s

\n%s' % (make_link('http://twitter.com/wdaher/status/287977969287315458'), + make_inline_twitter_preview('http://twitter.com/wdaher/status/287977969287315458', mention_in_link_tweet_html))) def test_emoji(self): def emoji_img(name, filename=None):