twitter: Detect link styles more robustly.

* Deal with shorter tweet IDs
  (some old tweets don't have a full 18-character ID)
* Allow trailing slash
* Deal with old-style #! syntax
* Deal with links that link to a photo

(imported from commit 008a98c806f3b8dddd9e2f18a8f002af6932766f)
This commit is contained in:
Waseem Daher
2013-12-13 17:45:01 -05:00
parent 1370c014a5
commit 14a5c6bf66
2 changed files with 14 additions and 1 deletions

View File

@@ -135,8 +135,12 @@ def get_tweet_id(url):
parsed_url = urlparse.urlparse(url) parsed_url = urlparse.urlparse(url)
if not (parsed_url.netloc == 'twitter.com' or parsed_url.netloc.endswith('.twitter.com')): if not (parsed_url.netloc == 'twitter.com' or parsed_url.netloc.endswith('.twitter.com')):
return False return False
to_match = parsed_url.path
# Old-style URLs such as twitter.com/#!/wdaher/status/1231241234 carry the tweet path in the fragment rather than the path, so match against the fragment instead
if parsed_url.path == '/' and len(parsed_url.fragment) > 5:
to_match= parsed_url.fragment
tweet_id_match = re.match(r'^/.*?/status(es)?/(?P<tweetid>\d{18})$', parsed_url.path) tweet_id_match = re.match(r'^!?/.*?/status(es)?/(?P<tweetid>\d{10,18})(/photo/[0-9])?/?$', to_match)
if not tweet_id_match: if not tweet_id_match:
return False return False
return tweet_id_match.group("tweetid") return tweet_id_match.group("tweetid")

View File

@@ -3783,6 +3783,15 @@ xxxxxxx</strong></p>\n<p>xxxxxxx xxxxx xxxx xxxxx:<br>\n<code>xxxxxx</code>: xxx
converted = bugdown_convert(msg) converted = bugdown_convert(msg)
self.assertEqual(converted, '<p>test </p>') self.assertEqual(converted, '<p>test </p>')
def test_twitter_id_extraction(self):
    """Check get_tweet_id against each tweet URL shape exercised here.

    Covers the old-style ``#!`` fragment form, ``status`` and ``statuses``
    paths, a shorter 10-digit ID, a trailing slash, and a ``/photo/1``
    suffix. Each case expects the bare numeric ID back as a string.
    """
    # (tweet URL, expected extracted ID), checked in order; the first
    # mismatch fails the test.
    url_and_expected_id = [
        ('http://twitter.com/#!/VizzQuotes/status/409030735191097344', '409030735191097344'),
        ('http://twitter.com/VizzQuotes/status/409030735191097344', '409030735191097344'),
        ('http://twitter.com/VizzQuotes/statuses/409030735191097344', '409030735191097344'),
        ('https://twitter.com/wdaher/status/1017581858', '1017581858'),
        ('https://twitter.com/wdaher/status/1017581858/', '1017581858'),
        ('https://twitter.com/windyoona/status/410766290349879296/photo/1', '410766290349879296'),
        ('https://twitter.com/windyoona/status/410766290349879296/', '410766290349879296'),
    ]
    for tweet_url, expected_id in url_and_expected_id:
        self.assertEqual(bugdown.get_tweet_id(tweet_url), expected_id)
def test_inline_interesting_links(self): def test_inline_interesting_links(self):
def make_link(url): def make_link(url):
return '<a href="%s" target="_blank" title="%s">%s</a>' % (url, url, url) return '<a href="%s" target="_blank" title="%s">%s</a>' % (url, url, url)