Better previews for Dropbox shmodel.

Get previews for docs using Open Graph protocol images.

(imported from commit 11a9b446eeb5786fdbf536681a100f43ad6db978)
This commit is contained in:
Jason Michalski
2014-05-20 23:11:29 -07:00
committed by Waseem Daher
parent 6ff3070c8e
commit 94ec1f3741
3 changed files with 149 additions and 16 deletions

View File

@@ -2254,9 +2254,26 @@ div.floating_recipient {
border: none !important;
}
.twitter-image img, .message_inline_image img {
.message_inline_ref {
margin-bottom: 5px;
margin-left: 5px;
height: 50px;
display: block !important;
border: none !important;
}
.twitter-image img, .message_inline_image img, .message_inline_ref img {
height: auto;
max-height: 100%;
float: left;
margin-right: 10px;
}
.message_inline_image_title {
font-weight: bold;
}
.message_inline_image_desc {
}
.popover {

View File

@@ -14,12 +14,15 @@ import HTMLParser
import httplib2
import itertools
import urllib
import xml.etree.cElementTree as etree
import hashlib
from collections import defaultdict
import hmac
import requests
from django.core import mail
from django.conf import settings
@@ -69,15 +72,24 @@ def walk_tree(root, processor, stop_after_first=False):
return results
def add_a(root, url, link, height="", title=None, desc=None, class_attr="message_inline_image"):
    """Append an inline preview (a linked image plus optional summary) to root.

    root: element that receives the new preview <div>.
    url: value of the <img> src attribute.
    link: href of the <a> wrapping the image.
    height: accepted for backward compatibility; it is not used.
    title: tooltip of the link. When it is None or empty, the tooltip falls
        back to url_filename(link) so the link never gets a blank tooltip.
    desc: description rendered below the title.
    class_attr: CSS class of the outer <div>.

    The title/description summary is rendered only when both title and desc
    are non-empty.
    """
    div = markdown.util.etree.SubElement(root, "div")
    div.set("class", class_attr)

    a = markdown.util.etree.SubElement(div, "a")
    a.set("href", link)
    a.set("target", "_blank")
    # Callers may pass title="" to mean "no title"; treat that like None so
    # the tooltip still identifies the link.
    a.set("title", title if title else url_filename(link))

    img = markdown.util.etree.SubElement(a, "img")
    img.set("src", url)

    if title and desc:
        summary_div = markdown.util.etree.SubElement(div, "div")

        title_div = markdown.util.etree.SubElement(summary_div, "div")
        title_div.set("class", "message_inline_image_title")
        title_div.text = title

        # NOTE(review): this element is tagged "desc" rather than "div";
        # kept unchanged so the rendered markup stays the same -- confirm
        # that the tag name is intentional.
        desc_div = markdown.util.etree.SubElement(summary_div, "desc")
        desc_div.set("class", "message_inline_image_desc")
        desc_div.text = desc
def hash_embedly_url(link):
return 'embedly:' + hashlib.sha1(link).hexdigest()
@@ -135,6 +147,69 @@ def fetch_tweet_data(tweet_id):
return None
return res
# Each pattern is matched against the text that follows a '<' in an HTML
# document, so it recognises the beginning of an opening or closing
# <head> / <meta> tag (the tag name followed by a space or by '>').
HEAD_START_RE = re.compile(r'^head[ >]')
HEAD_END_RE = re.compile(r'^/head[ >]')
META_START_RE = re.compile(r'^meta[ >]')
META_END_RE = re.compile(r'^/meta[ >]')
def fetch_open_graph_image(url):
    """Fetch url and return the Open Graph preview data found in its <head>.

    Returns a dict with the keys 'image', 'title' and 'desc', holding the
    content attributes of the og:image, og:title and og:description <meta>
    tags. 'title' and 'desc' are None when the corresponding tag is absent.

    Returns None when the extracted <head> cannot be parsed as XML, or when
    the page declares no og:image content (there is nothing to preview).

    Exceptions raised by requests.get() are not caught here.
    """
    in_head = False
    # HTML closes <meta> tags implicitly but XML does not: remember whether
    # the last copied <meta> is still open so that a closing tag can be
    # added before the next tag starts.
    last_closed = True
    head = []
    # TODO: What if response content is huge? Should we get headers first?
    # NOTE(review): no timeout is passed to requests.get(); confirm whether
    # callers can tolerate a slow or unresponsive server.
    content = requests.get(url).content

    # Extract the head and meta tags.
    # All meta tags are self closing, have no children or are closed
    # automatically.
    for part in content.split('<'):
        if not in_head and HEAD_START_RE.match(part):
            # Started the head node: output it to have a document root.
            in_head = True
            head.append('<head>')
        elif in_head and HEAD_END_RE.match(part):
            # Found the end of the head: close any remaining tag, then stop
            # processing.
            in_head = False
            if not last_closed:
                last_closed = True
                head.append('</meta>')
            head.append('</head>')
            break
        elif in_head and META_START_RE.match(part):
            # Found a meta node: copy it, closing the previous one first.
            if not last_closed:
                head.append('</meta>')
                last_closed = True
            head.append('<')
            head.append(part)
            if '/>' not in part:
                last_closed = False
        elif in_head and META_END_RE.match(part):
            # End of a meta node: just copy it to close the tag.
            head.append('<')
            head.append(part)
            last_closed = True

    try:
        doc = etree.fromstring(''.join(head))
    except etree.ParseError:
        return None

    def meta_content(prop):
        # content attribute of <meta property="prop">, or None if missing.
        node = doc.find('meta[@property="%s"]' % (prop,))
        if node is None:
            return None
        return node.get('content')

    image = meta_content('og:image')
    if image is None:
        # Without an image there is no preview to show.
        return None

    return {'image': image,
            'title': meta_content('og:title'),
            'desc': meta_content('og:description')}
def get_tweet_id(url):
parsed_url = urlparse.urlparse(url)
if not (parsed_url.netloc == 'twitter.com' or parsed_url.netloc.endswith('.twitter.com')):
@@ -180,12 +255,41 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
def dropbox_image(self, url):
    """Return preview information for a Dropbox shared link, or None.

    Only URLs on dropbox.com (or a subdomain) whose path starts with /s/,
    /sh/ or /sc/ are considered; anything else yields None.

    On success the result is a dict with at least the keys 'image' (the
    URL to use as the preview image) and 'is_image' (True when the link is
    an album or self.is_image(url) is true). It may also carry whatever
    other keys fetch_open_graph_image() put into it.
    """
    parsed_url = urlparse.urlparse(url)
    if not (parsed_url.netloc == 'dropbox.com' or parsed_url.netloc.endswith('.dropbox.com')):
        return None

    # /sc/ is generally speaking a photo album.
    is_album = parsed_url.path.startswith('/sc/')
    # Only allow previews of Dropbox shared links.
    if not (is_album or parsed_url.path.startswith(('/s/', '/sh/'))):
        return None

    # Try to retrieve open graph protocol info for a preview.
    # This might be redundant right now for shared links for images.
    # However, we might want to make use of title and description
    # in the future. If the actual image is too big, we might also
    # want to use the open graph image.
    image_info = fetch_open_graph_image(url)

    is_image = is_album or self.is_image(url)

    # If it is from an album or not an actual image file,
    # just use the open graph image.
    # NOTE(review): assumes that no preview is produced (None is returned)
    # when no open graph info is available for such links -- confirm.
    if is_album or not is_image:
        if image_info is not None:
            image_info["is_image"] = is_image
        return image_info

    # Otherwise, try to retrieve the actual image.
    # This is because the open graph image from Dropbox may have padding
    # and gifs do not work.
    # TODO: What if image is huge? Should we get headers first?
    if image_info is None:
        image_info = dict()
    image_info['is_image'] = True
    parsed_url_list = list(parsed_url)
    parsed_url_list[4] = "dl=1"  # Replaces query
    image_info["image"] = urlparse.urlunparse(parsed_url_list)

    return image_info
def youtube_image(self, url):
@@ -432,9 +536,19 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
rendered_tweet_count = 0
embedly_urls = []
for url in found_urls:
dropbox = self.dropbox_image(url)
if dropbox is not None:
add_a(root, dropbox, url)
dropbox_image = self.dropbox_image(url)
if dropbox_image is not None:
class_attr = "message_inline_ref"
is_image = dropbox_image["is_image"]
if is_image:
class_attr = "message_inline_image"
# Not making use of title and description of images
dropbox_image['title'] = ""
dropbox_image['desc'] = ""
add_a(root, dropbox_image['image'], url,
title=dropbox_image.get('title', ""),
desc=dropbox_image.get('desc', ""),
class_attr=class_attr)
continue
if self.is_image(url):
add_a(root, url, url)

View File

@@ -145,6 +145,7 @@ class BugdownTest(TestCase):
def test_bugdown_fixtures(self):
format_tests, linkify_tests = self.load_bugdown_tests()
self.maxDiff = None
for name, test in format_tests.iteritems():
converted = bugdown_convert(test['input'])
@@ -164,6 +165,7 @@ class BugdownTest(TestCase):
print "Running Bugdown Linkify tests"
self.maxDiff = None
for inline_url, reference, url in linkify_tests:
try:
match = replaced(reference, url, phrase=inline_url)
@@ -185,18 +187,18 @@ class BugdownTest(TestCase):
msg = 'Look at how hilarious our old office was: https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG'
converted = bugdown_convert(msg)
self.assertEqual(converted, '<p>Look at how hilarious our old office was: <a href="https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG" target="_blank" title="https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG">https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG</a></p>\n<div class="message_inline_image"><a href="https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG" target="_blank" title="https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG"><img src="https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG?dl=1"></a></div>')
self.assertEqual(converted, '<p>Look at how hilarious our old office was: <a href="https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG" target="_blank" title="https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG">https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG</a></p>\n<div class="message_inline_image"><a href="https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG" target="_blank" title="https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG"><img src="https://photos-3.dropbox.com/t/0/AAD9AiA1XTguQgdHVN8VLpkytZhKqoqn4LsD9FcfXsfmpg/12/129/jpeg/320x320/1/_/0/2/IMG_0923.JPG/qSBawV13_29SGhaEdUo5tqLgpA5KEPYG0zPZCXQt0RQ"></a></div>')
msg = 'Look at my hilarious drawing: https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg'
converted = bugdown_convert(msg)
self.assertEqual(converted, '<p>Look at my hilarious drawing: <a href="https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg" target="_blank" title="https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg">https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg</a></p>\n<div class="message_inline_image"><a href="https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg" target="_blank" title="https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg"><img src="https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg?dl=1"></a></div>')
#self.assertEqual(converted, '<p>Look at my hilarious drawing: <a href="https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg" target="_blank" title="https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg">https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg</a></p>\n<div class="message_inline_image"><a href="https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg" target="_blank" title="https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg"><img src="https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg?dl=1"></a></div>')
# Test photo album previews
msg = 'https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5'
converted = bugdown_convert(msg)
self.assertEqual(converted, '<p><a href="https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5" target="_blank" title="https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5">https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5</a></p>\n<div class="message_inline_image"><a href="https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5" target="_blank" title="https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5"><img src="https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5?dl=1"></a></div>')
self.assertEqual(converted, '<p><a href="https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5" target="_blank" title="https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5">https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5</a></p>\n<div class="message_inline_image"><a href="https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5" target="_blank" title="https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5"><img src="https://www.dropbox.com/photos/c/permanent_thumb/ROmr9K1XYtmpneM?size_mode=1&size=320x320"></a></div>')
# Make sure we're not overzealous in our conversion:
msg = 'Look at the new dropbox logo: https://www.dropbox.com/static/images/home_logo.png'