Better previews for dropbox shmodel.

Getting previews for docs using open-graph protocol images.

(imported from commit 11a9b446eeb5786fdbf536681a100f43ad6db978)
This commit is contained in:
Jason Michalski
2014-05-20 23:11:29 -07:00
committed by Waseem Daher
parent 6ff3070c8e
commit 94ec1f3741
3 changed files with 149 additions and 16 deletions

View File

@@ -2254,9 +2254,26 @@ div.floating_recipient {
border: none !important; border: none !important;
} }
.twitter-image img, .message_inline_image img { .message_inline_ref {
margin-bottom: 5px;
margin-left: 5px;
height: 50px;
display: block !important;
border: none !important;
}
.twitter-image img, .message_inline_image img, .message_inline_ref img {
height: auto; height: auto;
max-height: 100%; max-height: 100%;
float: left;
margin-right: 10px;
}
.message_inline_image_title {
font-weight: bold;
}
.message_inline_image_desc {
} }
.popover { .popover {

View File

@@ -14,12 +14,15 @@ import HTMLParser
import httplib2 import httplib2
import itertools import itertools
import urllib import urllib
import xml.etree.cElementTree as etree
import hashlib import hashlib
from collections import defaultdict from collections import defaultdict
import hmac import hmac
import requests
from django.core import mail from django.core import mail
from django.conf import settings from django.conf import settings
@@ -69,15 +72,24 @@ def walk_tree(root, processor, stop_after_first=False):
return results return results
def add_a(root, url, link, height=None): # height is not actually used
def add_a(root, url, link, height="", title=None, desc=None, class_attr="message_inline_image"):
div = markdown.util.etree.SubElement(root, "div") div = markdown.util.etree.SubElement(root, "div")
div.set("class", "message_inline_image"); div.set("class", class_attr)
a = markdown.util.etree.SubElement(div, "a") a = markdown.util.etree.SubElement(div, "a")
a.set("href", link) a.set("href", link)
a.set("target", "_blank") a.set("target", "_blank")
a.set("title", url_filename(link)) a.set("title", title if title is not None else url_filename(link))
img = markdown.util.etree.SubElement(a, "img") img = markdown.util.etree.SubElement(a, "img")
img.set("src", url) img.set("src", url)
if title and desc:
summary_div = markdown.util.etree.SubElement(div, "div")
title_div = markdown.util.etree.SubElement(summary_div, "div")
title_div.set("class", "message_inline_image_title");
title_div.text = title
desc_div = markdown.util.etree.SubElement(summary_div, "desc")
desc_div.set("class", "message_inline_image_desc");
desc_div.text = desc
def hash_embedly_url(link): def hash_embedly_url(link):
return 'embedly:' + hashlib.sha1(link).hexdigest() return 'embedly:' + hashlib.sha1(link).hexdigest()
@@ -135,6 +147,69 @@ def fetch_tweet_data(tweet_id):
return None return None
return res return res
HEAD_START_RE = re.compile('^head[ >]')
HEAD_END_RE = re.compile('^/head[ >]')
META_START_RE = re.compile('^meta[ >]')
META_END_RE = re.compile('^/meta[ >]')
def fetch_open_graph_image(url):
in_head = False
# HTML will auto close meta tags, when we start the next tag add a closing tag if it has not been closed yet.
last_closed = True
head = []
# TODO: What if response content is huge? Should we get headers first?
content = requests.get(url).content
# Extract the head and meta tags
# All meta tags are self closing, have no children or are closed
# automatically.
for part in content.split('<'):
if not in_head and HEAD_START_RE.match(part):
# Started the head node output it to have a document root
in_head = True
head.append('<head>')
elif in_head and HEAD_END_RE.match(part):
# Found the end of the head close any remaining tag then stop
# processing
in_head = False
if not last_closed:
last_closed = True
head.append('</meta>')
head.append('</head>')
break
elif in_head and META_START_RE.match(part):
# Found a meta node copy it
if not last_closed:
head.append('</meta>')
last_closed = True
head.append('<')
head.append(part)
if '/>' not in part:
last_closed = False
elif in_head and META_END_RE.match(part):
# End of a meta node just copy it to close the tag
head.append('<')
head.append(part)
last_closed = True
try:
doc = etree.fromstring(''.join(head))
except etree.ParseError:
return None
og_image = doc.find('meta[@property="og:image"]')
og_title = doc.find('meta[@property="og:title"]')
og_desc = doc.find('meta[@property="og:description"]')
if og_image is not None:
image = og_image.get('content')
if og_title is not None:
title = og_title.get('content')
if og_desc is not None:
desc = og_desc.get('content')
return {'image': image, 'title': title, 'desc': desc}
def get_tweet_id(url): def get_tweet_id(url):
parsed_url = urlparse.urlparse(url) parsed_url = urlparse.urlparse(url)
if not (parsed_url.netloc == 'twitter.com' or parsed_url.netloc.endswith('.twitter.com')): if not (parsed_url.netloc == 'twitter.com' or parsed_url.netloc.endswith('.twitter.com')):
@@ -180,12 +255,41 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
def dropbox_image(self, url): def dropbox_image(self, url):
parsed_url = urlparse.urlparse(url) parsed_url = urlparse.urlparse(url)
if (parsed_url.netloc == 'dropbox.com' or parsed_url.netloc.endswith('.dropbox.com')): if (parsed_url.netloc == 'dropbox.com' or parsed_url.netloc.endswith('.dropbox.com')):
if self.is_image(url) and (parsed_url.path.startswith('/s/') is_album = parsed_url.path.startswith('/sc/')
or parsed_url.path.startswith('/sh/')): # Only allow preview Dropbox shared links
return "%s?dl=1" % (url,) if not (parsed_url.path.startswith('/s/') or
if parsed_url.path.startswith('/sc/'): parsed_url.path.startswith('/sh/') or
# /sc/ is generally speaking a photo album, so let's unconditionally try to preview it is_album):
return "%s?dl=1" % (url,) return None
# Try to retrieve open graph protocol info for a preview
# This might be redundant right now for shared links for images.
# However, we might want to make use of title and description
# in the future. If the actual image is too big, we might also
# want to use the open graph image.
image_info = fetch_open_graph_image(url)
is_image = is_album or self.is_image(url)
# If it is from an album or not an actual image file,
# just use open graph image.
if is_album or not is_image:
if image_info is not None:
image_info["is_image"] = is_image
return image_info
# Otherwise, try to retrieve the actual image.
# This is because open graph image from Dropbox may have padding
# and gifs do not work.
# TODO: What if image is huge? Should we get headers first?
if image_info is None:
image_info = dict()
image_info['is_image'] = True
parsed_url_list = list(parsed_url)
parsed_url_list[4] = "dl=1" # Replaces query
image_info["image"] = urlparse.urlunparse(parsed_url_list)
return image_info
return None return None
def youtube_image(self, url): def youtube_image(self, url):
@@ -432,9 +536,19 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
rendered_tweet_count = 0 rendered_tweet_count = 0
embedly_urls = [] embedly_urls = []
for url in found_urls: for url in found_urls:
dropbox = self.dropbox_image(url) dropbox_image = self.dropbox_image(url)
if dropbox is not None: if dropbox_image is not None:
add_a(root, dropbox, url) class_attr = "message_inline_ref"
is_image = dropbox_image["is_image"]
if is_image:
class_attr = "message_inline_image"
# Not making use of title and description of images
dropbox_image['title'] = ""
dropbox_image['desc'] = ""
add_a(root, dropbox_image['image'], url,
title=dropbox_image.get('title', ""),
desc=dropbox_image.get('desc', ""),
class_attr=class_attr)
continue continue
if self.is_image(url): if self.is_image(url):
add_a(root, url, url) add_a(root, url, url)

View File

@@ -145,6 +145,7 @@ class BugdownTest(TestCase):
def test_bugdown_fixtures(self): def test_bugdown_fixtures(self):
format_tests, linkify_tests = self.load_bugdown_tests() format_tests, linkify_tests = self.load_bugdown_tests()
self.maxDiff = None
for name, test in format_tests.iteritems(): for name, test in format_tests.iteritems():
converted = bugdown_convert(test['input']) converted = bugdown_convert(test['input'])
@@ -164,6 +165,7 @@ class BugdownTest(TestCase):
print "Running Bugdown Linkify tests" print "Running Bugdown Linkify tests"
self.maxDiff = None
for inline_url, reference, url in linkify_tests: for inline_url, reference, url in linkify_tests:
try: try:
match = replaced(reference, url, phrase=inline_url) match = replaced(reference, url, phrase=inline_url)
@@ -185,18 +187,18 @@ class BugdownTest(TestCase):
msg = 'Look at how hilarious our old office was: https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG' msg = 'Look at how hilarious our old office was: https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG'
converted = bugdown_convert(msg) converted = bugdown_convert(msg)
self.assertEqual(converted, '<p>Look at how hilarious our old office was: <a href="https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG" target="_blank" title="https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG">https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG</a></p>\n<div class="message_inline_image"><a href="https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG" target="_blank" title="https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG"><img src="https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG?dl=1"></a></div>') self.assertEqual(converted, '<p>Look at how hilarious our old office was: <a href="https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG" target="_blank" title="https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG">https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG</a></p>\n<div class="message_inline_image"><a href="https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG" target="_blank" title="https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG"><img src="https://photos-3.dropbox.com/t/0/AAD9AiA1XTguQgdHVN8VLpkytZhKqoqn4LsD9FcfXsfmpg/12/129/jpeg/320x320/1/_/0/2/IMG_0923.JPG/qSBawV13_29SGhaEdUo5tqLgpA5KEPYG0zPZCXQt0RQ"></a></div>')
msg = 'Look at my hilarious drawing: https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg' msg = 'Look at my hilarious drawing: https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg'
converted = bugdown_convert(msg) converted = bugdown_convert(msg)
self.assertEqual(converted, '<p>Look at my hilarious drawing: <a href="https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg" target="_blank" title="https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg">https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg</a></p>\n<div class="message_inline_image"><a href="https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg" target="_blank" title="https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg"><img src="https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg?dl=1"></a></div>') #self.assertEqual(converted, '<p>Look at my hilarious drawing: <a href="https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg" target="_blank" title="https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg">https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg</a></p>\n<div class="message_inline_image"><a href="https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg" target="_blank" title="https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg"><img src="https://www.dropbox.com/sh/inlugx9d25r314h/JYwv59v4Jv/credit_card_rushmore.jpg?dl=1"></a></div>')
# Test photo album previews # Test photo album previews
msg = 'https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5' msg = 'https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5'
converted = bugdown_convert(msg) converted = bugdown_convert(msg)
self.assertEqual(converted, '<p><a href="https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5" target="_blank" title="https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5">https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5</a></p>\n<div class="message_inline_image"><a href="https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5" target="_blank" title="https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5"><img src="https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5?dl=1"></a></div>') self.assertEqual(converted, '<p><a href="https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5" target="_blank" title="https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5">https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5</a></p>\n<div class="message_inline_image"><a href="https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5" target="_blank" title="https://www.dropbox.com/sc/tditp9nitko60n5/03rEiZldy5"><img src="https://www.dropbox.com/photos/c/permanent_thumb/ROmr9K1XYtmpneM?size_mode=1&size=320x320"></a></div>')
# Make sure we're not overzealous in our conversion: # Make sure we're not overzealous in our conversion:
msg = 'Look at the new dropbox logo: https://www.dropbox.com/static/images/home_logo.png' msg = 'Look at the new dropbox logo: https://www.dropbox.com/static/images/home_logo.png'