markdown: Change URL structure for dropbox preview to be up-to-date.

The URL structure for a shared link has changed since this function was
written, and this commit brings our code into compliance with the new
structure.

The concept of an album doesn't exist anymore, and folders exist in lieu
of that.

For Dropbox links that are folders or non-image files, we show previews
the same way as for any other link preview. It is not possible to get
information about the shared link except whether it is a file or a
folder. So for the title and description of that link preview, we use
`Dropbox file` or `Dropbox folder` respectively.

Earlier, we simply used raw=1 as the only query param to get the image
file when required, but now, for every Dropbox sharing link, preserving
the existing query params is important (otherwise we get a 404). This
commit makes changes to address that.

For /sc/ links, it is not possible to generate them anymore (afaik), but
it is still possible to view existing ones, so we support those links
but treat them as folders instead.

You can check
https://www.dropboxforum.com/discussions/101001012/shared-link--scl-to-s/689070/replies/695266
for URL structure info.

We have used an inline ignore for codespell since `fo` can be a valid
misspelling of `of`, and we don't want to ignore it everywhere.

https://chat.zulip.org/#narrow/channel/9-issues/topic/.F0.9F.93.82.20message_inline_ref.20dropbox.20links

Co-authored-by: Tim Abbott <tabbott@zulip.com>
This commit is contained in:
Shubham Padia
2025-06-11 11:42:46 +00:00
committed by Tim Abbott
parent 22b5744726
commit bace83ec5a
5 changed files with 84 additions and 35 deletions

View File

@@ -13,7 +13,7 @@ from email.message import EmailMessage
from functools import lru_cache
from re import Match, Pattern
from typing import Any, Generic, Optional, TypeAlias, TypedDict, TypeVar, cast
from urllib.parse import parse_qs, quote, urljoin, urlsplit, urlunsplit
from urllib.parse import parse_qs, parse_qsl, quote, urlencode, urljoin, urlsplit, urlunsplit
from xml.etree.ElementTree import Element, SubElement
import ahocorasick
@@ -803,11 +803,15 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
# TODO: The returned Dict could possibly be a TypedDict in future.
parsed_url = urlsplit(url)
if parsed_url.netloc == "dropbox.com" or parsed_url.netloc.endswith(".dropbox.com"):
is_album = parsed_url.path.startswith("/sc/") or parsed_url.path.startswith("/photos/")
# Only allow preview Dropbox shared links
if not (
parsed_url.path.startswith("/s/") or parsed_url.path.startswith("/sh/") or is_album
):
# See https://www.dropboxforum.com/discussions/101001012/shared-link--scl-to-s/689070/replies/695266
# for more info on the URL structure mentioned here.
# It is not possible to generate /sc/ links which is kind of a showcase
# for multiple images. We treat it now as a folder instead.
is_album = parsed_url.path.startswith(
"/scl/fo/" # codespell:ignore fo
) or parsed_url.path.startswith("/sc/")
is_file = parsed_url.path.startswith("/scl/fi/")
if not (is_file or is_album):
return None
# Try to retrieve open graph protocol info for a preview
@@ -817,7 +821,7 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
# want to use the open graph image.
image_info = fetch_open_graph_image(url)
is_image = is_album or self.is_image(url)
is_image = self.is_image(url)
# If it is from an album or not an actual image file,
# just use open graph image.
@@ -827,17 +831,31 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
if image_info is None:
return None
if is_album:
image_info["title"] = "Dropbox folder"
image_info["desc"] = "Click to open folder."
else:
image_info["title"] = "Dropbox file"
image_info["desc"] = "Click to open file."
image_info["is_image"] = is_image
return image_info
# Otherwise, try to retrieve the actual image.
# Try to retrieve the actual image.
# This is because open graph image from Dropbox may have padding
# and gifs do not work.
# TODO: What if image is huge? Should we get headers first?
if image_info is None:
image_info = {}
image_info["is_image"] = True
image_info["image"] = parsed_url._replace(query="raw=1").geturl()
# Adding raw=1 as query param will give us the URL of the
# actual image instead of the dropbox image preview page.
query_params = dict(parse_qsl(parsed_url.query))
query_params["raw"] = "1"
query = urlencode(query_params)
image_info["image"] = parsed_url._replace(query=query).geturl()
return image_info
return None
@@ -1339,20 +1357,22 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
dropbox_image = self.dropbox_image(url)
if dropbox_image is not None:
class_attr = "message_inline_ref"
is_image = dropbox_image["is_image"]
if is_image:
class_attr = "message_inline_image"
# Not making use of title and description of images
self.add_a(
root,
image_url=dropbox_image["image"],
link=url,
title=dropbox_image.get("title"),
desc=dropbox_image.get("desc", ""),
class_attr=class_attr,
already_thumbnailed=True,
found_url = ResultWithFamily(
family=found_url.family,
result=(dropbox_image["image"], dropbox_image["image"]),
)
self.handle_image_inlining(root, found_url)
continue
dropbox_embed_data = UrlEmbedData(
type="image",
title=dropbox_image["title"],
description=dropbox_image["desc"],
image=dropbox_image["image"],
)
self.add_embed(root, url, dropbox_embed_data)
continue
if self.is_image(url):