markdown: Remove vestigial Twitter code and cache.
Contrary to what the comment implied, the remaining `fetch_tweet_data` function would never return a cache hit, because its cache entries are namespaced by the current deploy's cache key.
committed by Tim Abbott
parent 9715f6c104
commit c68be03df7
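Why the stale cache could never be read: Zulip's cache keys are namespaced by a per-deploy prefix (KEY_PREFIX), so entries written under one deploy are unreachable from the next. A minimal sketch of that effect, assuming a hypothetical prefix generator and a plain dict standing in for the shared cache backend:

    import secrets

    def new_deploy_prefix() -> str:
        # Hypothetical stand-in for the deploy-scoped prefix; the real
        # KEY_PREFIX is regenerated when a new version is deployed.
        return secrets.token_hex(8)

    cache: dict[str, str] = {}  # stand-in for the shared cache backend

    def cache_key(prefix: str, tweet_id: str) -> str:
        # Illustrative key layout, not Zulip's exact format.
        return f"{prefix}:fetch_tweet_data:{tweet_id}"

    # An entry written by the previous deploy:
    cache[cache_key(new_deploy_prefix(), "1017581858")] = "rendered tweet"

    # After the next deploy the prefix differs, so the lookup key for the
    # same tweet_id never matches the old entry: a guaranteed cache miss.
    assert cache_key(new_deploy_prefix(), "1017581858") not in cache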
@@ -1,6 +1,5 @@
 # Zulip's main Markdown implementation. See docs/subsystems/markdown.md for
 # detailed documentation on our Markdown syntax.
-import html
 import logging
 import mimetypes
 import re
@@ -13,7 +12,7 @@ from email.message import EmailMessage
 from functools import lru_cache
 from re import Match, Pattern
 from typing import Any, Generic, Optional, TypeAlias, TypedDict, TypeVar, cast
-from urllib.parse import parse_qs, parse_qsl, quote, urlencode, urljoin, urlsplit, urlunsplit
+from urllib.parse import parse_qs, parse_qsl, urlencode, urljoin, urlsplit, urlunsplit
 from xml.etree.ElementTree import Element, SubElement

 import ahocorasick
@@ -39,7 +38,6 @@ from tlds import tld_set
 from typing_extensions import NotRequired, Self, override

 from zerver.lib import mention
-from zerver.lib.cache import cache_with_key
 from zerver.lib.camo import get_camo_url
 from zerver.lib.emoji import EMOTICON_RE, codepoint_to_name, name_to_codepoint, translate_emoticons
 from zerver.lib.emoji_utils import emoji_to_hex_codepoint, unqualify_emoji
@@ -463,17 +461,6 @@ def has_blockquote_ancestor(element_pair: ElementPair | None) -> bool:
     return has_blockquote_ancestor(element_pair.parent)


-@cache_with_key(lambda tweet_id: tweet_id, cache_name="database")
-def fetch_tweet_data(tweet_id: str) -> dict[str, Any] | None:
-    # Twitter removed support for the v1 API that this integration
-    # used. Given that, there's no point wasting time trying to make
-    # network requests to Twitter. But we leave this function, because
-    # existing cached renderings for Tweets is useful. We throw an
-    # exception rather than returning `None` to avoid caching that the
-    # link doesn't exist.
-    raise NotImplementedError("Twitter desupported their v1 API")
-
-
 class OpenGraphSession(OutgoingSession):
     def __init__(self) -> None:
         super().__init__(role="markdown", timeout=1)
@@ -524,24 +511,6 @@ def fetch_open_graph_image(url: str) -> dict[str, Any] | None:
     return None if og["image"] is None else og


-def get_tweet_id(url: str) -> str | None:
-    parsed_url = urlsplit(url)
-    if not (parsed_url.netloc == "twitter.com" or parsed_url.netloc.endswith(".twitter.com")):
-        return None
-    to_match = parsed_url.path
-    # In old-style twitter.com/#!/wdaher/status/1231241234-style URLs,
-    # we need to look at the fragment instead
-    if parsed_url.path == "/" and len(parsed_url.fragment) > 5:
-        to_match = parsed_url.fragment
-
-    tweet_id_match = re.match(
-        r"^!?/.*?/status(es)?/(?P<tweetid>\d{10,30})(/photo/[0-9])?/?$", to_match
-    )
-    if not tweet_id_match:
-        return None
-    return tweet_id_match.group("tweetid")
-
-
 class InlineImageProcessor(markdown.treeprocessors.Treeprocessor):
     """
     Rewrite inline img tags to serve external content via Camo.
@@ -638,8 +607,6 @@ class DropboxMediaInfo(TypedDict):


 class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
-    TWITTER_MAX_IMAGE_HEIGHT = 400
-    TWITTER_MAX_TO_PREVIEW = 3
     INLINE_PREVIEW_LIMIT_PER_MESSAGE = 24

     def __init__(self, zmd: "ZulipMarkdown") -> None:
@@ -937,193 +904,6 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
             return f"Vimeo - {extracted_data.title}"
         return None

-    def twitter_text(
-        self,
-        text: str,
-        urls: list[dict[str, str]],
-        user_mentions: list[dict[str, Any]],
-        media: list[dict[str, Any]],
-    ) -> Element:
-        """
-        Use data from the Twitter API to turn links, mentions and media into A
-        tags. Also convert Unicode emojis to images.
-
-        This works by using the URLs, user_mentions and media data from
-        the twitter API and searching for Unicode emojis in the text using
-        `POSSIBLE_EMOJI_RE`.
-
-        The first step is finding the locations of the URLs, mentions, media and
-        emoji in the text. For each match we build a dictionary with type, the start
-        location, end location, the URL to link to, and the text(codepoint and title
-        in case of emojis) to be used in the link(image in case of emojis).
-
-        Next we sort the matches by start location. And for each we add the
-        text from the end of the last link to the start of the current link to
-        the output. The text needs to added to the text attribute of the first
-        node (the P tag) or the tail the last link created.
-
-        Finally we add any remaining text to the last node.
-        """
-
-        to_process: list[dict[str, Any]] = []
-        # Build dicts for URLs
-        for url_data in urls:
-            to_process.extend(
-                {
-                    "type": "url",
-                    "start": match.start(),
-                    "end": match.end(),
-                    "url": url_data["url"],
-                    "text": url_data["expanded_url"],
-                }
-                for match in re.finditer(re.escape(url_data["url"]), text, re.IGNORECASE)
-            )
-        # Build dicts for mentions
-        for user_mention in user_mentions:
-            screen_name = user_mention["screen_name"]
-            mention_string = "@" + screen_name
-            to_process.extend(
-                {
-                    "type": "mention",
-                    "start": match.start(),
-                    "end": match.end(),
-                    "url": "https://twitter.com/" + quote(screen_name),
-                    "text": mention_string,
-                }
-                for match in re.finditer(re.escape(mention_string), text, re.IGNORECASE)
-            )
-        # Build dicts for media
-        for media_item in media:
-            short_url = media_item["url"]
-            expanded_url = media_item["expanded_url"]
-            to_process.extend(
-                {
-                    "type": "media",
-                    "start": match.start(),
-                    "end": match.end(),
-                    "url": short_url,
-                    "text": expanded_url,
-                }
-                for match in re.finditer(re.escape(short_url), text, re.IGNORECASE)
-            )
-        # Build dicts for emojis
-        for match in POSSIBLE_EMOJI_RE.finditer(text):
-            orig_syntax = match.group("syntax")
-            codepoint = emoji_to_hex_codepoint(unqualify_emoji(orig_syntax))
-            if codepoint in codepoint_to_name:
-                display_string = ":" + codepoint_to_name[codepoint] + ":"
-                to_process.append(
-                    {
-                        "type": "emoji",
-                        "start": match.start(),
-                        "end": match.end(),
-                        "codepoint": codepoint,
-                        "title": display_string,
-                    }
-                )
-
-        to_process.sort(key=lambda x: x["start"])
-        p = current_node = Element("p")
-
-        def set_text(text: str) -> None:
-            """
-            Helper to set the text or the tail of the current_node
-            """
-            if current_node == p:
-                current_node.text = text
-            else:
-                current_node.tail = text
-
-        db_data: DbData | None = self.zmd.zulip_db_data
-        current_index = 0
-        for item in to_process:
-            # The text we want to link starts in already linked text skip it
-            if item["start"] < current_index:
-                continue
-            # Add text from the end of last link to the start of the current
-            # link
-            set_text(text[current_index : item["start"]])
-            current_index = item["end"]
-            if item["type"] != "emoji":
-                elem = url_to_a(db_data, item["url"], item["text"])
-                assert isinstance(elem, Element)
-            else:
-                elem = make_emoji(item["codepoint"], item["title"])
-            current_node = elem
-            p.append(elem)
-
-        # Add any unused text
-        set_text(text[current_index:])
-        return p
-
-    def twitter_link(self, url: str) -> Element | None:
-        tweet_id = get_tweet_id(url)
-
-        if tweet_id is None:
-            return None
-
-        try:
-            res = fetch_tweet_data(tweet_id)
-            if res is None:
-                return None
-            user: dict[str, Any] = res["user"]
-            tweet = Element("div")
-            tweet.set("class", "twitter-tweet")
-            img_a = SubElement(tweet, "a")
-            img_a.set("href", url)
-            profile_img = SubElement(img_a, "img")
-            profile_img.set("class", "twitter-avatar")
-            # For some reason, for, e.g. tweet 285072525413724161,
-            # python-twitter does not give us a
-            # profile_image_url_https, but instead puts that URL in
-            # profile_image_url. So use _https if available, but fall
-            # back gracefully.
-            image_url = user.get("profile_image_url_https", user["profile_image_url"])
-            profile_img.set("src", image_url)
-
-            text = html.unescape(res["full_text"])
-            urls = res.get("urls", [])
-            user_mentions = res.get("user_mentions", [])
-            media: list[dict[str, Any]] = res.get("media", [])
-            p = self.twitter_text(text, urls, user_mentions, media)
-            tweet.append(p)
-
-            span = SubElement(tweet, "span")
-            span.text = "- {} (@{})".format(user["name"], user["screen_name"])
-
-            # Add image previews
-            for media_item in media:
-                # Only photos have a preview image
-                if media_item["type"] != "photo":
-                    continue
-
-                # Find the image size that is smaller than
-                # TWITTER_MAX_IMAGE_HEIGHT px tall or the smallest
-                size_name_tuples = sorted(
-                    media_item["sizes"].items(), reverse=True, key=lambda x: x[1]["h"]
-                )
-                for size_name, size in size_name_tuples:
-                    if size["h"] < self.TWITTER_MAX_IMAGE_HEIGHT:
-                        break
-
-                media_url = "{}:{}".format(media_item["media_url_https"], size_name)
-                img_div = SubElement(tweet, "div")
-                img_div.set("class", "twitter-image")
-                img_a = SubElement(img_div, "a")
-                img_a.set("href", media_item["url"])
-                img = SubElement(img_a, "img")
-                img.set("src", media_url)
-
-            return tweet
-        except NotImplementedError:
-            return None
-        except Exception:
-            # We put this in its own try-except because it requires external
-            # connectivity. If Twitter flakes out, we don't want to not-render
-            # the entire message; we just want to not show the Twitter preview.
-            markdown_logger.warning("Error building Twitter link", exc_info=True)
-            return None
-
     def get_url_data(self, e: Element) -> tuple[str, str | None] | None:
         if e.tag == "a":
             url = e.get("href")
@@ -1198,23 +978,6 @@
         if info["remove"] is not None:
             info["parent"].remove(info["remove"])

-    def handle_tweet_inlining(
-        self,
-        root: Element,
-        found_url: ResultWithFamily[tuple[str, str | None]],
-        twitter_data: Element,
-    ) -> None:
-        info = self.get_inlining_information(root, found_url)
-
-        if info["index"] is not None:
-            div = Element("div")
-            root.insert(info["index"], div)
-        else:
-            div = SubElement(root, "div")
-
-        div.set("class", "inline-preview-twitter")
-        div.insert(0, twitter_data)
-
     def handle_youtube_url_inlining(
         self,
         root: Element,
@@ -1252,7 +1015,6 @@
         uncle = grandparent[insertion_index]
         inline_image_classes = {
             "message_inline_image",
-            "inline-preview-twitter",
         }
         if (
             uncle.tag != "div"
@@ -1339,7 +1101,6 @@
             return

         processed_urls: set[str] = set()
-        rendered_tweet_count = 0

         for found_url in found_urls:
             (url, text) = found_url.result
@@ -1404,17 +1165,6 @@
                 # We don't have a strong use case for doing URL preview for relative links.
                 continue

-            if get_tweet_id(url) is not None:
-                if rendered_tweet_count >= self.TWITTER_MAX_TO_PREVIEW:
-                    # Only render at most one tweet per message
-                    continue
-                twitter_data = self.twitter_link(url)
-                if twitter_data is None:
-                    # This link is not actually a tweet known to twitter
-                    continue
-                rendered_tweet_count += 1
-                self.handle_tweet_inlining(root, found_url, twitter_data)
-                continue
             youtube = self.youtube_image(url)
             if youtube is not None:
                 self.handle_youtube_url_inlining(root, found_url, youtube)
@@ -40,8 +40,6 @@ from zerver.lib.markdown import (
     MessageRenderingResult,
     clear_web_link_regex_for_testing,
     content_has_emoji_syntax,
-    fetch_tweet_data,
-    get_tweet_id,
     image_preview_enabled,
     markdown_convert,
     maybe_update_markdown_engines,
@@ -1267,39 +1265,6 @@
            f"""<p><a href="https://www.youtube.com/watch?v=0c46YHS3RY8">https://www.youtube.com/watch?v=0c46YHS3RY8</a><br>\n<a href="https://www.youtube.com/watch?v=lXFO2ULktEI">https://www.youtube.com/watch?v=lXFO2ULktEI</a></p>\n<div class="youtube-video message_inline_image"><a data-id="0c46YHS3RY8" href="https://www.youtube.com/watch?v=0c46YHS3RY8"><img src="{get_camo_url("https://i.ytimg.com/vi/0c46YHS3RY8/mqdefault.jpg")}"></a></div><div class="youtube-video message_inline_image"><a data-id="lXFO2ULktEI" href="https://www.youtube.com/watch?v=lXFO2ULktEI"><img src="{get_camo_url("https://i.ytimg.com/vi/lXFO2ULktEI/mqdefault.jpg")}"></a></div>""",
         )

-    def test_twitter_id_extraction(self) -> None:
-        self.assertEqual(
-            get_tweet_id("http://twitter.com/#!/VizzQuotes/status/409030735191097344"),
-            "409030735191097344",
-        )
-        self.assertEqual(
-            get_tweet_id("http://twitter.com/VizzQuotes/status/409030735191097344"),
-            "409030735191097344",
-        )
-        self.assertEqual(
-            get_tweet_id("http://twitter.com/VizzQuotes/statuses/409030735191097344"),
-            "409030735191097344",
-        )
-        self.assertEqual(get_tweet_id("https://twitter.com/wdaher/status/1017581858"), "1017581858")
-        self.assertEqual(
-            get_tweet_id("https://twitter.com/wdaher/status/1017581858/"), "1017581858"
-        )
-        self.assertEqual(
-            get_tweet_id("https://twitter.com/windyoona/status/410766290349879296/photo/1"),
-            "410766290349879296",
-        )
-        self.assertEqual(
-            get_tweet_id("https://twitter.com/windyoona/status/410766290349879296/"),
-            "410766290349879296",
-        )
-
-    def test_fetch_tweet_data_settings_validation(self) -> None:
-        with (
-            self.settings(TEST_SUITE=False, TWITTER_CONSUMER_KEY=None),
-            self.assertRaises(NotImplementedError),
-        ):
-            fetch_tweet_data("287977969287315459")
-

 class MarkdownEmojiTest(ZulipTestCase):
     def test_content_has_emoji(self) -> None:
@@ -426,9 +426,9 @@ CACHES: dict[str, dict[str, object]] = {
     "database": {
         "BACKEND": "django.core.cache.backends.db.DatabaseCache",
         "LOCATION": "third_party_api_results",
-        # This cache shouldn't timeout; we're really just using the
-        # cache API to store the results of requests to third-party
-        # APIs like the Twitter API permanently.
+        # This is currently unused; it was previously used to cache
+        # API responses from third-party APIs like the Twitter API
+        # permanently.
         "TIMEOUT": None,
         "OPTIONS": {
             "MAX_ENTRIES": 100000000,
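For reference, the removed URL parser is easy to exercise standalone. Below is a self-contained copy of the deleted `get_tweet_id` logic, with a few of the deleted test cases repeated as assertions (both taken verbatim from the diff above); the final negative case is added for illustration:

    import re
    from urllib.parse import urlsplit

    def get_tweet_id(url: str) -> str | None:
        parsed_url = urlsplit(url)
        if not (parsed_url.netloc == "twitter.com" or parsed_url.netloc.endswith(".twitter.com")):
            return None
        to_match = parsed_url.path
        # Old-style twitter.com/#!/user/status/123 URLs carry the path in the fragment.
        if parsed_url.path == "/" and len(parsed_url.fragment) > 5:
            to_match = parsed_url.fragment
        tweet_id_match = re.match(
            r"^!?/.*?/status(es)?/(?P<tweetid>\d{10,30})(/photo/[0-9])?/?$", to_match
        )
        if not tweet_id_match:
            return None
        return tweet_id_match.group("tweetid")

    assert get_tweet_id("http://twitter.com/#!/VizzQuotes/status/409030735191097344") == "409030735191097344"
    assert get_tweet_id("https://twitter.com/wdaher/status/1017581858/") == "1017581858"
    assert get_tweet_id("https://twitter.com/windyoona/status/410766290349879296/photo/1") == "410766290349879296"
    assert get_tweet_id("https://example.com/status/4090307351910973441") is None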