mirror of
https://github.com/zulip/zulip.git
synced 2025-11-06 15:03:34 +00:00
messages: Set has_attachment correctly using Bugdown.
Previously, we would naively set has_attachment just by searching
the whole messages for strings like `/user_uploads/...`. We now
prevent running do_claim_attachments for messages that obviously
do not have an attachment in them that we previously ran.
For example: attachments in codeblocks or
attachments that otherwise do not match our link syntax.
The new implementation runs that check on only the urls that
bugdown determines should be rendered. We also refactor some
Attachment tests in test_messages to test this change.
The new method is:
1. Create a list of potential_attachment_urls in Bugdown while rendering.
2. Loop over this list in do_claim_attachments for the actual claiming.
For saving:
3. If we claimed an attachment, set message.has_attachment to True.
For updating:
3. If claimed_attachment != message.has_attachment: update has_attachment.
We do not modify the logic for 'unclaiming' attachments when editing.
This commit is contained in:
committed by
Tim Abbott
parent
4674cc5098
commit
3892a8afd8
@@ -1397,6 +1397,13 @@ def do_send_messages(messages_maybe_none: Sequence[Optional[MutableMapping[str,
|
||||
user_message_flags = defaultdict(dict) # type: Dict[int, Dict[int, List[str]]]
|
||||
with transaction.atomic():
|
||||
Message.objects.bulk_create([message['message'] for message in messages])
|
||||
|
||||
# Claim attachments in message
|
||||
for message in messages:
|
||||
if do_claim_attachments(message['message']):
|
||||
message['message'].has_attachment = True
|
||||
message['message'].save(update_fields=['has_attachment'])
|
||||
|
||||
ums = [] # type: List[UserMessageLite]
|
||||
for message in messages:
|
||||
# Service bots (outgoing webhook bots and embedded bots) don't store UserMessage rows;
|
||||
@@ -1427,11 +1434,6 @@ def do_send_messages(messages_maybe_none: Sequence[Optional[MutableMapping[str,
|
||||
|
||||
bulk_insert_ums(ums)
|
||||
|
||||
# Claim attachments in message
|
||||
for message in messages:
|
||||
if Message.content_has_attachment(message['message'].content):
|
||||
do_claim_attachments(message['message'])
|
||||
|
||||
for message in messages:
|
||||
do_widget_post_save_actions(message)
|
||||
|
||||
@@ -4501,7 +4503,7 @@ def do_update_message(user_profile: UserProfile, message: Message, topic_name: O
|
||||
|
||||
prev_content = edit_history_event['prev_content']
|
||||
if Message.content_has_attachment(prev_content) or Message.content_has_attachment(message.content):
|
||||
check_attachment_reference_change(prev_content, message)
|
||||
message.has_attachment = check_attachment_reference_change(prev_content, message)
|
||||
|
||||
if message.is_stream_message():
|
||||
if topic_name is not None:
|
||||
@@ -5451,9 +5453,12 @@ def notify_attachment_update(user_profile: UserProfile, op: str,
|
||||
}
|
||||
send_event(user_profile.realm, event, [user_profile.id])
|
||||
|
||||
def do_claim_attachments(message: Message) -> None:
|
||||
attachment_url_list = attachment_url_re.findall(message.content)
|
||||
def do_claim_attachments(message: Message) -> bool:
|
||||
attachment_url_list = message.potential_attachment_urls
|
||||
if not attachment_url_list:
|
||||
return False
|
||||
|
||||
claimed = False
|
||||
for url in attachment_url_list:
|
||||
path_id = attachment_url_to_path_id(url)
|
||||
user_profile = message.sender
|
||||
@@ -5475,8 +5480,10 @@ def do_claim_attachments(message: Message) -> None:
|
||||
user_profile.id, path_id, message.id))
|
||||
continue
|
||||
|
||||
claimed = True
|
||||
attachment = claim_attachment(user_profile, path_id, message, is_message_realm_public)
|
||||
notify_attachment_update(user_profile, "update", attachment.to_dict())
|
||||
return claimed
|
||||
|
||||
def do_delete_old_unclaimed_attachments(weeks_ago: int) -> None:
|
||||
old_unclaimed_attachments = get_old_unclaimed_attachments(weeks_ago)
|
||||
@@ -5485,7 +5492,7 @@ def do_delete_old_unclaimed_attachments(weeks_ago: int) -> None:
|
||||
delete_message_image(attachment.path_id)
|
||||
attachment.delete()
|
||||
|
||||
def check_attachment_reference_change(prev_content: str, message: Message) -> None:
|
||||
def check_attachment_reference_change(prev_content: str, message: Message) -> bool:
|
||||
new_content = message.content
|
||||
prev_attachments = set(attachment_url_re.findall(prev_content))
|
||||
new_attachments = set(attachment_url_re.findall(new_content))
|
||||
@@ -5499,9 +5506,11 @@ def check_attachment_reference_change(prev_content: str, message: Message) -> No
|
||||
attachments_to_update = Attachment.objects.filter(path_id__in=path_ids).select_for_update()
|
||||
message.attachment_set.remove(*attachments_to_update)
|
||||
|
||||
claimed = False
|
||||
to_add = list(new_attachments - prev_attachments)
|
||||
if len(to_add) > 0:
|
||||
do_claim_attachments(message)
|
||||
claimed = do_claim_attachments(message)
|
||||
return claimed
|
||||
|
||||
def notify_realm_custom_profile_fields(realm: Realm, operation: str) -> None:
|
||||
fields = custom_profile_fields_for_realm(realm.id)
|
||||
|
||||
@@ -129,6 +129,10 @@ STREAM_TOPIC_LINK_REGEX = r"""
|
||||
def get_compiled_stream_topic_link_regex() -> Pattern:
|
||||
return verbose_compile(STREAM_TOPIC_LINK_REGEX)
|
||||
|
||||
@one_time
|
||||
def get_compiled_attachment_regex() -> Pattern:
|
||||
return verbose_compile(r'(?P<path>[/\-]user[\-_]uploads[/\.-].*)')
|
||||
|
||||
LINK_REGEX = None # type: Pattern
|
||||
|
||||
def get_web_link_regex() -> str:
|
||||
@@ -1044,18 +1048,36 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
|
||||
def is_absolute_url(self, url: str) -> bool:
|
||||
return bool(urllib.parse.urlparse(url).netloc)
|
||||
|
||||
def maybe_append_attachment_url(self, url: str) -> None:
|
||||
attachment_regex = get_compiled_attachment_regex()
|
||||
m = attachment_regex.match(url)
|
||||
if m:
|
||||
urls = self.markdown.zulip_message.potential_attachment_urls
|
||||
urls.append(m.group('path'))
|
||||
self.markdown.zulip_message.potential_attachment_urls = urls
|
||||
|
||||
def run(self, root: Element) -> None:
|
||||
# Get all URLs from the blob
|
||||
found_urls = walk_tree_with_family(root, self.get_url_data)
|
||||
unique_urls = {found_url.result[0] for found_url in found_urls}
|
||||
# Collect unique URLs which are not quoted as we don't do
|
||||
# inline previews for links inside blockquotes.
|
||||
unique_previewable_urls = {found_url.result[0] for found_url in found_urls
|
||||
if not found_url.family.in_blockquote}
|
||||
|
||||
# Set has_link and similar flags whenever a message is processed by bugdown
|
||||
if self.markdown.zulip_message:
|
||||
self.markdown.zulip_message.has_link = len(found_urls) > 0
|
||||
self.markdown.zulip_message.has_image = False # This is updated in self.add_a
|
||||
# Populate potential_attachment_urls. do_claim_attachments() handles setting has_attachments.
|
||||
self.markdown.zulip_message.potential_attachment_urls = []
|
||||
for url in unique_urls:
|
||||
self.maybe_append_attachment_url(url)
|
||||
|
||||
# Collect unique URLs which are not quoted
|
||||
unique_urls = {found_url.result[0] for found_url in found_urls if not found_url.family.in_blockquote}
|
||||
if len(found_urls) == 0 or len(unique_urls) > self.INLINE_PREVIEW_LIMIT_PER_MESSAGE:
|
||||
if len(found_urls) == 0:
|
||||
return
|
||||
|
||||
if len(unique_previewable_urls) > self.INLINE_PREVIEW_LIMIT_PER_MESSAGE:
|
||||
return
|
||||
|
||||
processed_urls = set() # type: Set[str]
|
||||
@@ -1064,7 +1086,7 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
|
||||
for found_url in found_urls:
|
||||
(url, text) = found_url.result
|
||||
|
||||
if url in unique_urls and url not in processed_urls:
|
||||
if url in unique_previewable_urls and url not in processed_urls:
|
||||
processed_urls.add(url)
|
||||
else:
|
||||
continue
|
||||
|
||||
@@ -1726,6 +1726,14 @@ class Message(AbstractMessage):
|
||||
'website', 'ios', 'android')) or (
|
||||
'desktop app' in sending_client)
|
||||
|
||||
@property
|
||||
def potential_attachment_urls(self) -> List[str]:
|
||||
return getattr(self, '_potential_attachment_urls', [])
|
||||
|
||||
@potential_attachment_urls.setter
|
||||
def potential_attachment_urls(self, urls: List[str]) -> None:
|
||||
self._potential_attachment_urls = urls
|
||||
|
||||
@staticmethod
|
||||
def content_has_attachment(content: str) -> Match:
|
||||
return re.search(r'[/\-]user[\-_]uploads[/\.-]', content)
|
||||
@@ -1742,8 +1750,7 @@ class Message(AbstractMessage):
|
||||
|
||||
def update_calculated_fields(self) -> None:
|
||||
# TODO: rendered_content could also be considered a calculated field
|
||||
content = self.content
|
||||
self.has_attachment = bool(Message.content_has_attachment(content))
|
||||
return
|
||||
|
||||
@receiver(pre_save, sender=Message)
|
||||
def pre_save_message(sender: Any, **kwargs: Any) -> None:
|
||||
|
||||
@@ -3570,19 +3570,9 @@ class MessageAccessTests(ZulipTestCase):
|
||||
|
||||
self.assertEqual(len(filtered_messages), 2)
|
||||
|
||||
class AttachmentTest(ZulipTestCase):
|
||||
def test_basics(self) -> None:
|
||||
zulip_realm = get_realm("zulip")
|
||||
self.assertFalse(Message.content_has_attachment('whatever'))
|
||||
self.assertFalse(Message.content_has_attachment('yo http://foo.com'))
|
||||
self.assertTrue(Message.content_has_attachment('yo\n https://staging.zulip.com/user_uploads/'))
|
||||
self.assertTrue(Message.content_has_attachment('yo\n /user_uploads/%s/wEAnI-PEmVmCjo15xxNaQbnj/photo-10.jpg foo' % (
|
||||
zulip_realm.id,)))
|
||||
|
||||
def test_claim_attachment(self) -> None:
|
||||
|
||||
# Create dummy DB entry
|
||||
user_profile = self.example_user('hamlet')
|
||||
class MessageHasKeywordsTest(ZulipTestCase):
|
||||
'''Test for keywords like has_link, has_image, has_attachment.'''
|
||||
def setup_dummy_attachments(self, user_profile: UserProfile) -> List[str]:
|
||||
sample_size = 10
|
||||
realm_id = user_profile.realm_id
|
||||
dummy_files = [
|
||||
@@ -3594,21 +3584,54 @@ class AttachmentTest(ZulipTestCase):
|
||||
for file_name, path_id, size in dummy_files:
|
||||
create_attachment(file_name, path_id, user_profile, size)
|
||||
|
||||
# return path ids
|
||||
return [x[1] for x in dummy_files]
|
||||
|
||||
def test_basics(self) -> None:
|
||||
zulip_realm = get_realm("zulip")
|
||||
self.assertFalse(Message.content_has_attachment('whatever'))
|
||||
self.assertFalse(Message.content_has_attachment('yo http://foo.com'))
|
||||
self.assertTrue(Message.content_has_attachment('yo\n https://staging.zulip.com/user_uploads/'))
|
||||
self.assertTrue(Message.content_has_attachment('yo\n /user_uploads/%s/wEAnI-PEmVmCjo15xxNaQbnj/photo-10.jpg foo' % (
|
||||
zulip_realm.id,)))
|
||||
|
||||
def test_claim_attachment(self) -> None:
|
||||
user_profile = self.example_user('hamlet')
|
||||
dummy_path_ids = self.setup_dummy_attachments(user_profile)
|
||||
dummy_urls = ["http://localhost:9991/user_uploads/{}".format(x) for x in dummy_path_ids]
|
||||
|
||||
# Send message referring the attachment
|
||||
self.subscribe(user_profile, "Denmark")
|
||||
|
||||
body = ("Some files here ...[zulip.txt](http://localhost:9991/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt)" +
|
||||
"http://localhost:9991/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py.... Some more...." +
|
||||
"http://localhost:9991/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py").format(realm_id=realm_id)
|
||||
|
||||
self.send_stream_message(user_profile.email, "Denmark", body, "test")
|
||||
|
||||
for file_name, path_id, size in dummy_files:
|
||||
def assert_attachment_claimed(path_id: str, claimed: bool) -> None:
|
||||
attachment = Attachment.objects.get(path_id=path_id)
|
||||
self.assertTrue(attachment.is_claimed())
|
||||
self.assertEqual(attachment.is_claimed(), claimed)
|
||||
|
||||
class MessageHasKeywordsTest(ZulipTestCase):
|
||||
'''Test for keywords like has_link, has_image, has_attachment'''
|
||||
# This message should claim attachments 1 only because attachment 2
|
||||
# is not being parsed as a link by Bugdown.
|
||||
body = ("Some files here ...[zulip.txt]({})" +
|
||||
"{}.... Some more...." +
|
||||
"{}").format(dummy_urls[0], dummy_urls[1], dummy_urls[1])
|
||||
self.send_stream_message(user_profile.email, "Denmark", body, "test")
|
||||
assert_attachment_claimed(dummy_path_ids[0], True)
|
||||
assert_attachment_claimed(dummy_path_ids[1], False)
|
||||
|
||||
# This message tries to claim the third attachment but fails because
|
||||
# Bugdown would not set has_attachments = True here.
|
||||
body = "Link in code: `{}`".format(dummy_urls[2])
|
||||
self.send_stream_message(user_profile.email, "Denmark", body, "test")
|
||||
assert_attachment_claimed(dummy_path_ids[2], False)
|
||||
|
||||
# Another scenario where we wouldn't parse the link.
|
||||
body = "Link to not parse: .{}.`".format(dummy_urls[2])
|
||||
self.send_stream_message(user_profile.email, "Denmark", body, "test")
|
||||
assert_attachment_claimed(dummy_path_ids[2], False)
|
||||
|
||||
# Finally, claim attachment 3.
|
||||
body = "Link: {}".format(dummy_urls[2])
|
||||
self.send_stream_message(user_profile.email, "Denmark", body, "test")
|
||||
assert_attachment_claimed(dummy_path_ids[2], True)
|
||||
assert_attachment_claimed(dummy_path_ids[1], False)
|
||||
|
||||
def test_finds_all_links(self) -> None:
|
||||
msg_ids = []
|
||||
@@ -3645,7 +3668,8 @@ class MessageHasKeywordsTest(ZulipTestCase):
|
||||
self.assertTrue(msg.has_link)
|
||||
self.update_message(msg, 'a')
|
||||
self.assertFalse(msg.has_link)
|
||||
self.update_message(msg, 'a http://foo.com')
|
||||
# Check in blockquotes work
|
||||
self.update_message(msg, '> http://bar.com')
|
||||
self.assertTrue(msg.has_link)
|
||||
self.update_message(msg, 'a `http://foo.com`')
|
||||
self.assertFalse(msg.has_link)
|
||||
@@ -3667,6 +3691,29 @@ class MessageHasKeywordsTest(ZulipTestCase):
|
||||
self.update_message(msgs[0], 'No Image Again')
|
||||
self.assertFalse(msgs[0].has_image)
|
||||
|
||||
def test_has_attachment(self) -> None:
|
||||
# This test could belong in AttachmentTest as well, but
|
||||
# we keep it with other 'has:' tests.
|
||||
hamlet = self.example_user('hamlet')
|
||||
dummy_path_ids = self.setup_dummy_attachments(hamlet)
|
||||
dummy_urls = ["http://localhost:9991/user_uploads/{}".format(x) for x in dummy_path_ids]
|
||||
self.subscribe(hamlet, "Denmark")
|
||||
|
||||
body = ("Files ...[zulip.txt]({}) {} {}").format(dummy_urls[0], dummy_urls[1], dummy_urls[2])
|
||||
|
||||
msg_id = self.send_stream_message(hamlet.email, "Denmark", body, "test")
|
||||
msg = Message.objects.get(id=msg_id)
|
||||
self.assertTrue(msg.has_attachment)
|
||||
self.update_message(msg, 'No Attachments')
|
||||
self.assertFalse(msg.has_attachment)
|
||||
self.update_message(msg, body)
|
||||
self.assertTrue(msg.has_attachment)
|
||||
self.update_message(msg, 'Link in code: `{}`'.format(dummy_urls[1]))
|
||||
self.assertFalse(msg.has_attachment)
|
||||
# Test blockquotes
|
||||
self.update_message(msg, '> {}'.format(dummy_urls[1]))
|
||||
self.assertTrue(msg.has_attachment)
|
||||
|
||||
class MissedMessageTest(ZulipTestCase):
|
||||
def test_presence_idle_user_ids(self) -> None:
|
||||
UserPresence.objects.all().delete()
|
||||
|
||||
@@ -143,9 +143,9 @@ class ArchiveMessagesTestingBase(RetentionTestingBase):
|
||||
create_attachment(file_name, path_id, user_profile, size)
|
||||
|
||||
self.subscribe(user_profile, "Denmark")
|
||||
body = ("Some files here ...[zulip.txt](http://localhost:9991/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt)" +
|
||||
"http://localhost:9991/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py.... Some more...." +
|
||||
"http://localhost:9991/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py").format(id=realm_id)
|
||||
body = ("Some files here ... [zulip.txt](http://localhost:9991/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt)" +
|
||||
" http://localhost:9991/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py.... Some more...." +
|
||||
" http://localhost:9991/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py").format(id=realm_id)
|
||||
|
||||
expired_message_id = self.send_stream_message(sender_email, "Denmark", body)
|
||||
actual_message_id = self.send_stream_message(sender_email, "Denmark", body)
|
||||
|
||||
Reference in New Issue
Block a user