diff --git a/zerver/lib/actions.py b/zerver/lib/actions.py index 51571efbd4..d20e1bb84a 100644 --- a/zerver/lib/actions.py +++ b/zerver/lib/actions.py @@ -1397,6 +1397,13 @@ def do_send_messages(messages_maybe_none: Sequence[Optional[MutableMapping[str, user_message_flags = defaultdict(dict) # type: Dict[int, Dict[int, List[str]]] with transaction.atomic(): Message.objects.bulk_create([message['message'] for message in messages]) + + # Claim attachments in message + for message in messages: + if do_claim_attachments(message['message']): + message['message'].has_attachment = True + message['message'].save(update_fields=['has_attachment']) + ums = [] # type: List[UserMessageLite] for message in messages: # Service bots (outgoing webhook bots and embedded bots) don't store UserMessage rows; @@ -1427,11 +1434,6 @@ def do_send_messages(messages_maybe_none: Sequence[Optional[MutableMapping[str, bulk_insert_ums(ums) - # Claim attachments in message - for message in messages: - if Message.content_has_attachment(message['message'].content): - do_claim_attachments(message['message']) - for message in messages: do_widget_post_save_actions(message) @@ -4501,7 +4503,7 @@ def do_update_message(user_profile: UserProfile, message: Message, topic_name: O prev_content = edit_history_event['prev_content'] if Message.content_has_attachment(prev_content) or Message.content_has_attachment(message.content): - check_attachment_reference_change(prev_content, message) + message.has_attachment = check_attachment_reference_change(prev_content, message) if message.is_stream_message(): if topic_name is not None: @@ -5451,9 +5453,12 @@ def notify_attachment_update(user_profile: UserProfile, op: str, } send_event(user_profile.realm, event, [user_profile.id]) -def do_claim_attachments(message: Message) -> None: - attachment_url_list = attachment_url_re.findall(message.content) +def do_claim_attachments(message: Message) -> bool: + attachment_url_list = message.potential_attachment_urls + if not attachment_url_list: + return False + claimed = False for url in attachment_url_list: path_id = attachment_url_to_path_id(url) user_profile = message.sender @@ -5475,8 +5480,10 @@ def do_claim_attachments(message: Message) -> None: user_profile.id, path_id, message.id)) continue + claimed = True attachment = claim_attachment(user_profile, path_id, message, is_message_realm_public) notify_attachment_update(user_profile, "update", attachment.to_dict()) + return claimed def do_delete_old_unclaimed_attachments(weeks_ago: int) -> None: old_unclaimed_attachments = get_old_unclaimed_attachments(weeks_ago) @@ -5485,7 +5492,7 @@ def do_delete_old_unclaimed_attachments(weeks_ago: int) -> None: delete_message_image(attachment.path_id) attachment.delete() -def check_attachment_reference_change(prev_content: str, message: Message) -> None: +def check_attachment_reference_change(prev_content: str, message: Message) -> bool: new_content = message.content prev_attachments = set(attachment_url_re.findall(prev_content)) new_attachments = set(attachment_url_re.findall(new_content)) @@ -5499,9 +5506,11 @@ def check_attachment_reference_change(prev_content: str, message: Message) -> No attachments_to_update = Attachment.objects.filter(path_id__in=path_ids).select_for_update() message.attachment_set.remove(*attachments_to_update) + claimed = False to_add = list(new_attachments - prev_attachments) if len(to_add) > 0: - do_claim_attachments(message) + claimed = do_claim_attachments(message) + return claimed def notify_realm_custom_profile_fields(realm: Realm, operation: str) -> None: fields = custom_profile_fields_for_realm(realm.id) diff --git a/zerver/lib/bugdown/__init__.py b/zerver/lib/bugdown/__init__.py index 60eb07b89f..696b135a88 100644 --- a/zerver/lib/bugdown/__init__.py +++ b/zerver/lib/bugdown/__init__.py @@ -129,6 +129,10 @@ STREAM_TOPIC_LINK_REGEX = r""" def get_compiled_stream_topic_link_regex() -> Pattern: return verbose_compile(STREAM_TOPIC_LINK_REGEX) +@one_time +def get_compiled_attachment_regex() -> Pattern: + return verbose_compile(r'(?P[/\-]user[\-_]uploads[/\.-].*)') + LINK_REGEX = None # type: Pattern def get_web_link_regex() -> str: @@ -1044,18 +1048,36 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor): def is_absolute_url(self, url: str) -> bool: return bool(urllib.parse.urlparse(url).netloc) + def maybe_append_attachment_url(self, url: str) -> None: + attachment_regex = get_compiled_attachment_regex() + m = attachment_regex.match(url) + if m: + urls = self.markdown.zulip_message.potential_attachment_urls + urls.append(m.group('path')) + self.markdown.zulip_message.potential_attachment_urls = urls + def run(self, root: Element) -> None: # Get all URLs from the blob found_urls = walk_tree_with_family(root, self.get_url_data) + unique_urls = {found_url.result[0] for found_url in found_urls} + # Collect unique URLs which are not quoted as we don't do + # inline previews for links inside blockquotes. + unique_previewable_urls = {found_url.result[0] for found_url in found_urls + if not found_url.family.in_blockquote} # Set has_link and similar flags whenever a message is processed by bugdown if self.markdown.zulip_message: self.markdown.zulip_message.has_link = len(found_urls) > 0 self.markdown.zulip_message.has_image = False # This is updated in self.add_a + # Populate potential_attachment_urls. do_claim_attachments() handles setting has_attachments. + self.markdown.zulip_message.potential_attachment_urls = [] + for url in unique_urls: + self.maybe_append_attachment_url(url) - # Collect unique URLs which are not quoted - unique_urls = {found_url.result[0] for found_url in found_urls if not found_url.family.in_blockquote} - if len(found_urls) == 0 or len(unique_urls) > self.INLINE_PREVIEW_LIMIT_PER_MESSAGE: + if len(found_urls) == 0: + return + + if len(unique_previewable_urls) > self.INLINE_PREVIEW_LIMIT_PER_MESSAGE: return processed_urls = set() # type: Set[str] @@ -1064,7 +1086,7 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor): for found_url in found_urls: (url, text) = found_url.result - if url in unique_urls and url not in processed_urls: + if url in unique_previewable_urls and url not in processed_urls: processed_urls.add(url) else: continue diff --git a/zerver/models.py b/zerver/models.py index d6a664ac94..a2ec675410 100644 --- a/zerver/models.py +++ b/zerver/models.py @@ -1726,6 +1726,14 @@ class Message(AbstractMessage): 'website', 'ios', 'android')) or ( 'desktop app' in sending_client) + @property + def potential_attachment_urls(self) -> List[str]: + return getattr(self, '_potential_attachment_urls', []) + + @potential_attachment_urls.setter + def potential_attachment_urls(self, urls: List[str]) -> None: + self._potential_attachment_urls = urls + @staticmethod def content_has_attachment(content: str) -> Match: return re.search(r'[/\-]user[\-_]uploads[/\.-]', content) @@ -1742,8 +1750,7 @@ class Message(AbstractMessage): def update_calculated_fields(self) -> None: # TODO: rendered_content could also be considered a calculated field - content = self.content - self.has_attachment = bool(Message.content_has_attachment(content)) + return @receiver(pre_save, sender=Message) def pre_save_message(sender: Any, **kwargs: Any) -> None: diff --git a/zerver/tests/test_messages.py b/zerver/tests/test_messages.py index e9a4047db9..b0b142d0dc 100644 --- a/zerver/tests/test_messages.py +++ b/zerver/tests/test_messages.py @@ -3570,19 +3570,9 @@ class MessageAccessTests(ZulipTestCase): self.assertEqual(len(filtered_messages), 2) -class AttachmentTest(ZulipTestCase): - def test_basics(self) -> None: - zulip_realm = get_realm("zulip") - self.assertFalse(Message.content_has_attachment('whatever')) - self.assertFalse(Message.content_has_attachment('yo http://foo.com')) - self.assertTrue(Message.content_has_attachment('yo\n https://staging.zulip.com/user_uploads/')) - self.assertTrue(Message.content_has_attachment('yo\n /user_uploads/%s/wEAnI-PEmVmCjo15xxNaQbnj/photo-10.jpg foo' % ( - zulip_realm.id,))) - - def test_claim_attachment(self) -> None: - - # Create dummy DB entry - user_profile = self.example_user('hamlet') +class MessageHasKeywordsTest(ZulipTestCase): + '''Test for keywords like has_link, has_image, has_attachment.''' + def setup_dummy_attachments(self, user_profile: UserProfile) -> List[str]: sample_size = 10 realm_id = user_profile.realm_id dummy_files = [ @@ -3594,21 +3584,54 @@ class AttachmentTest(ZulipTestCase): for file_name, path_id, size in dummy_files: create_attachment(file_name, path_id, user_profile, size) + # return path ids + return [x[1] for x in dummy_files] + + def test_basics(self) -> None: + zulip_realm = get_realm("zulip") + self.assertFalse(Message.content_has_attachment('whatever')) + self.assertFalse(Message.content_has_attachment('yo http://foo.com')) + self.assertTrue(Message.content_has_attachment('yo\n https://staging.zulip.com/user_uploads/')) + self.assertTrue(Message.content_has_attachment('yo\n /user_uploads/%s/wEAnI-PEmVmCjo15xxNaQbnj/photo-10.jpg foo' % ( + zulip_realm.id,))) + + def test_claim_attachment(self) -> None: + user_profile = self.example_user('hamlet') + dummy_path_ids = self.setup_dummy_attachments(user_profile) + dummy_urls = ["http://localhost:9991/user_uploads/{}".format(x) for x in dummy_path_ids] + # Send message referring the attachment self.subscribe(user_profile, "Denmark") - body = ("Some files here ...[zulip.txt](http://localhost:9991/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt)" + - "http://localhost:9991/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py.... Some more...." + - "http://localhost:9991/user_uploads/{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py").format(realm_id=realm_id) - - self.send_stream_message(user_profile.email, "Denmark", body, "test") - - for file_name, path_id, size in dummy_files: + def assert_attachment_claimed(path_id: str, claimed: bool) -> None: attachment = Attachment.objects.get(path_id=path_id) - self.assertTrue(attachment.is_claimed()) + self.assertEqual(attachment.is_claimed(), claimed) -class MessageHasKeywordsTest(ZulipTestCase): - '''Test for keywords like has_link, has_image, has_attachment''' + # This message should claim attachments 1 only because attachment 2 + # is not being parsed as a link by Bugdown. + body = ("Some files here ...[zulip.txt]({})" + + "{}.... Some more...." + + "{}").format(dummy_urls[0], dummy_urls[1], dummy_urls[1]) + self.send_stream_message(user_profile.email, "Denmark", body, "test") + assert_attachment_claimed(dummy_path_ids[0], True) + assert_attachment_claimed(dummy_path_ids[1], False) + + # This message tries to claim the third attachment but fails because + # Bugdown would not set has_attachments = True here. + body = "Link in code: `{}`".format(dummy_urls[2]) + self.send_stream_message(user_profile.email, "Denmark", body, "test") + assert_attachment_claimed(dummy_path_ids[2], False) + + # Another scenario where we wouldn't parse the link. + body = "Link to not parse: .{}.`".format(dummy_urls[2]) + self.send_stream_message(user_profile.email, "Denmark", body, "test") + assert_attachment_claimed(dummy_path_ids[2], False) + + # Finally, claim attachment 3. + body = "Link: {}".format(dummy_urls[2]) + self.send_stream_message(user_profile.email, "Denmark", body, "test") + assert_attachment_claimed(dummy_path_ids[2], True) + assert_attachment_claimed(dummy_path_ids[1], False) def test_finds_all_links(self) -> None: msg_ids = [] @@ -3645,7 +3668,8 @@ class MessageHasKeywordsTest(ZulipTestCase): self.assertTrue(msg.has_link) self.update_message(msg, 'a') self.assertFalse(msg.has_link) - self.update_message(msg, 'a http://foo.com') + # Check in blockquotes work + self.update_message(msg, '> http://bar.com') self.assertTrue(msg.has_link) self.update_message(msg, 'a `http://foo.com`') self.assertFalse(msg.has_link) @@ -3667,6 +3691,29 @@ class MessageHasKeywordsTest(ZulipTestCase): self.update_message(msgs[0], 'No Image Again') self.assertFalse(msgs[0].has_image) + def test_has_attachment(self) -> None: + # This test could belong in AttachmentTest as well, but + # we keep it with other 'has:' tests. + hamlet = self.example_user('hamlet') + dummy_path_ids = self.setup_dummy_attachments(hamlet) + dummy_urls = ["http://localhost:9991/user_uploads/{}".format(x) for x in dummy_path_ids] + self.subscribe(hamlet, "Denmark") + + body = ("Files ...[zulip.txt]({}) {} {}").format(dummy_urls[0], dummy_urls[1], dummy_urls[2]) + + msg_id = self.send_stream_message(hamlet.email, "Denmark", body, "test") + msg = Message.objects.get(id=msg_id) + self.assertTrue(msg.has_attachment) + self.update_message(msg, 'No Attachments') + self.assertFalse(msg.has_attachment) + self.update_message(msg, body) + self.assertTrue(msg.has_attachment) + self.update_message(msg, 'Link in code: `{}`'.format(dummy_urls[1])) + self.assertFalse(msg.has_attachment) + # Test blockquotes + self.update_message(msg, '> {}'.format(dummy_urls[1])) + self.assertTrue(msg.has_attachment) + class MissedMessageTest(ZulipTestCase): def test_presence_idle_user_ids(self) -> None: UserPresence.objects.all().delete() diff --git a/zerver/tests/test_retention.py b/zerver/tests/test_retention.py index c9dcdae5e8..b17b61a95c 100644 --- a/zerver/tests/test_retention.py +++ b/zerver/tests/test_retention.py @@ -143,9 +143,9 @@ class ArchiveMessagesTestingBase(RetentionTestingBase): create_attachment(file_name, path_id, user_profile, size) self.subscribe(user_profile, "Denmark") - body = ("Some files here ...[zulip.txt](http://localhost:9991/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt)" + - "http://localhost:9991/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py.... Some more...." + - "http://localhost:9991/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py").format(id=realm_id) + body = ("Some files here ... [zulip.txt](http://localhost:9991/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt)" + + " http://localhost:9991/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py.... Some more...." + + " http://localhost:9991/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py").format(id=realm_id) expired_message_id = self.send_stream_message(sender_email, "Denmark", body) actual_message_id = self.send_stream_message(sender_email, "Denmark", body)