export: Only export messages that a consenting user can access.

As mentioned in the TODO this commit deletes, the export with member
consent system was failing to account for the fact that if consenting
users only have access to a subset of messages of a stream with
protected history, only that subset should be exported - rather than all
the stream's messages.
This commit is contained in:
Mateusz Mandera
2022-07-01 18:25:59 +02:00
committed by Tim Abbott
parent 569863ffa6
commit 318d7fd4cd
2 changed files with 51 additions and 7 deletions

View File

@@ -19,7 +19,7 @@ from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional, Set,
import orjson import orjson
from django.apps import apps from django.apps import apps
from django.conf import settings from django.conf import settings
from django.db.models import Q from django.db.models import Exists, OuterRef, Q
from django.forms.models import model_to_dict from django.forms.models import model_to_dict
from django.utils.timezone import is_naive as timezone_is_naive from django.utils.timezone import is_naive as timezone_is_naive
from django.utils.timezone import make_aware as timezone_make_aware from django.utils.timezone import make_aware as timezone_make_aware
@@ -1238,11 +1238,17 @@ def export_partial_message_files(
type=Recipient.STREAM, type_id__in=public_streams type=Recipient.STREAM, type_id__in=public_streams
).values_list("id", flat=True) ).values_list("id", flat=True)
streams_with_protected_history_recipient_ids = Stream.objects.filter(
realm=realm, history_public_to_subscribers=False
).values_list("recipient_id", flat=True)
consented_recipient_ids = Subscription.objects.filter( consented_recipient_ids = Subscription.objects.filter(
user_profile_id__in=consented_user_ids user_profile_id__in=consented_user_ids
).values_list("recipient_id", flat=True) ).values_list("recipient_id", flat=True)
recipient_ids_set = set(public_stream_recipient_ids) | set(consented_recipient_ids) recipient_ids_set = set(public_stream_recipient_ids) | set(consented_recipient_ids) - set(
streams_with_protected_history_recipient_ids
)
recipient_ids_for_us = get_ids(response["zerver_recipient"]) & recipient_ids_set recipient_ids_for_us = get_ids(response["zerver_recipient"]) & recipient_ids_set
else: else:
recipient_ids_for_us = get_ids(response["zerver_recipient"]) recipient_ids_for_us = get_ids(response["zerver_recipient"])
@@ -1268,6 +1274,24 @@ def export_partial_message_files(
recipient__in=recipient_ids_for_us, recipient__in=recipient_ids_for_us,
) )
if consent_message_id is not None:
# Export with member consent requires some careful handling to make sure
# we only include messages that a consenting user can access.
has_usermessage_expression = Exists(
UserMessage.objects.filter(
user_profile_id__in=consented_user_ids, message_id=OuterRef("id")
)
)
messages_we_received_in_protected_history_streams = Message.objects.annotate(
has_usermessage=has_usermessage_expression
).filter(
sender__in=ids_of_our_possible_senders,
recipient_id__in=(
set(consented_recipient_ids) & set(streams_with_protected_history_recipient_ids)
),
has_usermessage=True,
)
# The above query is missing some messages that consenting # The above query is missing some messages that consenting
# users have access to, namely, PMs sent by one of the users # users have access to, namely, PMs sent by one of the users
# in our export to another user (since the only subscriber to # in our export to another user (since the only subscriber to
@@ -1291,6 +1315,8 @@ def export_partial_message_files(
messages_we_received, messages_we_received,
messages_we_sent_to_them, messages_we_sent_to_them,
] ]
if consent_message_id is not None:
message_queries.append(messages_we_received_in_protected_history_streams)
all_message_ids: Set[int] = set() all_message_ids: Set[int] = set()

View File

@@ -512,10 +512,15 @@ class RealmImportExportTest(ExportFile):
create_stream_if_needed(realm, "Private B", invite_only=True) create_stream_if_needed(realm, "Private B", invite_only=True)
self.subscribe(self.example_user("prospero"), "Private B") self.subscribe(self.example_user("prospero"), "Private B")
stream_b_message_id = self.send_stream_message( stream_b_first_message_id = self.send_stream_message(
self.example_user("prospero"), "Private B", "Hello stream B" self.example_user("prospero"), "Private B", "Hello stream B"
) )
# Hamlet subscribes now, so due to protected history, will not have access to the first message.
# This means that his consent will not be sufficient for the export of that message.
self.subscribe(self.example_user("hamlet"), "Private B") self.subscribe(self.example_user("hamlet"), "Private B")
stream_b_second_message_id = self.send_stream_message(
self.example_user("prospero"), "Private B", "Hello again stream B"
)
create_stream_if_needed(realm, "Private C", invite_only=True) create_stream_if_needed(realm, "Private C", invite_only=True)
self.subscribe(self.example_user("othello"), "Private C") self.subscribe(self.example_user("othello"), "Private C")
@@ -524,6 +529,17 @@ class RealmImportExportTest(ExportFile):
self.example_user("othello"), "Private C", "Hello stream C" self.example_user("othello"), "Private C", "Hello stream C"
) )
create_stream_if_needed(
realm, "Private D", invite_only=True, history_public_to_subscribers=True
)
self.subscribe(self.example_user("prospero"), "Private D")
self.send_stream_message(self.example_user("prospero"), "Private D", "Hello stream D")
# Hamlet subscribes now, but due to the stream having public history to subscribers, that doesn't
# matter and he his consent is sufficient to export also messages sent before he was added
# to the stream.
self.subscribe(self.example_user("hamlet"), "Private D")
self.send_stream_message(self.example_user("prospero"), "Private D", "Hello again stream D")
# Create huddles # Create huddles
self.send_huddle_message( self.send_huddle_message(
self.example_user("iago"), [self.example_user("cordelia"), self.example_user("AARON")] self.example_user("iago"), [self.example_user("cordelia"), self.example_user("AARON")]
@@ -599,6 +615,7 @@ class RealmImportExportTest(ExportFile):
"Private A", "Private A",
"Private B", "Private B",
"Private C", "Private C",
"Private D",
}, },
) )
@@ -627,8 +644,10 @@ class RealmImportExportTest(ExportFile):
).values_list("id", flat=True) ).values_list("id", flat=True)
# Messages from Private stream C are not exported since no member gave consent # Messages from Private stream C are not exported since no member gave consent
# Only the second message from Private stream B is exported, so that gets handled
# separately.
private_stream_ids = Stream.objects.filter( private_stream_ids = Stream.objects.filter(
name__in=["Private A", "Private B", "core team"] name__in=["Private A", "Private D", "core team"]
).values_list("id", flat=True) ).values_list("id", flat=True)
private_stream_recipients = Recipient.objects.filter( private_stream_recipients = Recipient.objects.filter(
type_id__in=private_stream_ids, type=Recipient.STREAM type_id__in=private_stream_ids, type=Recipient.STREAM
@@ -662,14 +681,13 @@ class RealmImportExportTest(ExportFile):
exported_msg_ids = ( exported_msg_ids = (
set(public_stream_message_ids) set(public_stream_message_ids)
| set(private_stream_message_ids) | set(private_stream_message_ids)
| set([stream_b_second_message_id])
| set(exported_pm_ids) | set(exported_pm_ids)
| set(exported_huddle_ids) | set(exported_huddle_ids)
) )
self.assertEqual(self.get_set(data["zerver_message"], "id"), exported_msg_ids) self.assertEqual(self.get_set(data["zerver_message"], "id"), exported_msg_ids)
# TODO: This behavior is wrong and should be fixed. The message should not be exported self.assertNotIn(stream_b_first_message_id, exported_msg_ids)
# since it was sent before the only consented user iago joined the stream.
self.assertIn(stream_b_message_id, exported_msg_ids)
self.assertNotIn(stream_c_message_id, exported_msg_ids) self.assertNotIn(stream_c_message_id, exported_msg_ids)
self.assertNotIn(huddle_c_message_id, exported_msg_ids) self.assertNotIn(huddle_c_message_id, exported_msg_ids)