export: Only export messages that a consenting user can access.

As mentioned in the TODO that this commit deletes, the export with
member consent system failed to handle streams with protected history
correctly: when consenting users have access to only a subset of such a
stream's messages, only that subset should be exported, rather than all
of the stream's messages.
This commit is contained in:
Mateusz Mandera
2022-07-01 18:25:59 +02:00
committed by Tim Abbott
parent 569863ffa6
commit 318d7fd4cd
2 changed files with 51 additions and 7 deletions

View File

@@ -19,7 +19,7 @@ from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional, Set,
import orjson
from django.apps import apps
from django.conf import settings
from django.db.models import Q
from django.db.models import Exists, OuterRef, Q
from django.forms.models import model_to_dict
from django.utils.timezone import is_naive as timezone_is_naive
from django.utils.timezone import make_aware as timezone_make_aware
@@ -1238,11 +1238,17 @@ def export_partial_message_files(
type=Recipient.STREAM, type_id__in=public_streams
).values_list("id", flat=True)
streams_with_protected_history_recipient_ids = Stream.objects.filter(
realm=realm, history_public_to_subscribers=False
).values_list("recipient_id", flat=True)
consented_recipient_ids = Subscription.objects.filter(
user_profile_id__in=consented_user_ids
).values_list("recipient_id", flat=True)
recipient_ids_set = set(public_stream_recipient_ids) | set(consented_recipient_ids)
recipient_ids_set = set(public_stream_recipient_ids) | set(consented_recipient_ids) - set(
streams_with_protected_history_recipient_ids
)
recipient_ids_for_us = get_ids(response["zerver_recipient"]) & recipient_ids_set
else:
recipient_ids_for_us = get_ids(response["zerver_recipient"])
@@ -1268,6 +1274,24 @@ def export_partial_message_files(
recipient__in=recipient_ids_for_us,
)
if consent_message_id is not None:
# Export with member consent requires some careful handling to make sure
# we only include messages that a consenting user can access.
has_usermessage_expression = Exists(
UserMessage.objects.filter(
user_profile_id__in=consented_user_ids, message_id=OuterRef("id")
)
)
messages_we_received_in_protected_history_streams = Message.objects.annotate(
has_usermessage=has_usermessage_expression
).filter(
sender__in=ids_of_our_possible_senders,
recipient_id__in=(
set(consented_recipient_ids) & set(streams_with_protected_history_recipient_ids)
),
has_usermessage=True,
)
# The above query is missing some messages that consenting
# users have access to, namely, PMs sent by one of the users
# in our export to another user (since the only subscriber to
@@ -1291,6 +1315,8 @@ def export_partial_message_files(
messages_we_received,
messages_we_sent_to_them,
]
if consent_message_id is not None:
message_queries.append(messages_we_received_in_protected_history_streams)
all_message_ids: Set[int] = set()