mirror of
				https://github.com/zulip/zulip.git
				synced 2025-11-04 05:53:43 +00:00 
			
		
		
		
	export: Only export messages that a consenting user can access.
As mentioned in the TODO this commit deletes, the export with member consent system was failing to account for the fact that if consenting users only have access to a subset of messages of a stream with protected history, only that subset should be exported - rather than all the stream's messages.
This commit is contained in:
		
				
					committed by
					
						
						Tim Abbott
					
				
			
			
				
	
			
			
			
						parent
						
							569863ffa6
						
					
				
				
					commit
					318d7fd4cd
				
			@@ -19,7 +19,7 @@ from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional, Set,
 | 
				
			|||||||
import orjson
 | 
					import orjson
 | 
				
			||||||
from django.apps import apps
 | 
					from django.apps import apps
 | 
				
			||||||
from django.conf import settings
 | 
					from django.conf import settings
 | 
				
			||||||
from django.db.models import Q
 | 
					from django.db.models import Exists, OuterRef, Q
 | 
				
			||||||
from django.forms.models import model_to_dict
 | 
					from django.forms.models import model_to_dict
 | 
				
			||||||
from django.utils.timezone import is_naive as timezone_is_naive
 | 
					from django.utils.timezone import is_naive as timezone_is_naive
 | 
				
			||||||
from django.utils.timezone import make_aware as timezone_make_aware
 | 
					from django.utils.timezone import make_aware as timezone_make_aware
 | 
				
			||||||
@@ -1238,11 +1238,17 @@ def export_partial_message_files(
 | 
				
			|||||||
            type=Recipient.STREAM, type_id__in=public_streams
 | 
					            type=Recipient.STREAM, type_id__in=public_streams
 | 
				
			||||||
        ).values_list("id", flat=True)
 | 
					        ).values_list("id", flat=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        streams_with_protected_history_recipient_ids = Stream.objects.filter(
 | 
				
			||||||
 | 
					            realm=realm, history_public_to_subscribers=False
 | 
				
			||||||
 | 
					        ).values_list("recipient_id", flat=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        consented_recipient_ids = Subscription.objects.filter(
 | 
					        consented_recipient_ids = Subscription.objects.filter(
 | 
				
			||||||
            user_profile_id__in=consented_user_ids
 | 
					            user_profile_id__in=consented_user_ids
 | 
				
			||||||
        ).values_list("recipient_id", flat=True)
 | 
					        ).values_list("recipient_id", flat=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        recipient_ids_set = set(public_stream_recipient_ids) | set(consented_recipient_ids)
 | 
					        recipient_ids_set = set(public_stream_recipient_ids) | set(consented_recipient_ids) - set(
 | 
				
			||||||
 | 
					            streams_with_protected_history_recipient_ids
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
        recipient_ids_for_us = get_ids(response["zerver_recipient"]) & recipient_ids_set
 | 
					        recipient_ids_for_us = get_ids(response["zerver_recipient"]) & recipient_ids_set
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        recipient_ids_for_us = get_ids(response["zerver_recipient"])
 | 
					        recipient_ids_for_us = get_ids(response["zerver_recipient"])
 | 
				
			||||||
@@ -1268,6 +1274,24 @@ def export_partial_message_files(
 | 
				
			|||||||
            recipient__in=recipient_ids_for_us,
 | 
					            recipient__in=recipient_ids_for_us,
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if consent_message_id is not None:
 | 
				
			||||||
 | 
					            # Export with member consent requires some careful handling to make sure
 | 
				
			||||||
 | 
					            # we only include messages that a consenting user can access.
 | 
				
			||||||
 | 
					            has_usermessage_expression = Exists(
 | 
				
			||||||
 | 
					                UserMessage.objects.filter(
 | 
				
			||||||
 | 
					                    user_profile_id__in=consented_user_ids, message_id=OuterRef("id")
 | 
				
			||||||
 | 
					                )
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					            messages_we_received_in_protected_history_streams = Message.objects.annotate(
 | 
				
			||||||
 | 
					                has_usermessage=has_usermessage_expression
 | 
				
			||||||
 | 
					            ).filter(
 | 
				
			||||||
 | 
					                sender__in=ids_of_our_possible_senders,
 | 
				
			||||||
 | 
					                recipient_id__in=(
 | 
				
			||||||
 | 
					                    set(consented_recipient_ids) & set(streams_with_protected_history_recipient_ids)
 | 
				
			||||||
 | 
					                ),
 | 
				
			||||||
 | 
					                has_usermessage=True,
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # The above query is missing some messages that consenting
 | 
					        # The above query is missing some messages that consenting
 | 
				
			||||||
        # users have access to, namely, PMs sent by one of the users
 | 
					        # users have access to, namely, PMs sent by one of the users
 | 
				
			||||||
        # in our export to another user (since the only subscriber to
 | 
					        # in our export to another user (since the only subscriber to
 | 
				
			||||||
@@ -1291,6 +1315,8 @@ def export_partial_message_files(
 | 
				
			|||||||
            messages_we_received,
 | 
					            messages_we_received,
 | 
				
			||||||
            messages_we_sent_to_them,
 | 
					            messages_we_sent_to_them,
 | 
				
			||||||
        ]
 | 
					        ]
 | 
				
			||||||
 | 
					        if consent_message_id is not None:
 | 
				
			||||||
 | 
					            message_queries.append(messages_we_received_in_protected_history_streams)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    all_message_ids: Set[int] = set()
 | 
					    all_message_ids: Set[int] = set()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -512,10 +512,15 @@ class RealmImportExportTest(ExportFile):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        create_stream_if_needed(realm, "Private B", invite_only=True)
 | 
					        create_stream_if_needed(realm, "Private B", invite_only=True)
 | 
				
			||||||
        self.subscribe(self.example_user("prospero"), "Private B")
 | 
					        self.subscribe(self.example_user("prospero"), "Private B")
 | 
				
			||||||
        stream_b_message_id = self.send_stream_message(
 | 
					        stream_b_first_message_id = self.send_stream_message(
 | 
				
			||||||
            self.example_user("prospero"), "Private B", "Hello stream B"
 | 
					            self.example_user("prospero"), "Private B", "Hello stream B"
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					        # Hamlet subscribes now, so due to protected history, will not have access to the first message.
 | 
				
			||||||
 | 
					        # This means that his consent will not be sufficient for the export of that message.
 | 
				
			||||||
        self.subscribe(self.example_user("hamlet"), "Private B")
 | 
					        self.subscribe(self.example_user("hamlet"), "Private B")
 | 
				
			||||||
 | 
					        stream_b_second_message_id = self.send_stream_message(
 | 
				
			||||||
 | 
					            self.example_user("prospero"), "Private B", "Hello again stream B"
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        create_stream_if_needed(realm, "Private C", invite_only=True)
 | 
					        create_stream_if_needed(realm, "Private C", invite_only=True)
 | 
				
			||||||
        self.subscribe(self.example_user("othello"), "Private C")
 | 
					        self.subscribe(self.example_user("othello"), "Private C")
 | 
				
			||||||
@@ -524,6 +529,17 @@ class RealmImportExportTest(ExportFile):
 | 
				
			|||||||
            self.example_user("othello"), "Private C", "Hello stream C"
 | 
					            self.example_user("othello"), "Private C", "Hello stream C"
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        create_stream_if_needed(
 | 
				
			||||||
 | 
					            realm, "Private D", invite_only=True, history_public_to_subscribers=True
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        self.subscribe(self.example_user("prospero"), "Private D")
 | 
				
			||||||
 | 
					        self.send_stream_message(self.example_user("prospero"), "Private D", "Hello stream D")
 | 
				
			||||||
 | 
					        # Hamlet subscribes now, but due to the stream having public history to subscribers, that doesn't
 | 
				
			||||||
 | 
					        # matter and he his consent is sufficient to export also messages sent before he was added
 | 
				
			||||||
 | 
					        # to the stream.
 | 
				
			||||||
 | 
					        self.subscribe(self.example_user("hamlet"), "Private D")
 | 
				
			||||||
 | 
					        self.send_stream_message(self.example_user("prospero"), "Private D", "Hello again stream D")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Create huddles
 | 
					        # Create huddles
 | 
				
			||||||
        self.send_huddle_message(
 | 
					        self.send_huddle_message(
 | 
				
			||||||
            self.example_user("iago"), [self.example_user("cordelia"), self.example_user("AARON")]
 | 
					            self.example_user("iago"), [self.example_user("cordelia"), self.example_user("AARON")]
 | 
				
			||||||
@@ -599,6 +615,7 @@ class RealmImportExportTest(ExportFile):
 | 
				
			|||||||
                "Private A",
 | 
					                "Private A",
 | 
				
			||||||
                "Private B",
 | 
					                "Private B",
 | 
				
			||||||
                "Private C",
 | 
					                "Private C",
 | 
				
			||||||
 | 
					                "Private D",
 | 
				
			||||||
            },
 | 
					            },
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -627,8 +644,10 @@ class RealmImportExportTest(ExportFile):
 | 
				
			|||||||
        ).values_list("id", flat=True)
 | 
					        ).values_list("id", flat=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Messages from Private stream C are not exported since no member gave consent
 | 
					        # Messages from Private stream C are not exported since no member gave consent
 | 
				
			||||||
 | 
					        # Only the second message from Private stream B is exported, so that gets handled
 | 
				
			||||||
 | 
					        # separately.
 | 
				
			||||||
        private_stream_ids = Stream.objects.filter(
 | 
					        private_stream_ids = Stream.objects.filter(
 | 
				
			||||||
            name__in=["Private A", "Private B", "core team"]
 | 
					            name__in=["Private A", "Private D", "core team"]
 | 
				
			||||||
        ).values_list("id", flat=True)
 | 
					        ).values_list("id", flat=True)
 | 
				
			||||||
        private_stream_recipients = Recipient.objects.filter(
 | 
					        private_stream_recipients = Recipient.objects.filter(
 | 
				
			||||||
            type_id__in=private_stream_ids, type=Recipient.STREAM
 | 
					            type_id__in=private_stream_ids, type=Recipient.STREAM
 | 
				
			||||||
@@ -662,14 +681,13 @@ class RealmImportExportTest(ExportFile):
 | 
				
			|||||||
        exported_msg_ids = (
 | 
					        exported_msg_ids = (
 | 
				
			||||||
            set(public_stream_message_ids)
 | 
					            set(public_stream_message_ids)
 | 
				
			||||||
            | set(private_stream_message_ids)
 | 
					            | set(private_stream_message_ids)
 | 
				
			||||||
 | 
					            | set([stream_b_second_message_id])
 | 
				
			||||||
            | set(exported_pm_ids)
 | 
					            | set(exported_pm_ids)
 | 
				
			||||||
            | set(exported_huddle_ids)
 | 
					            | set(exported_huddle_ids)
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        self.assertEqual(self.get_set(data["zerver_message"], "id"), exported_msg_ids)
 | 
					        self.assertEqual(self.get_set(data["zerver_message"], "id"), exported_msg_ids)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # TODO: This behavior is wrong and should be fixed. The message should not be exported
 | 
					        self.assertNotIn(stream_b_first_message_id, exported_msg_ids)
 | 
				
			||||||
        # since it was sent before the only consented user iago joined the stream.
 | 
					 | 
				
			||||||
        self.assertIn(stream_b_message_id, exported_msg_ids)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.assertNotIn(stream_c_message_id, exported_msg_ids)
 | 
					        self.assertNotIn(stream_c_message_id, exported_msg_ids)
 | 
				
			||||||
        self.assertNotIn(huddle_c_message_id, exported_msg_ids)
 | 
					        self.assertNotIn(huddle_c_message_id, exported_msg_ids)
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user