From 4a43856ba7fc40ca7747ece41772bf191478ed42 Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Tue, 16 May 2023 16:18:32 +0000 Subject: [PATCH] realm_export: Do not assume null extra_data is special. Fixes: #20197. --- web/templates/settings/admin_export_list.hbs | 4 +- zerver/lib/export.py | 21 ++++---- zerver/tests/test_realm_export.py | 50 +++++++++++++++++--- zerver/views/realm_export.py | 11 +++-- zerver/worker/queue_processors.py | 18 ++++--- 5 files changed, 72 insertions(+), 32 deletions(-) diff --git a/web/templates/settings/admin_export_list.hbs b/web/templates/settings/admin_export_list.hbs index e2f6b673ba..67102b2c12 100644 --- a/web/templates/settings/admin_export_list.hbs +++ b/web/templates/settings/admin_export_list.hbs @@ -18,11 +18,11 @@ {{/if}} - {{#unless time_deleted}} + {{#if url}} - {{/unless}} + {{/if}} {{/with}} diff --git a/zerver/lib/export.py b/zerver/lib/export.py index bbe13a63d0..04ff2473bf 100644 --- a/zerver/lib/export.py +++ b/zerver/lib/export.py @@ -2373,24 +2373,25 @@ def get_realm_exports_serialized(user: UserProfile) -> List[Dict[str, Any]]: ) exports_dict = {} for export in all_exports: - pending = True export_url = None deleted_timestamp = None failed_timestamp = None acting_user = export.acting_user + export_data = {} if export.extra_data is not None: - pending = False - export_data = orjson.loads(export.extra_data) - deleted_timestamp = export_data.get("deleted_timestamp") - failed_timestamp = export_data.get("failed_timestamp") - export_path = export_data.get("export_path") - if export_path and not deleted_timestamp: - export_url = zerver.lib.upload.upload_backend.get_export_tarball_url( - user.realm, export_path - ) + deleted_timestamp = export_data.get("deleted_timestamp") + failed_timestamp = export_data.get("failed_timestamp") + export_path = export_data.get("export_path") + + pending = deleted_timestamp is None and failed_timestamp is None and export_path is None + + if export_path is not None and not deleted_timestamp: + export_url = zerver.lib.upload.upload_backend.get_export_tarball_url( + user.realm, export_path + ) assert acting_user is not None exports_dict[export.id] = dict( diff --git a/zerver/tests/test_realm_export.py b/zerver/tests/test_realm_export.py index ba4a4fda75..51a57f855c 100644 --- a/zerver/tests/test_realm_export.py +++ b/zerver/tests/test_realm_export.py @@ -1,4 +1,5 @@ import os +from typing import Optional, Set from unittest.mock import patch import botocore.exceptions @@ -16,7 +17,7 @@ from zerver.lib.test_helpers import ( stdout_suppressed, use_s3_backend, ) -from zerver.models import RealmAuditLog +from zerver.models import Realm, RealmAuditLog from zerver.views.realm_export import export_realm @@ -117,18 +118,48 @@ class RealmExportTest(ZulipTestCase): tarball_path = create_dummy_file("test-export.tar.gz") # Test the export logic. - with patch("zerver.lib.export.do_export_realm", return_value=tarball_path) as mock_export: + def fake_export_realm( + realm: Realm, + output_dir: str, + threads: int, + exportable_user_ids: Optional[Set[int]] = None, + public_only: bool = False, + consent_message_id: Optional[int] = None, + export_as_active: Optional[bool] = None, + ) -> str: + self.assertEqual(realm, admin.realm) + self.assertEqual(public_only, True) + self.assertTrue(os.path.basename(output_dir).startswith("zulip-export-")) + self.assertEqual(threads, 6) + + # Check that the export shows up as in progress + result = self.client_get("/json/export/realm") + response_dict = self.assert_json_success(result) + export_dict = response_dict["exports"] + self.assert_length(export_dict, 1) + id = export_dict[0]["id"] + self.assertEqual(export_dict[0]["pending"], True) + self.assertIsNone(export_dict[0]["export_url"]) + self.assertIsNone(export_dict[0]["deleted_timestamp"]) + self.assertIsNone(export_dict[0]["failed_timestamp"]) + self.assertEqual(export_dict[0]["acting_user_id"], admin.id) + + # While the export is in progress, we can't delete it + result = self.client_delete(f"/json/export/realm/{id}") + self.assert_json_error(result, "Export still in progress") + + return tarball_path + + with patch( + "zerver.lib.export.do_export_realm", side_effect=fake_export_realm + ) as mock_export: with stdout_suppressed(), self.assertLogs(level="INFO") as info_logs: with self.captureOnCommitCallbacks(execute=True): result = self.client_post("/json/export/realm") self.assertTrue("INFO:root:Completed data export for zulip in " in info_logs.output[0]) + mock_export.assert_called_once() self.assert_json_success(result) self.assertFalse(os.path.exists(tarball_path)) - args = mock_export.call_args_list[0][1] - self.assertEqual(args["realm"], admin.realm) - self.assertEqual(args["public_only"], True) - self.assertTrue(os.path.basename(args["output_dir"]).startswith("zulip-export-")) - self.assertEqual(args["threads"], 6) # Get the entry and test that iago initiated it. audit_log_entry = RealmAuditLog.objects.filter( @@ -201,12 +232,17 @@ class RealmExportTest(ZulipTestCase): response_dict = self.assert_json_success(result) export_dict = response_dict["exports"] self.assert_length(export_dict, 1) + export_id = export_dict[0]["id"] self.assertEqual(export_dict[0]["pending"], False) self.assertIsNone(export_dict[0]["export_url"]) self.assertIsNone(export_dict[0]["deleted_timestamp"]) self.assertIsNotNone(export_dict[0]["failed_timestamp"]) self.assertEqual(export_dict[0]["acting_user_id"], admin.id) + # Check that we can't delete it + result = self.client_delete(f"/json/export/realm/{export_id}") + self.assert_json_error(result, "Export failed, nothing to delete") + def test_realm_export_rate_limited(self) -> None: admin = self.example_user("iago") self.login_user(admin) diff --git a/zerver/views/realm_export.py b/zerver/views/realm_export.py index 136c41815e..2911c92ce9 100644 --- a/zerver/views/realm_export.py +++ b/zerver/views/realm_export.py @@ -14,7 +14,6 @@ from zerver.lib.exceptions import JsonableError from zerver.lib.export import get_realm_exports_serialized from zerver.lib.queue import queue_json_publish from zerver.lib.response import json_success -from zerver.lib.utils import assert_is_not_none from zerver.models import RealmAuditLog, UserProfile @@ -104,8 +103,14 @@ def delete_realm_export(request: HttpRequest, user: UserProfile, export_id: int) except RealmAuditLog.DoesNotExist: raise JsonableError(_("Invalid data export ID")) - export_data = orjson.loads(assert_is_not_none(audit_log_entry.extra_data)) - if "deleted_timestamp" in export_data: + export_data = {} + if audit_log_entry.extra_data is not None: + export_data = orjson.loads(audit_log_entry.extra_data) + if export_data.get("deleted_timestamp") is not None: raise JsonableError(_("Export already deleted")) + if export_data.get("export_path") is None: + if export_data.get("failed_timestamp") is not None: + raise JsonableError(_("Export failed, nothing to delete")) + raise JsonableError(_("Export still in progress")) do_delete_realm_export(user, audit_log_entry) return json_success(request) diff --git a/zerver/worker/queue_processors.py b/zerver/worker/queue_processors.py index b44388c7a2..6f213c6929 100644 --- a/zerver/worker/queue_processors.py +++ b/zerver/worker/queue_processors.py @@ -1063,6 +1063,10 @@ class DeferredWorker(QueueProcessingWorker): output_dir = tempfile.mkdtemp(prefix="zulip-export-") export_event = RealmAuditLog.objects.get(id=event["id"]) user_profile = get_user_profile_by_id(event["user_profile_id"]) + extra_data = {} + if export_event.extra_data is not None: + extra_data = orjson.loads(export_event.extra_data) + logger.info( "Starting realm export for realm %s into %s, initiated by user_profile_id %s", realm.string_id, @@ -1079,11 +1083,8 @@ class DeferredWorker(QueueProcessingWorker): public_only=True, ) except Exception: - export_event.extra_data = orjson.dumps( - dict( - failed_timestamp=timezone_now().timestamp(), - ) - ).decode() + extra_data["failed_timestamp"] = timezone_now().timestamp() + export_event.extra_data = orjson.dumps(extra_data).decode() export_event.save(update_fields=["extra_data"]) logging.exception( "Data export for %s failed after %s", @@ -1097,11 +1098,8 @@ class DeferredWorker(QueueProcessingWorker): assert public_url is not None # Update the extra_data field now that the export is complete. - export_event.extra_data = orjson.dumps( - dict( - export_path=urllib.parse.urlparse(public_url).path, - ) - ).decode() + extra_data["export_path"] = urllib.parse.urlparse(public_url).path + export_event.extra_data = orjson.dumps(extra_data).decode() export_event.save(update_fields=["extra_data"]) # Send a private message notification letting the user who