From 42aea68df3cc9e58c8e8ac857aab5bc36022129a Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Thu, 31 May 2018 10:13:56 -0700 Subject: [PATCH] export: Automate validation of ALL_ZERVER_TABLES. This should help make it explicit whenever we add a new table to Zulip that we need to correctly categorize it for whether it will be included in the data export, or not. --- zerver/lib/export.py | 143 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 124 insertions(+), 19 deletions(-) diff --git a/zerver/lib/export.py b/zerver/lib/export.py index dddd426bf8..9448cebb36 100644 --- a/zerver/lib/export.py +++ b/zerver/lib/export.py @@ -1,6 +1,8 @@ import datetime from boto.s3.connection import S3Connection +from django.apps import apps from django.conf import settings +from django.db import connection from django.forms.models import model_to_dict from django.utils.timezone import make_aware as timezone_make_aware from django.utils.timezone import utc as timezone_utc @@ -52,65 +54,141 @@ realm_tables = [("zerver_defaultstream", DefaultStream, "defaultstream"), ("zerver_realmfilter", RealmFilter, "realmfilter")] # List[Tuple[TableName, Any, str]] -ALL_ZERVER_TABLES = [ - # TODO: get a linter to ensure that this list is actually complete. +ALL_ZERVER_TABLES = { + 'analytics_anomaly', + 'analytics_fillstate', + 'analytics_installationcount', + 'analytics_realmcount', + 'analytics_streamcount', + 'analytics_usercount', + 'otp_static_staticdevice', + 'otp_static_statictoken', + 'otp_totp_totpdevice', + 'social_auth_association', + 'social_auth_code', + 'social_auth_nonce', + 'social_auth_partial', + 'social_auth_usersocialauth', + 'two_factor_phonedevice', + 'zerver_archivedattachment', + 'zerver_archivedattachment_messages', + 'zerver_archivedmessage', + 'zerver_archivedusermessage', 'zerver_attachment', 'zerver_attachment_messages', + 'zerver_botconfigdata', + 'zerver_botstoragedata', 'zerver_client', 'zerver_customprofilefield', 'zerver_customprofilefieldvalue', 'zerver_defaultstream', + 'zerver_defaultstreamgroup', + 'zerver_defaultstreamgroup_streams', + 'zerver_emailchangestatus', 'zerver_huddle', 'zerver_message', + 'zerver_multiuseinvite', + 'zerver_multiuseinvite_streams', 'zerver_preregistrationuser', 'zerver_preregistrationuser_streams', 'zerver_pushdevicetoken', + 'zerver_reaction', 'zerver_realm', + 'zerver_realmauditlog', 'zerver_realmdomain', 'zerver_realmemoji', 'zerver_realmfilter', 'zerver_recipient', 'zerver_scheduledemail', + 'zerver_scheduledmessage', + 'zerver_service', 'zerver_stream', + 'zerver_submessage', 'zerver_subscription', 'zerver_useractivity', 'zerver_useractivityinterval', + 'zerver_usergroup', + 'zerver_usergroupmembership', + 'zerver_userhotspot', 'zerver_usermessage', 'zerver_userpresence', 'zerver_userprofile', 'zerver_userprofile_groups', 'zerver_userprofile_user_permissions', -] + 'zerver_mutedtopic', +} -NON_EXPORTED_TABLES = [ +NON_EXPORTED_TABLES = { # These are known to either be altogether obsolete or # simply inappropriate for exporting (e.g. contains transient # data). + 'zerver_emailchangestatus', + 'zerver_multiuseinvite', + 'zerver_multiuseinvite_streams', 'zerver_preregistrationuser', 'zerver_preregistrationuser_streams', - 'zerver_pushdevicetoken', 'zerver_scheduledemail', 'zerver_userprofile_groups', 'zerver_userprofile_user_permissions', -] -assert set(NON_EXPORTED_TABLES).issubset(set(ALL_ZERVER_TABLES)) + # These are for unfinished features + 'zerver_defaultstreamgroup', + 'zerver_defaultstreamgroup_streams', + 'zerver_scheduledmessage', + 'zerver_submessage', + 'two_factor_phonedevice', + 'otp_static_staticdevice', + 'otp_static_statictoken', + 'otp_totp_totpdevice', + # These archive tables probably should not be exported (they are for internal correctness) + 'zerver_archivedmessage', + 'zerver_archivedusermessage', + 'zerver_archivedattachment', + 'zerver_archivedattachment_messages', -IMPLICIT_TABLES = [ + # Social auth tables are not needed post-export + 'social_auth_association', + 'social_auth_code', + 'social_auth_nonce', + 'social_auth_partial', + 'social_auth_usersocialauth', + + # We will likely never want to migrate these tables + 'analytics_fillstate', + 'analytics_installationcount', + # These analytics tables, however, should ideally be in the export. + 'analytics_anomaly', + 'analytics_realmcount', + 'analytics_streamcount', + 'analytics_usercount', + + # The fact that these are not exported is a bug + 'zerver_botstoragedata', + 'zerver_botconfigdata', + 'zerver_mutedtopic', + 'zerver_realmauditlog', + 'zerver_pushdevicetoken', + 'zerver_service', + 'zerver_usergroup', + 'zerver_usergroupmembership', + 'zerver_userhotspot', +} + +IMPLICIT_TABLES = { # ManyToMany relationships are exported implicitly. 'zerver_attachment_messages', -] -assert set(IMPLICIT_TABLES).issubset(set(ALL_ZERVER_TABLES)) +} -ATTACHMENT_TABLES = [ +ATTACHMENT_TABLES = { 'zerver_attachment', -] -assert set(ATTACHMENT_TABLES).issubset(set(ALL_ZERVER_TABLES)) +} -MESSAGE_TABLES = [ +MESSAGE_TABLES = { # message tables get special treatment, because they're so big 'zerver_message', 'zerver_usermessage', -] + # zerver_reaction belongs here, since it's added late + 'zerver_reaction', +} DATE_FIELDS = { 'zerver_attachment': ['create_time'], @@ -124,11 +202,38 @@ DATE_FIELDS = { } # type: Dict[TableName, List[Field]] def sanity_check_output(data: TableData) -> None: + # First, we verify that the export tool has a declared + # configuration for every table. + target_models = ( + list(apps.get_app_config('analytics').get_models(include_auto_created=True)) + + list(apps.get_app_config('django_otp').get_models(include_auto_created=True)) + + list(apps.get_app_config('otp_static').get_models(include_auto_created=True)) + + list(apps.get_app_config('otp_totp').get_models(include_auto_created=True)) + + list(apps.get_app_config('social_django').get_models(include_auto_created=True)) + + list(apps.get_app_config('two_factor').get_models(include_auto_created=True)) + + list(apps.get_app_config('zerver').get_models(include_auto_created=True)) + ) + all_tables_db = set(model._meta.db_table for model in target_models) + + # These assertion statements will fire when we add a new database + # table that is not included in Zulip's data exports. Generally, + # you can add your new table to `ALL_ZERVER_TABLES` and + # `NON_EXPORTED_TABLES` during early work on a new feature so that + # CI passes. + # + # We'll want to make sure we handle it for exports before + # releasing the new feature, but doing so correctly requires some + # expertise on this export system. + assert ALL_ZERVER_TABLES == all_tables_db + assert NON_EXPORTED_TABLES.issubset(ALL_ZERVER_TABLES) + assert IMPLICIT_TABLES.issubset(ALL_ZERVER_TABLES) + assert ATTACHMENT_TABLES.issubset(ALL_ZERVER_TABLES) + tables = set(ALL_ZERVER_TABLES) - tables -= set(NON_EXPORTED_TABLES) - tables -= set(IMPLICIT_TABLES) - tables -= set(MESSAGE_TABLES) - tables -= set(ATTACHMENT_TABLES) + tables -= NON_EXPORTED_TABLES + tables -= IMPLICIT_TABLES + tables -= MESSAGE_TABLES + tables -= ATTACHMENT_TABLES for table in tables: if table not in data: