export: Automate validation of ALL_ZERVER_TABLES.

This should make it explicit that whenever we add a new table to
Zulip, we need to correctly categorize it as either included in the
data export or not.
This commit is contained in:
Tim Abbott
2018-05-31 10:13:56 -07:00
parent 328136344a
commit 42aea68df3

View File

@@ -1,6 +1,8 @@
import datetime import datetime
from boto.s3.connection import S3Connection from boto.s3.connection import S3Connection
from django.apps import apps
from django.conf import settings from django.conf import settings
from django.db import connection
from django.forms.models import model_to_dict from django.forms.models import model_to_dict
from django.utils.timezone import make_aware as timezone_make_aware from django.utils.timezone import make_aware as timezone_make_aware
from django.utils.timezone import utc as timezone_utc from django.utils.timezone import utc as timezone_utc
@@ -52,65 +54,141 @@ realm_tables = [("zerver_defaultstream", DefaultStream, "defaultstream"),
("zerver_realmfilter", RealmFilter, "realmfilter")] # List[Tuple[TableName, Any, str]] ("zerver_realmfilter", RealmFilter, "realmfilter")] # List[Tuple[TableName, Any, str]]
ALL_ZERVER_TABLES = [ ALL_ZERVER_TABLES = {
# TODO: get a linter to ensure that this list is actually complete. 'analytics_anomaly',
'analytics_fillstate',
'analytics_installationcount',
'analytics_realmcount',
'analytics_streamcount',
'analytics_usercount',
'otp_static_staticdevice',
'otp_static_statictoken',
'otp_totp_totpdevice',
'social_auth_association',
'social_auth_code',
'social_auth_nonce',
'social_auth_partial',
'social_auth_usersocialauth',
'two_factor_phonedevice',
'zerver_archivedattachment',
'zerver_archivedattachment_messages',
'zerver_archivedmessage',
'zerver_archivedusermessage',
'zerver_attachment', 'zerver_attachment',
'zerver_attachment_messages', 'zerver_attachment_messages',
'zerver_botconfigdata',
'zerver_botstoragedata',
'zerver_client', 'zerver_client',
'zerver_customprofilefield', 'zerver_customprofilefield',
'zerver_customprofilefieldvalue', 'zerver_customprofilefieldvalue',
'zerver_defaultstream', 'zerver_defaultstream',
'zerver_defaultstreamgroup',
'zerver_defaultstreamgroup_streams',
'zerver_emailchangestatus',
'zerver_huddle', 'zerver_huddle',
'zerver_message', 'zerver_message',
'zerver_multiuseinvite',
'zerver_multiuseinvite_streams',
'zerver_preregistrationuser', 'zerver_preregistrationuser',
'zerver_preregistrationuser_streams', 'zerver_preregistrationuser_streams',
'zerver_pushdevicetoken', 'zerver_pushdevicetoken',
'zerver_reaction',
'zerver_realm', 'zerver_realm',
'zerver_realmauditlog',
'zerver_realmdomain', 'zerver_realmdomain',
'zerver_realmemoji', 'zerver_realmemoji',
'zerver_realmfilter', 'zerver_realmfilter',
'zerver_recipient', 'zerver_recipient',
'zerver_scheduledemail', 'zerver_scheduledemail',
'zerver_scheduledmessage',
'zerver_service',
'zerver_stream', 'zerver_stream',
'zerver_submessage',
'zerver_subscription', 'zerver_subscription',
'zerver_useractivity', 'zerver_useractivity',
'zerver_useractivityinterval', 'zerver_useractivityinterval',
'zerver_usergroup',
'zerver_usergroupmembership',
'zerver_userhotspot',
'zerver_usermessage', 'zerver_usermessage',
'zerver_userpresence', 'zerver_userpresence',
'zerver_userprofile', 'zerver_userprofile',
'zerver_userprofile_groups', 'zerver_userprofile_groups',
'zerver_userprofile_user_permissions', 'zerver_userprofile_user_permissions',
] 'zerver_mutedtopic',
}
NON_EXPORTED_TABLES = [ NON_EXPORTED_TABLES = {
# These are known to either be altogether obsolete or # These are known to either be altogether obsolete or
# simply inappropriate for exporting (e.g. contains transient # simply inappropriate for exporting (e.g. contains transient
# data). # data).
'zerver_emailchangestatus',
'zerver_multiuseinvite',
'zerver_multiuseinvite_streams',
'zerver_preregistrationuser', 'zerver_preregistrationuser',
'zerver_preregistrationuser_streams', 'zerver_preregistrationuser_streams',
'zerver_pushdevicetoken',
'zerver_scheduledemail', 'zerver_scheduledemail',
'zerver_userprofile_groups', 'zerver_userprofile_groups',
'zerver_userprofile_user_permissions', 'zerver_userprofile_user_permissions',
] # These are for unfinished features
assert set(NON_EXPORTED_TABLES).issubset(set(ALL_ZERVER_TABLES)) 'zerver_defaultstreamgroup',
'zerver_defaultstreamgroup_streams',
'zerver_scheduledmessage',
'zerver_submessage',
'two_factor_phonedevice',
'otp_static_staticdevice',
'otp_static_statictoken',
'otp_totp_totpdevice',
# These archive tables probably should not be exported (they are for internal correctness)
'zerver_archivedmessage',
'zerver_archivedusermessage',
'zerver_archivedattachment',
'zerver_archivedattachment_messages',
IMPLICIT_TABLES = [ # Social auth tables are not needed post-export
'social_auth_association',
'social_auth_code',
'social_auth_nonce',
'social_auth_partial',
'social_auth_usersocialauth',
# We will likely never want to migrate these tables
'analytics_fillstate',
'analytics_installationcount',
# These analytics tables, however, should ideally be in the export.
'analytics_anomaly',
'analytics_realmcount',
'analytics_streamcount',
'analytics_usercount',
# The fact that these are not exported is a bug
'zerver_botstoragedata',
'zerver_botconfigdata',
'zerver_mutedtopic',
'zerver_realmauditlog',
'zerver_pushdevicetoken',
'zerver_service',
'zerver_usergroup',
'zerver_usergroupmembership',
'zerver_userhotspot',
}
IMPLICIT_TABLES = {
# ManyToMany relationships are exported implicitly. # ManyToMany relationships are exported implicitly.
'zerver_attachment_messages', 'zerver_attachment_messages',
] }
assert set(IMPLICIT_TABLES).issubset(set(ALL_ZERVER_TABLES))
ATTACHMENT_TABLES = [ ATTACHMENT_TABLES = {
'zerver_attachment', 'zerver_attachment',
] }
assert set(ATTACHMENT_TABLES).issubset(set(ALL_ZERVER_TABLES))
MESSAGE_TABLES = [ MESSAGE_TABLES = {
# message tables get special treatment, because they're so big # message tables get special treatment, because they're so big
'zerver_message', 'zerver_message',
'zerver_usermessage', 'zerver_usermessage',
] # zerver_reaction belongs here, since it's added late
'zerver_reaction',
}
DATE_FIELDS = { DATE_FIELDS = {
'zerver_attachment': ['create_time'], 'zerver_attachment': ['create_time'],
@@ -124,11 +202,38 @@ DATE_FIELDS = {
} # type: Dict[TableName, List[Field]] } # type: Dict[TableName, List[Field]]
def sanity_check_output(data: TableData) -> None: def sanity_check_output(data: TableData) -> None:
# First, we verify that the export tool has a declared
# configuration for every table.
target_models = (
list(apps.get_app_config('analytics').get_models(include_auto_created=True)) +
list(apps.get_app_config('django_otp').get_models(include_auto_created=True)) +
list(apps.get_app_config('otp_static').get_models(include_auto_created=True)) +
list(apps.get_app_config('otp_totp').get_models(include_auto_created=True)) +
list(apps.get_app_config('social_django').get_models(include_auto_created=True)) +
list(apps.get_app_config('two_factor').get_models(include_auto_created=True)) +
list(apps.get_app_config('zerver').get_models(include_auto_created=True))
)
all_tables_db = set(model._meta.db_table for model in target_models)
# These assertion statements will fire when we add a new database
# table that is not included in Zulip's data exports. Generally,
# you can add your new table to `ALL_ZERVER_TABLES` and
# `NON_EXPORTED_TABLES` during early work on a new feature so that
# CI passes.
#
# We'll want to make sure we handle it for exports before
# releasing the new feature, but doing so correctly requires some
# expertise on this export system.
assert ALL_ZERVER_TABLES == all_tables_db
assert NON_EXPORTED_TABLES.issubset(ALL_ZERVER_TABLES)
assert IMPLICIT_TABLES.issubset(ALL_ZERVER_TABLES)
assert ATTACHMENT_TABLES.issubset(ALL_ZERVER_TABLES)
tables = set(ALL_ZERVER_TABLES) tables = set(ALL_ZERVER_TABLES)
tables -= set(NON_EXPORTED_TABLES) tables -= NON_EXPORTED_TABLES
tables -= set(IMPLICIT_TABLES) tables -= IMPLICIT_TABLES
tables -= set(MESSAGE_TABLES) tables -= MESSAGE_TABLES
tables -= set(ATTACHMENT_TABLES) tables -= ATTACHMENT_TABLES
for table in tables: for table in tables:
if table not in data: if table not in data: