provisioning: Don't rebuild DB if running migrations is sufficient.

This results in a significant optimization in the performance of
re-provisioning Zulip if all that you're doing is rebasing onto a
newer version of master (which just adds new migrations).

The change carries some risk of generating unpleasant-to-debug
situations, because if we merge a buggy migration and then later fix
it, some clients may not have a properly migrated database (and also,
this changes how populate_db commutes with migrations).  But it seems
worth it, given how much time is currently wasted by not having this.

Fixes: #9512.
This commit is contained in:
Aditya Bansal
2018-06-06 04:38:27 +05:30
committed by Tim Abbott
parent 65dc80fe9d
commit 1205e02c64
2 changed files with 57 additions and 8 deletions

View File

@@ -321,7 +321,7 @@ def main(options):
import django import django
django.setup() django.setup()
from zerver.lib.test_fixtures import template_database_status from zerver.lib.test_fixtures import template_database_status, run_db_migrations
try: try:
from zerver.lib.queue import SimpleQueueClient from zerver.lib.queue import SimpleQueueClient
@@ -344,6 +344,8 @@ def main(options):
if options.is_force or dev_template_db_status == 'needs_rebuild': if options.is_force or dev_template_db_status == 'needs_rebuild':
run(["tools/setup/postgres-init-dev-db"]) run(["tools/setup/postgres-init-dev-db"])
run(["tools/do-destroy-rebuild-database"]) run(["tools/do-destroy-rebuild-database"])
elif dev_template_db_status == 'run_migrations':
run_db_migrations('dev')
elif dev_template_db_status == 'current': elif dev_template_db_status == 'current':
print("No need to regenerate the dev DB.") print("No need to regenerate the dev DB.")
@@ -351,6 +353,8 @@ def main(options):
if options.is_force or test_template_db_status == 'needs_rebuild': if options.is_force or test_template_db_status == 'needs_rebuild':
run(["tools/setup/postgres-init-test-db"]) run(["tools/setup/postgres-init-test-db"])
run(["tools/do-destroy-rebuild-test-database"]) run(["tools/do-destroy-rebuild-test-database"])
elif test_template_db_status == 'run_migrations':
run_db_migrations('test')
elif test_template_db_status == 'current': elif test_template_db_status == 'current':
print("No need to regenerate the test DB.") print("No need to regenerate the test DB.")

View File

@@ -17,16 +17,35 @@ from django.core.management import call_command
from django.utils.module_loading import module_has_submodule from django.utils.module_loading import module_has_submodule
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
from scripts.lib.zulip_tools import get_dev_uuid_var_path from scripts.lib.zulip_tools import get_dev_uuid_var_path, run
UUID_VAR_DIR = get_dev_uuid_var_path() UUID_VAR_DIR = get_dev_uuid_var_path()
FILENAME_SPLITTER = re.compile('[\W\-_]') FILENAME_SPLITTER = re.compile('[\W\-_]')
def run_db_migrations(platform: str) -> None:
    """Run Django migrations for one database, then refresh its status file.

    `platform` must be 'dev' or 'test'.  It selects the Django settings
    module used for the subprocesses and the file name handed to the
    `get_migration_status` management command via `--output`.

    Raises ValueError if `platform` is anything else, rather than failing
    later on an unbound local variable.
    """
    if platform == 'dev':
        migration_status_file = 'migration_status_dev'
        settings = 'zproject.settings'
    elif platform == 'test':
        migration_status_file = 'migration_status_test'
        settings = 'zproject.test_settings'
    else:
        raise ValueError("Unknown platform %r; expected 'dev' or 'test'" % (platform,))

    # We shell out to `manage.py` and pass `DJANGO_SETTINGS_MODULE` on
    # the command line rather than just calling the migration
    # functions, because Django doesn't support changing settings like
    # what the database is at runtime.
    manage_py = ['env', 'DJANGO_SETTINGS_MODULE=%s' % (settings,), './manage.py']
    run(manage_py + ['migrate', '--no-input'])
    run(manage_py + ['get_migration_status',
                     '--output=%s' % (migration_status_file,)])
def run_generate_fixtures_if_required(use_force: bool=False) -> None:
    """Invoke `tools/setup/generate-fixtures`, preparing the test DB first.

    When `use_force` is set or the template database status is
    'needs_rebuild', `--force` is appended to the command.  Otherwise, if
    the status is 'run_migrations', migrations are applied to the test
    database before the command runs.
    """
    db_state = template_database_status()
    command = ['tools/setup/generate-fixtures']

    must_rebuild = use_force or db_state == 'needs_rebuild'
    if must_rebuild:
        command.append('--force')
    elif db_state == 'run_migrations':
        run_db_migrations('test')

    subprocess.check_call(command)
def database_exists(database_name: str, **options: Any) -> bool: def database_exists(database_name: str, **options: Any) -> bool:
@@ -68,13 +87,31 @@ def get_migration_status(**options: Any) -> str:
output = out.read() output = out.read()
return re.sub('\x1b\[(1|0)m', '', output) return re.sub('\x1b\[(1|0)m', '', output)
def extract_migrations_as_list(migration_status: str) -> List[str]:
    """Return the migration names listed in a migration status dump.

    A line counts as a migration entry when it contains `[X] ` or `[ ] `
    followed by a name made of digits, an underscore, and the rest of the
    line.  Names are returned in the order they appear; the applied
    marker is discarded.
    """
    # Raw string so the backslashes reach the regex engine untouched.
    # `$` with MULTILINE (instead of a literal newline) also matches a
    # final entry that is not newline-terminated.
    migrations_re = re.compile(r'\[[X ]\] (\d+_.+)$', re.MULTILINE)
    return migrations_re.findall(migration_status)
def what_to_do_with_migrations(migration_file: str, **options: Any) -> str:
    """Compare the saved migration status with the current one.

    Returns one of:
      * 'scrap' -- `migration_file` does not exist, or some previously
        recorded migration is no longer present in the current status.
      * 'migrations_are_latest' -- the recorded and current migration
        lists contain the same entries.
      * 'migrate' -- the current status has every recorded migration
        plus additional ones.

    `options` is forwarded unchanged to `get_migration_status`.
    """
    from collections import Counter

    if not os.path.exists(migration_file):
        return 'scrap'

    with open(migration_file) as f:
        previous_migration_status = f.read()
    current_migration_status = get_migration_status(**options)

    all_curr_migrations = extract_migrations_as_list(current_migration_status)
    all_prev_migrations = extract_migrations_as_list(previous_migration_status)

    # The extracted names carry no app label, so the same name can occur
    # more than once.  Comparing as multisets ensures each recorded
    # occurrence is matched by its own current occurrence, instead of a
    # plain membership test that one same-named entry could satisfy for
    # several.
    missing = Counter(all_prev_migrations) - Counter(all_curr_migrations)
    if missing:
        return 'scrap'

    # Every recorded migration is still present, so the current list is
    # at least as long; equal length means nothing new was added.
    if len(all_curr_migrations) == len(all_prev_migrations):
        return 'migrations_are_latest'

    return 'migrate'
def _get_hash_file_path(source_file_path: str, status_dir: str) -> str: def _get_hash_file_path(source_file_path: str, status_dir: str) -> str:
basename = os.path.basename(source_file_path) basename = os.path.basename(source_file_path)
@@ -152,8 +189,16 @@ def template_database_status(
settings_hash_status = all([check_setting_hash(setting_name, status_dir) settings_hash_status = all([check_setting_hash(setting_name, status_dir)
for setting_name in check_settings]) for setting_name in check_settings])
hash_status = files_hash_status and settings_hash_status hash_status = files_hash_status and settings_hash_status
if not hash_status:
return 'needs_rebuild'
if are_migrations_the_same(migration_status, settings=settings) and hash_status: migration_op = what_to_do_with_migrations(migration_status, settings=settings)
return 'current' if migration_op == 'scrap':
return 'needs_rebuild'
if migration_op == 'migrate':
return 'run_migrations'
return 'current'
return 'needs_rebuild' return 'needs_rebuild'