provision: Manage digests more rigorously.

We now have two functions related to digests
for processes:

    is_digest_obsolete
    write_new_digest

In most cases we now **wait** to write the
digest file until after we've successfully
run a process with its new inputs.

In one place, for database migrations, we
continue to write the digest optimistically.
We'll want to fix this, but it requires a
little more code cleanup.
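
To make the ordering concrete, here is a minimal sketch of the
two behaviors (hypothetical caller code; run_process and the
'example_hash' digest name are illustrative, not part of this
commit):

    paths = ['some/input/file.py']

    # Deferred write (most callers now): record the digest
    # only after the work succeeds, so a failed run leaves
    # the digest obsolete and we retry on the next provision.
    if is_digest_obsolete('example_hash', paths):
        run_process(paths)
        write_new_digest('example_hash', paths)

    # Optimistic write (migrations, for now): record the
    # digest up front; if the work then fails, we have a
    # fresh digest for a process that never completed.
    if is_digest_obsolete('example_hash', paths):
        write_new_digest('example_hash', paths)
        run_process(paths)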

Here is the typical sequence of events:

    NEVER RUN -
        is_digest_obsolete returns True
        quickly (we don't compute a hash)

        write_new_digest does a write (duh)

    AFTER NO CHANGES -
        is_digest_obsolete returns False
        after reading one file for the old
        hash and multiple files to compute
        the new hash

        most callers skip write_new_digest

        (no files are changed)

    AFTER SOME CHANGES -
        is_digest_obsolete returns True
        after doing full checks

        most callers call write_new_digest
        *after* running a process
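
In caller code the three cases collapse into one pattern
(again a sketch; names are illustrative):

    if is_digest_obsolete('example_hash', paths):
        # NEVER RUN: there is no digest file on disk, so we
        # returned True without hashing anything.
        # AFTER SOME CHANGES: the stored hash and the newly
        # computed hash differ.
        run_process(paths)
        write_new_digest('example_hash', paths)
    # AFTER NO CHANGES: the hashes match, so we skip both
    # the process and the write.
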
Author: Steve Howell
Date: 2020-04-20 13:16:16 +00:00
Committed by: Tim Abbott
Parent: b280f73c77
Commit: 51f74a7fd8

3 changed files with 75 additions and 26 deletions

scripts/lib/zulip_tools.py

@@ -393,24 +393,38 @@ def path_version_digest(paths: List[str],
     return sha1sum.hexdigest()
 
-def file_or_package_hash_updated(hash_name: str,
-                                 paths: List[str],
-                                 package_versions: List[str]=[]) -> bool:
-    # Check whether the files or package_versions passed as arguments
-    # changed compared to the last execution.
-    hash_path = os.path.join(get_dev_uuid_var_path(), hash_name)
+def is_digest_obsolete(hash_name: str,
+                       paths: List[str],
+                       package_versions: List[str]=[]) -> bool:
+    # Check whether the `paths` contents or
+    # `package_versions` have changed.
+    last_hash_path = os.path.join(get_dev_uuid_var_path(), hash_name)
+    try:
+        with open(last_hash_path) as f:
+            old_hash = f.read()
+    except FileNotFoundError:
+        # This is normal for a fresh checkout--a missing
+        # digest is an obsolete digest.
+        return True
 
     new_hash = path_version_digest(paths, package_versions)
-    with open(hash_path, 'a+') as hash_file:
-        hash_file.seek(0)
-        last_hash = hash_file.read()
 
-        if (new_hash != last_hash):
-            hash_file.seek(0)
-            hash_file.truncate()
-            hash_file.write(new_hash)
-            return True
-    return False
+    return new_hash != old_hash
+
+def write_new_digest(hash_name: str,
+                     paths: List[str],
+                     package_versions: List[str]=[]) -> None:
+    hash_path = os.path.join(get_dev_uuid_var_path(), hash_name)
+    new_hash = path_version_digest(paths, package_versions)
+    with open(hash_path, 'w') as f:
+        f.write(new_hash)
+
+    # Be a little verbose here--our callers ensure we
+    # only write new digests when things have changed, and
+    # making this system more transparent to developers
+    # can help them troubleshoot provisioning glitches.
+    print('New digest written to: ' + hash_path)
 
 def is_root() -> bool:
     if 'posix' in os.name and os.geteuid() == 0:

(provision script)

@@ -11,7 +11,7 @@ ZULIP_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 sys.path.append(ZULIP_PATH)
 
 from scripts.lib.zulip_tools import run, OKBLUE, ENDC, \
-    get_dev_uuid_var_path, file_or_package_hash_updated
+    get_dev_uuid_var_path, is_digest_obsolete, write_new_digest
 
 from version import PROVISION_VERSION
 from pygments import __version__ as pygments_version
@@ -119,7 +119,7 @@ def need_to_run_build_pygments_data() -> bool:
     if not os.path.exists("static/generated/pygments_data.json"):
         return True
 
-    return file_or_package_hash_updated(
+    return is_digest_obsolete(
         "build_pygments_data_hash",
         build_pygments_data_paths(),
         [pygments_version]
@@ -131,7 +131,7 @@ def need_to_run_compilemessages() -> bool:
         print('Need to run compilemessages due to missing language_name_map.json')
         return True
 
-    return file_or_package_hash_updated(
+    return is_digest_obsolete(
         "last_compilemessages_hash",
         compilemessages_paths(),
     )
@@ -140,7 +140,7 @@ def need_to_run_inline_email_css() -> bool:
     if not os.path.exists('templates/zerver/emails/compiled/'):
         return True
 
-    return file_or_package_hash_updated(
+    return is_digest_obsolete(
         "last_email_source_files_hash",
         inline_email_css_paths(),
     )
@@ -164,11 +164,20 @@ def main(options: argparse.Namespace) -> int:
     if options.is_force or need_to_run_build_pygments_data():
         run(["tools/setup/build_pygments_data"])
+        write_new_digest(
+            'build_pygments_data_hash',
+            build_pygments_data_paths(),
+            [pygments_version]
+        )
     else:
         print("No need to run `tools/setup/build_pygments_data`.")
 
     if options.is_force or need_to_run_inline_email_css():
         run(["scripts/setup/inline_email_css.py"])
+        write_new_digest(
+            "last_email_source_files_hash",
+            inline_email_css_paths(),
+        )
     else:
         print("No need to run `scripts/setup/inline_email_css.py`.")
@@ -221,6 +230,10 @@ def main(options: argparse.Namespace) -> int:
     if options.is_force or need_to_run_compilemessages():
         run(["./manage.py", "compilemessages"])
+        write_new_digest(
+            "last_compilemessages_hash",
+            compilemessages_paths(),
+        )
     else:
         print("No need to run `manage.py compilemessages`.")

zerver/lib/test_fixtures.py

@@ -20,8 +20,10 @@ from django.core.management import call_command
 from django.utils.module_loading import module_has_submodule
 
 sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
-from scripts.lib.zulip_tools import get_dev_uuid_var_path, run, \
-    file_or_package_hash_updated, TEMPLATE_DATABASE_DIR
+from scripts.lib.zulip_tools import (
+    get_dev_uuid_var_path, run, TEMPLATE_DATABASE_DIR,
+    is_digest_obsolete, write_new_digest,
+)
 
 UUID_VAR_DIR = get_dev_uuid_var_path()
 FILENAME_SPLITTER = re.compile(r'[\W\-_]')
@@ -41,6 +43,7 @@ class Database:
             UUID_VAR_DIR,
             self.migration_status_file
         )
+        self.migration_digest_file = "migrations_hash_" + database_name
 
     def run_db_migrations(self) -> None:
         # We shell out to `manage.py` and pass `DJANGO_SETTINGS_MODULE` on
@@ -152,13 +155,20 @@ class Database:
         # changes, we can safely assume we don't need to run
         # migrations without spending a few 100ms parsing all the
         # Python migration code.
-        check_migrations = file_or_package_hash_updated(
-            "migrations_hash_" + database_name,
-            migration_paths(),
-        )
-        if not check_migrations:
+        if not self.is_digest_obsolete():
             return 'current'
 
+        '''
+        NOTE:
+            We immediately update the digest, assuming our
+            callers will do what it takes to run the migrations.
+
+            Ideally our callers would just do it themselves
+            AFTER the migrations actually succeeded, but the
+            caller codepaths are kind of complicated here.
+        '''
+        self.write_new_digest()
+
         migration_op = self.what_to_do_with_migrations()
         if migration_op == 'scrap':
             return 'needs_rebuild'
@@ -168,6 +178,18 @@ class Database:
         return 'current'
 
+    def is_digest_obsolete(self) -> bool:
+        return is_digest_obsolete(
+            self.migration_digest_file,
+            migration_paths(),
+        )
+
+    def write_new_digest(self) -> None:
+        write_new_digest(
+            self.migration_digest_file,
+            migration_paths(),
+        )
+
 DEV_DATABASE = Database(
     platform='dev',
     database_name='zulip',