mirror of
https://github.com/zulip/zulip.git
synced 2025-11-16 11:52:01 +00:00
Optimize incremental virtualenv creation.
This adds a new system for copying packages from old virtualenvs that are sufficiently similar to the new virtualenv required. In practice, this results in a huge performance improvement for re-provisioning Zulip development environments when the requirements files have changed (which is the dominant performance problem with provision today). Fixes: #1507.
This commit is contained in:
@@ -156,3 +156,6 @@ polib==1.0.7
|
|||||||
# We're using this version from git to ensure compatibility with
|
# We're using this version from git to ensure compatibility with
|
||||||
# Jinja2==2.8 (see https://github.com/jorgebastida/glue/pull/211).
|
# Jinja2==2.8 (see https://github.com/jorgebastida/glue/pull/211).
|
||||||
git+https://github.com/lorenzogil/glue@01c00cd33b9b78ea868300c266c16acd59a81bfc#egg=glue==0.11.1
|
git+https://github.com/lorenzogil/glue@01c00cd33b9b78ea868300c266c16acd59a81bfc#egg=glue==0.11.1
|
||||||
|
|
||||||
|
# Needed for cloning virtual environments
|
||||||
|
git+https://github.com/umairwaheed/virtualenv-clone.git@short-version#egg=virtualenv-clone==0.2.6
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import sys
|
|||||||
from os.path import dirname, abspath
|
from os.path import dirname, abspath
|
||||||
import subprocess
|
import subprocess
|
||||||
from scripts.lib.zulip_tools import run
|
from scripts.lib.zulip_tools import run
|
||||||
|
from scripts.lib.hash_reqs import expand_reqs
|
||||||
|
|
||||||
ZULIP_PATH = dirname(dirname(dirname(abspath(__file__))))
|
ZULIP_PATH = dirname(dirname(dirname(abspath(__file__))))
|
||||||
VENV_CACHE_PATH = "/srv/zulip-venv-cache"
|
VENV_CACHE_PATH = "/srv/zulip-venv-cache"
|
||||||
@@ -15,7 +16,7 @@ if 'TRAVIS' in os.environ:
|
|||||||
|
|
||||||
if False:
|
if False:
|
||||||
# Don't add a runtime dependency on typing
|
# Don't add a runtime dependency on typing
|
||||||
from typing import List, Optional
|
from typing import List, Optional, Tuple, Set
|
||||||
|
|
||||||
VENV_DEPENDENCIES = [
|
VENV_DEPENDENCIES = [
|
||||||
"build-essential",
|
"build-essential",
|
||||||
@@ -34,6 +35,136 @@ VENV_DEPENDENCIES = [
|
|||||||
"libpq-dev", # Needed by psycopg2
|
"libpq-dev", # Needed by psycopg2
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def get_index_filename(venv_path):
    # type: (str) -> str
    """
    Return the path of the 'package_index' file located directly
    inside the virtual environment directory venv_path.
    """
    index_basename = 'package_index'
    return os.path.join(venv_path, index_basename)
|
||||||
|
|
||||||
|
def get_package_names(requirements_file):
    # type: (str) -> List[str]
    """
    Return the sorted, lower-cased package names listed in
    requirements_file.

    The entries come from expand_reqs(requirements_file); for each
    entry, everything from the first version operator onwards is
    dropped and the remainder is stripped of surrounding whitespace.
    Entries that end up empty are skipped.
    """
    version_operators = ('~=', '==', '!=', '<', '>')
    names = []  # type: List[str]
    for requirement in expand_reqs(requirements_file):
        name = requirement
        for op in version_operators:
            # partition() yields the text before the first occurrence of
            # op, or the whole string when op does not occur at all.
            name = name.partition(op)[0]

        name = name.strip()
        if not name:
            continue
        names.append(name.lower())

    names.sort()
    return names
|
||||||
|
|
||||||
|
def create_requirements_index_file(venv_path, requirements_file):
    # type: (str, str) -> str
    """
    Write the package index file of a virtual environment.

    The file (see get_index_filename) receives one package name per
    line, as computed by get_package_names(requirements_file), followed
    by a trailing newline. Other code in this module reads the file
    back to decide which cached virtual environments can be copied
    into a new one.

    Returns the path of the index file that was written.
    """
    index_path = get_index_filename(venv_path)
    names = get_package_names(requirements_file)
    contents = '\n'.join(names) + '\n'
    with open(index_path, 'w') as index_file:
        index_file.write(contents)

    return index_path
|
||||||
|
|
||||||
|
def get_venv_packages(venv_path):
    # type: (str) -> Set[str]
    """
    Read the package index file of the virtual environment at
    venv_path and return the set of package names it lists.

    Lines are stripped of surrounding whitespace; blank lines are
    ignored.
    """
    index_path = get_index_filename(venv_path)
    with open(index_path) as index_file:
        raw_lines = index_file.read().split('\n')

    installed = set()  # type: Set[str]
    for raw_line in raw_lines:
        name = raw_line.strip()
        if name:
            installed.add(name)
    return installed
|
||||||
|
|
||||||
|
def try_to_copy_venv(venv_path, new_packages):
    # type: (str, Set[str]) -> bool
    """
    Tries to seed the new virtual environment at venv_path from an old
    virtual environment in the cache. The algorithm works as follows:

    1. Look at every directory under VENV_CACHE_PATH for a virtual
       environment with the same basename as venv_path that has a
       package index file (venv_path itself is skipped).
    2. Keep only the candidates whose indexed package names are all
       contained in new_packages, i.e. the new requirements only add
       to the packages of the candidate.
    3. Pick the candidate sharing the most package names with
       new_packages and clone it to venv_path with virtualenv-clone.
    4. Give ownership of the clone to the current user, copy the
       source's setup log and append a log entry describing which
       packages were copied and which are new.

    NOTE(review): the package index stores names only, so version
    changes are not detected here, and this function does not itself
    delete any .pyc files from the clone.

    Returns True if a cached virtual environment was cloned, and False
    if there was nothing suitable to copy from (including the case
    where the cache directory does not exist yet).
    """
    # On a machine that has never been provisioned the cache directory
    # may not exist; os.listdir would raise, so report "nothing copied".
    if not os.path.isdir(VENV_CACHE_PATH):
        return False

    venv_name = os.path.basename(venv_path)

    overlaps = []  # type: List[Tuple[int, str, Set[str]]]
    for sha1sum in os.listdir(VENV_CACHE_PATH):
        curr_venv_path = os.path.join(VENV_CACHE_PATH, sha1sum, venv_name)
        if (curr_venv_path == venv_path or
                not os.path.exists(get_index_filename(curr_venv_path))):
            continue

        old_packages = get_venv_packages(curr_venv_path)
        # We only consider using old virtualenvs that only
        # contain packages that we want in our new virtualenv.
        if not (old_packages - new_packages):
            overlap = new_packages & old_packages
            overlaps.append((len(overlap), curr_venv_path, overlap))

    if not overlaps:
        return False

    # Here, we select the old virtualenv with the largest overlap; ties
    # are broken by path, so the package sets are never compared.
    _, source_venv_path, copied_packages = max(
        overlaps, key=lambda entry: entry[:2])
    print('Copying packages from {}'.format(source_venv_path))

    # Build argument lists directly (rather than formatting a string and
    # splitting it) so that paths containing whitespace stay intact.
    clone_ve = "{}/bin/virtualenv-clone".format(source_venv_path)
    cmd = ["sudo", clone_ve, source_venv_path, venv_path]
    try:
        run(cmd)
    except Exception:
        # Presumably virtualenv-clone is not installed in the source
        # virtualenv. Install it and try running the command again.
        run(["{}/bin/pip".format(source_venv_path),
             "install", "--no-deps", "virtualenv-clone"])
        run(cmd)

    # The clone was created via sudo; hand it over to the current user.
    run(["sudo", "chown", "-R",
         "{}:{}".format(os.getuid(), os.getgid()), venv_path])

    target_log = get_logfile_name(venv_path)
    source_log = get_logfile_name(source_venv_path)
    copy_parent_log(source_log, target_log)
    create_log_entry(target_log, source_venv_path, copied_packages,
                     new_packages - copied_packages)
    return True
|
||||||
|
|
||||||
|
def get_logfile_name(venv_path):
    # type: (str) -> str
    """
    Return the path of the 'setup-venv.log' file inside the virtual
    environment directory venv_path.
    """
    return "%s/setup-venv.log" % (venv_path,)
|
||||||
|
|
||||||
|
def create_log_entry(target_log, parent, copied_packages, new_packages):
    # type: (str, str, Set[str], Set[str]) -> None
    """
    Append an entry to the log file target_log.

    The entry starts with the directory containing target_log. If
    copied_packages is non-empty, a "Copied from <parent>:" section
    listing those packages follows. The entry always ends with a
    "New packages:" section listing new_packages and a blank line.
    Package names are listed in sorted order, one "- name" per line.
    """
    def bullet_list(packages):
        # type: (Set[str]) -> str
        return "\n".join('- {}'.format(name) for name in sorted(packages))

    venv_path = dirname(target_log)
    sections = ["{}\n".format(venv_path)]
    if copied_packages:
        sections.append("Copied from {}:\n".format(parent))
        sections.append(bullet_list(copied_packages))
        sections.append("\n")

    sections.append("New packages:\n")
    sections.append(bullet_list(new_packages))
    sections.append("\n\n")

    # Append mode: earlier entries in the log are preserved.
    with open(target_log, 'a') as log_file:
        log_file.write("".join(sections))
|
||||||
|
|
||||||
|
def copy_parent_log(source_log, target_log):
    # type: (str, str) -> None
    """
    Copy the log file source_log to target_log using `cp`.

    Does nothing if source_log does not exist.
    """
    if os.path.exists(source_log):
        # Pass the arguments as a list instead of splitting a formatted
        # string, so that paths containing whitespace are not broken up.
        run(['cp', source_log, target_log])
|
||||||
|
|
||||||
def do_patch_activate_script(venv_path):
|
def do_patch_activate_script(venv_path):
|
||||||
# type: (str) -> None
|
# type: (str) -> None
|
||||||
"""
|
"""
|
||||||
@@ -83,11 +214,18 @@ def do_setup_virtualenv(venv_path, requirements_file, virtualenv_args):
|
|||||||
# type: (str, str, List[str]) -> None
|
# type: (str, str, List[str]) -> None
|
||||||
|
|
||||||
# Setup Python virtualenv
|
# Setup Python virtualenv
|
||||||
run(["sudo", "rm", "-rf", venv_path])
|
new_packages = set(get_package_names(requirements_file))
|
||||||
run(["sudo", "mkdir", "-p", venv_path])
|
|
||||||
run(["sudo", "chown", "{}:{}".format(os.getuid(), os.getgid()), venv_path])
|
|
||||||
run(["virtualenv"] + virtualenv_args + [venv_path])
|
|
||||||
|
|
||||||
|
run(["sudo", "rm", "-rf", venv_path])
|
||||||
|
if not try_to_copy_venv(venv_path, new_packages):
|
||||||
|
# Create new virtualenv.
|
||||||
|
run(["sudo", "mkdir", "-p", venv_path])
|
||||||
|
run(["sudo", "virtualenv"] + virtualenv_args + [venv_path])
|
||||||
|
run(["sudo", "chown", "-R",
|
||||||
|
"{}:{}".format(os.getuid(), os.getgid()), venv_path])
|
||||||
|
create_log_entry(get_logfile_name(venv_path), "", set(), new_packages)
|
||||||
|
|
||||||
|
create_requirements_index_file(venv_path, requirements_file)
|
||||||
# Switch current Python context to the virtualenv.
|
# Switch current Python context to the virtualenv.
|
||||||
activate_this = os.path.join(venv_path, "bin", "activate_this.py")
|
activate_this = os.path.join(venv_path, "bin", "activate_this.py")
|
||||||
exec(open(activate_this).read(), {}, dict(__file__=activate_this)) # type: ignore # https://github.com/python/mypy/issues/1577
|
exec(open(activate_this).read(), {}, dict(__file__=activate_this)) # type: ignore # https://github.com/python/mypy/issues/1577
|
||||||
|
|||||||
Reference in New Issue
Block a user