import logging
import os
import shutil
import subprocess
from typing import List, Optional, Set, Tuple

from scripts.lib.hash_reqs import expand_reqs, python_version
from scripts.lib.zulip_tools import ENDC, WARNING, os_families, run, run_as_root

ZULIP_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
VENV_CACHE_PATH = "/srv/zulip-venv-cache"

VENV_DEPENDENCIES = [
    "build-essential",
    "libffi-dev",
    "libfreetype6-dev",  # Needed for image types with Pillow
    "zlib1g-dev",  # Needed to handle compressed PNGs with Pillow
    "libjpeg-dev",  # Needed to handle JPEGs with Pillow
    "libldap2-dev",
    "python3-dev",  # Needed to install typed-ast dependency of mypy
    "python3-pip",
    "virtualenv",
    "libxml2-dev",  # Used for installing talon-core and python-xmlsec
    "libxslt1-dev",  # Used for installing talon-core
    "libpq-dev",  # Needed by psycopg2
    "libssl-dev",  # Needed to build pycurl and other libraries
    "libmagic1",  # Used for install python-magic
 | 
						|
    "libyaml-dev",  # For fast YAML parsing in PyYAML
 | 
						|
    # Needed by python-xmlsec:
 | 
						|
    "libxmlsec1-dev",
 | 
						|
    "pkg-config",
 | 
						|
    # This is technically a node dependency, but we add it here
 | 
						|
    # because we don't have another place that we install apt packages
 | 
						|
    # on upgrade of a production server, and it's not worth adding
 | 
						|
    # another call to `apt install` for.
 | 
						|
    "jq",  # Used by scripts/lib/install-node to check yarn version
 | 
						|
    "libsasl2-dev",  # For building python-ldap from source
 | 
						|
]
 | 
						|
 | 
						|
COMMON_YUM_VENV_DEPENDENCIES = [
 | 
						|
    "libffi-devel",
 | 
						|
    "freetype-devel",
 | 
						|
    "zlib-devel",
 | 
						|
    "libjpeg-turbo-devel",
 | 
						|
    "openldap-devel",
 | 
						|
    "libyaml-devel",
 | 
						|
    # Needed by python-xmlsec:
 | 
						|
    "gcc",
 | 
						|
    "python3-devel",
 | 
						|
    "libxml2-devel",
 | 
						|
    "xmlsec1-devel",
 | 
						|
    "xmlsec1-openssl-devel",
 | 
						|
    "libtool-ltdl-devel",
 | 
						|
    "libxslt-devel",
 | 
						|
    "postgresql-libs",  # libpq-dev on apt
 | 
						|
    "openssl-devel",
 | 
						|
    "jq",
 | 
						|
]
 | 
						|
 | 
						|
REDHAT_VENV_DEPENDENCIES = [
 | 
						|
    *COMMON_YUM_VENV_DEPENDENCIES,
 | 
						|
    "python36-devel",
 | 
						|
    "python-virtualenv",
 | 
						|
]
 | 
						|
 | 
						|
FEDORA_VENV_DEPENDENCIES = [
 | 
						|
    *COMMON_YUM_VENV_DEPENDENCIES,
 | 
						|
    "python3-pip",
 | 
						|
    "virtualenv",  # see https://unix.stackexchange.com/questions/27877/install-virtualenv-on-fedora-16
 | 
						|
]
 | 
						|
 | 
						|
THUMBOR_VENV_DEPENDENCIES = [
 | 
						|
    "libcurl4-openssl-dev",
 | 
						|
    "libjpeg-dev",
 | 
						|
    "zlib1g-dev",
 | 
						|
    "libfreetype6-dev",
 | 
						|
    "libpng-dev",
 | 
						|
    "gifsicle",
 | 
						|
]
 | 
						|
 | 
						|
YUM_THUMBOR_VENV_DEPENDENCIES = [
 | 
						|
    "libcurl-devel",
 | 
						|
    "libjpeg-turbo-devel",
 | 
						|
    "zlib-devel",
 | 
						|
    "freetype-devel",
 | 
						|
    "libpng-devel",
 | 
						|
    "gifsicle",
 | 
						|
]
 | 
						|
 | 
						|
 | 
						|
def get_venv_dependencies(vendor: str, os_version: str) -> List[str]:
 | 
						|
    if "debian" in os_families():
 | 
						|
        return VENV_DEPENDENCIES
 | 
						|
    elif "rhel" in os_families():
 | 
						|
        return REDHAT_VENV_DEPENDENCIES
 | 
						|
    elif "fedora" in os_families():
 | 
						|
        return FEDORA_VENV_DEPENDENCIES
 | 
						|
    else:
 | 
						|
        raise AssertionError("Invalid vendor")
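
# Note: the vendor and os_version parameters of get_venv_dependencies() are
# currently unused; the branch taken depends only on os_families(). For example,
# a Debian-family host (such as Debian or Ubuntu) gets the apt package list
# above, while RHEL/CentOS and Fedora hosts get the yum-based lists.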


def install_venv_deps(pip: str, requirements_file: str) -> None:
    pip_requirements = os.path.join(ZULIP_PATH, "requirements", "pip.txt")
    run([pip, "install", "--force-reinstall", "--require-hashes", "-r", pip_requirements])
    run([pip, "install", "--no-deps", "--require-hashes", "-r", requirements_file])
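
# Note on install_venv_deps(): requirements/pip.txt is installed first with
# --force-reinstall (likely so the second step runs under the pip version pinned
# there), and the project requirements are then installed with --no-deps and
# --require-hashes, so only explicitly pinned, hash-verified packages end up in
# the virtualenv.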


def get_index_filename(venv_path: str) -> str:
    return os.path.join(venv_path, "package_index")


def get_package_names(requirements_file: str) -> List[str]:
    packages = expand_reqs(requirements_file)
    cleaned = []
    operators = ["~=", "==", "!=", "<", ">"]
    for package in packages:
        if package.startswith("git+https://") and "#egg=" in package:
            split_package = package.split("#egg=")
            if len(split_package) != 2:
                raise Exception("Unexpected duplicate #egg in package {}".format(package))
            # Extract the package name from Git requirements entries
            package = split_package[1]

        for operator in operators:
            if operator in package:
                package = package.split(operator)[0]

        package = package.strip()
        if package:
            cleaned.append(package.lower())

    return sorted(cleaned)
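
# Illustration of how get_package_names() normalizes requirements entries
# (hypothetical inputs):
#
#   "Django==3.2"                                   -> "django"
#   "typing-extensions>=3.10"                       -> "typing-extensions"
#   "git+https://github.com/zulip/talon#egg=talon"  -> "talon"
#
# Version operators and VCS URL prefixes are stripped, and the names are
# lowercased and returned sorted.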


def create_requirements_index_file(venv_path: str, requirements_file: str) -> str:
    """
    Creates a file, called package_index, in the virtual environment
    directory that contains all the PIP packages installed in the
    virtual environment. This file is used to determine the packages
    that can be copied to a new virtual environment.
    """
    index_filename = get_index_filename(venv_path)
    packages = get_package_names(requirements_file)
    with open(index_filename, "w") as writer:
        writer.write("\n".join(packages))
        writer.write("\n")

    return index_filename


def get_venv_packages(venv_path: str) -> Set[str]:
    """
    Returns the packages installed in the virtual environment using the
    package index file.
    """
    with open(get_index_filename(venv_path)) as reader:
        return {p.strip() for p in reader.read().split("\n") if p.strip()}
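
# The package_index file written by create_requirements_index_file() is just a
# newline-separated, sorted list of lowercase package names, for example
# (hypothetical contents):
#
#   django
#   psycopg2
#   pyyaml
#
# get_venv_packages() reads it back into a set, which lets two cached venvs be
# compared without invoking pip.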


def try_to_copy_venv(venv_path: str, new_packages: Set[str]) -> bool:
    """
    Tries to copy packages from an old virtual environment in the cache
    to the new virtual environment. The algorithm works as follows:
        1. Find a virtual environment, v, from the cache that has the
        highest overlap with the new requirements such that:
            a. The new requirements only add to the packages of v.
            b. The new requirements only upgrade packages of v.
        2. Copy the contents of v to the new virtual environment using
        virtualenv-clone.
        3. Delete all .pyc files in the new virtual environment.
    """
    if not os.path.exists(VENV_CACHE_PATH):
        return False

    desired_python_version = python_version()
    venv_name = os.path.basename(venv_path)

    overlaps = []  # type: List[Tuple[int, str, Set[str]]]
    old_packages = set()  # type: Set[str]
    for sha1sum in os.listdir(VENV_CACHE_PATH):
        curr_venv_path = os.path.join(VENV_CACHE_PATH, sha1sum, venv_name)
        if curr_venv_path == venv_path or not os.path.exists(get_index_filename(curr_venv_path)):
            continue

        # Check that the Python version in the venv matches the version we want to use.
        venv_python3 = os.path.join(curr_venv_path, "bin", "python3")
        if not os.path.exists(venv_python3):
            continue
        venv_python_version = subprocess.check_output(
            [venv_python3, "-VV"], universal_newlines=True
        )
        if desired_python_version != venv_python_version:
            continue

        old_packages = get_venv_packages(curr_venv_path)
        # We only consider using old virtualenvs that only
        # contain packages that we want in our new virtualenv.
        if not (old_packages - new_packages):
            overlap = new_packages & old_packages
            overlaps.append((len(overlap), curr_venv_path, overlap))

    target_log = get_logfile_name(venv_path)
    source_venv_path = None
    if overlaps:
        # Here, we select the old virtualenv with the largest overlap
        overlaps = sorted(overlaps)
        _, source_venv_path, copied_packages = overlaps[-1]
        print("Copying packages from {}".format(source_venv_path))
        clone_ve = "{}/bin/virtualenv-clone".format(source_venv_path)
        cmd = [clone_ve, source_venv_path, venv_path]

        try:
            # TODO: We can probably remove this in a few months, now
            # that we can expect that virtualenv-clone is present in
            # all of our recent virtualenvs.
            run_as_root(cmd)
        except subprocess.CalledProcessError:
            # Virtualenv-clone is either not installed or threw an
            # error.  Just return False: making a new venv is safe.
            logging.warning("Error cloning virtualenv %s", source_venv_path)
            return False

        # virtualenv-clone, unfortunately, copies the success stamp,
        # which means if the upcoming `pip install` phase were to
        # fail, we'd end up with a broken half-provisioned virtualenv
        # that's incorrectly tagged as properly provisioned.  The
        # right fix is to use
        # https://github.com/edwardgeorge/virtualenv-clone/pull/38,
        # but this rm is almost as good.
        success_stamp_path = os.path.join(venv_path, "success-stamp")
        run_as_root(["rm", "-f", success_stamp_path])

        run_as_root(["chown", "-R", "{}:{}".format(os.getuid(), os.getgid()), venv_path])
        source_log = get_logfile_name(source_venv_path)
        copy_parent_log(source_log, target_log)
        create_log_entry(
            target_log, source_venv_path, copied_packages, new_packages - copied_packages
        )
        return True

    return False
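
# Worked example of the selection in try_to_copy_venv() (hypothetical package
# sets): if the new requirements are {a, b, c, d} and the cache holds venvs
# containing {a, b}, {a, b, c}, and {a, b, e}, then {a, b, e} is rejected (it
# has a package outside the new set), and {a, b, c} wins over {a, b} because its
# overlap (3 packages) is larger, leaving less work for the subsequent
# `pip install`.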


def get_logfile_name(venv_path: str) -> str:
    return "{}/setup-venv.log".format(venv_path)


def create_log_entry(
    target_log: str,
    parent: str,
    copied_packages: Set[str],
    new_packages: Set[str],
) -> None:

    venv_path = os.path.dirname(target_log)
    with open(target_log, "a") as writer:
        writer.write("{}\n".format(venv_path))
        if copied_packages:
            writer.write("Copied from {}:\n".format(parent))
            writer.write("\n".join("- {}".format(p) for p in sorted(copied_packages)))
            writer.write("\n")

        writer.write("New packages:\n")
        writer.write("\n".join("- {}".format(p) for p in sorted(new_packages)))
        writer.write("\n\n")


def copy_parent_log(source_log: str, target_log: str) -> None:
    if os.path.exists(source_log):
        shutil.copyfile(source_log, target_log)


def do_patch_activate_script(venv_path: str) -> None:
    """
    Patches the bin/activate script so that the value of the environment variable VIRTUAL_ENV
    is set to venv_path during the script's execution whenever it is sourced.
    """
    # venv_path should be what we want to have in VIRTUAL_ENV after patching
    script_path = os.path.join(venv_path, "bin", "activate")

    with open(script_path) as f:
        lines = f.readlines()
    for i, line in enumerate(lines):
        if line.startswith("VIRTUAL_ENV="):
            lines[i] = 'VIRTUAL_ENV="{}"\n'.format(venv_path)

    with open(script_path, "w") as f:
        f.write("".join(lines))
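
# For example, if bin/activate contains a line like (illustrative paths)
#
#   VIRTUAL_ENV="/srv/zulip-venv-cache/<sha1>/venv"
#
# then do_patch_activate_script("/path/to/target-venv") rewrites it to
# VIRTUAL_ENV="/path/to/target-venv", so sourcing the script exports the
# intended path rather than the cache-internal one.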


def generate_hash(requirements_file: str) -> str:
    path = os.path.join(ZULIP_PATH, "scripts", "lib", "hash_reqs.py")
    output = subprocess.check_output([path, requirements_file], universal_newlines=True)
    return output.split()[0]


def setup_virtualenv(
    target_venv_path: Optional[str],
    requirements_file: str,
    patch_activate_script: bool = False,
) -> str:

    sha1sum = generate_hash(requirements_file)
    # Check if a cached version already exists
    if target_venv_path is None:
        cached_venv_path = os.path.join(VENV_CACHE_PATH, sha1sum, "venv")
    else:
        cached_venv_path = os.path.join(
            VENV_CACHE_PATH, sha1sum, os.path.basename(target_venv_path)
        )
    success_stamp = os.path.join(cached_venv_path, "success-stamp")
    if not os.path.exists(success_stamp):
        do_setup_virtualenv(cached_venv_path, requirements_file)
        with open(success_stamp, "w") as f:
            f.close()

    print("Using cached Python venv from {}".format(cached_venv_path))
    if target_venv_path is not None:
        run_as_root(["ln", "-nsf", cached_venv_path, target_venv_path])
        if patch_activate_script:
            do_patch_activate_script(target_venv_path)
    return cached_venv_path
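
# Typical (hypothetical) use of setup_virtualenv() from a provisioning script:
#
#   venv = setup_virtualenv(
#       "/srv/zulip-py3-venv", "requirements/prod.txt", patch_activate_script=True
#   )
#
# This provisions (or reuses) /srv/zulip-venv-cache/<sha1>/zulip-py3-venv, points
# the /srv/zulip-py3-venv symlink at it, and rewrites VIRTUAL_ENV in its
# bin/activate to the symlink path.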


def add_cert_to_pipconf() -> None:
    conffile = os.path.expanduser("~/.pip/pip.conf")
    confdir = os.path.expanduser("~/.pip/")
    os.makedirs(confdir, exist_ok=True)
    run(["crudini", "--set", conffile, "global", "cert", os.environ["CUSTOM_CA_CERTIFICATES"]])


def do_setup_virtualenv(venv_path: str, requirements_file: str) -> None:

    # Setup Python virtualenv
    new_packages = set(get_package_names(requirements_file))

    run_as_root(["rm", "-rf", venv_path])
    if not try_to_copy_venv(venv_path, new_packages):
        # Create new virtualenv.
        run_as_root(["mkdir", "-p", venv_path])
        run_as_root(["virtualenv", "-p", "python3", venv_path])
        run_as_root(["chown", "-R", "{}:{}".format(os.getuid(), os.getgid()), venv_path])
        create_log_entry(get_logfile_name(venv_path), "", set(), new_packages)

    create_requirements_index_file(venv_path, requirements_file)

    pip = os.path.join(venv_path, "bin", "pip")

    # use custom certificate if needed
    if os.environ.get("CUSTOM_CA_CERTIFICATES"):
        print("Configuring pip to use custom CA certificates...")
        add_cert_to_pipconf()

    try:
        install_venv_deps(pip, requirements_file)
    except subprocess.CalledProcessError:
        try:
            # Might be a failure due to network connection issues. Retrying...
            print(WARNING + "`pip install` failed; retrying..." + ENDC)
            install_venv_deps(pip, requirements_file)
        except BaseException as e:
            # Suppress exception chaining
            raise e from None

    run_as_root(["chmod", "-R", "a+rX", venv_path])