requirements: Migrate to uv.

https://docs.astral.sh/uv/

Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
Anders Kaseorg
2025-02-24 15:01:01 -08:00
committed by Tim Abbott
parent 72f5df2e09
commit d7556b4060
59 changed files with 5962 additions and 8166 deletions

View File

@@ -1,4 +1,9 @@
#!/usr/bin/env python3
# TODO: After switching from pip to uv, we no longer create
# /srv/zulip-venv-cache or symlink zulip-py3-venv, so this script can be
# replaced with shutil.rmtree("/srv/zulip-venv-cache").
import argparse
import glob
import os

View File

@@ -7,8 +7,8 @@ ZULIP_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__f
if ZULIP_PATH not in sys.path:
sys.path.append(ZULIP_PATH)
from scripts.lib.setup_venv import get_venv_dependencies, setup_virtualenv
from scripts.lib.zulip_tools import os_families, overwrite_symlink, parse_os_release, run
from scripts.lib.setup_venv import get_venv_dependencies
from scripts.lib.zulip_tools import os_families, parse_os_release, run
parser = argparse.ArgumentParser(description="Create a production virtualenv with caching")
parser.add_argument("deploy_path")
@@ -27,14 +27,6 @@ else:
print("Unsupported platform: {}".format(distro_info["ID"]))
sys.exit(1)
# Set the current working directory to the Zulip checkout, so the api/
# relative path in requirements/common.in works.
os.chdir(ZULIP_PATH)
venv_name = "zulip-py3-venv"
cached_venv_path = setup_virtualenv(
os.path.join(args.deploy_path, venv_name), os.path.join(ZULIP_PATH, "requirements", "prod.txt")
)
current_venv_path = os.path.join(args.deploy_path, "zulip-current-venv")
overwrite_symlink(venv_name, current_venv_path)
run(["scripts/lib/install-uv"])
run(["uv", "sync", "--frozen", "--only-group=prod"])

25
scripts/lib/install-uv Executable file
View File

@@ -0,0 +1,25 @@
#!/usr/bin/env bash
# Install the pinned release of uv (and uvx) into /usr/local/bin, unless
# `uv --version` already reports exactly that release.
set -eu

version=0.6.3
arch="$(uname -m)"
tarball="uv-$arch-unknown-linux-gnu.tar.gz"
# Expected SHA-256 of the release tarball, keyed by `uname -m` output.
declare -A sha256=(
    [aarch64]=447726788204106ffd8ecc59396fccc75fae7aca998555265b5ea6950b00160c
    [x86_64]=b7a37a33d62cb7672716c695226450231e8c02a8eb2b468fa61cd28a8f86eab2
)

# Succeeds only when the uv found on PATH reports the pinned version.
check_version() {
    out="$(uv --version)" && [ "$out" = "uv $version" ]
}

if ! check_version 2>/dev/null; then
    # Without a pinned checksum we cannot verify a download, so stop with
    # a clear message instead of letting curl fail on a missing tarball.
    if [ -z "${sha256[$arch]:-}" ]; then
        echo "install-uv: no pinned uv checksum for architecture '$arch'" >&2
        exit 1
    fi

    set -x
    tmpdir="$(mktemp -d)"
    # Remove the scratch directory however the script exits.
    trap 'rm -r "$tmpdir"' EXIT
    cd "$tmpdir"
    curl -fLO --retry 3 "https://github.com/astral-sh/uv/releases/download/$version/$tarball"
    # Checksum line format: digest, a space, a mode character (space means
    # text mode), then the file name.
    sha256sum -c <<<"${sha256[$arch]}  $tarball"
    tar -xzf "$tarball" --no-same-owner --strip-components=1 -C /usr/local/bin "uv-$arch-unknown-linux-gnu"/{uv,uvx}
    # Confirm the freshly installed binary is the one now found on PATH.
    check_version
fi

View File

@@ -8,9 +8,15 @@ import sys
def setup_path() -> None:
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
venv = os.path.realpath(os.path.join(BASE_DIR, "zulip-py3-venv"))
venv = os.path.realpath(os.path.join(BASE_DIR, ".venv"))
if sys.prefix != venv:
sys.path = list(filter(lambda p: "/zulip-py3-venv/" not in p, sys.path))
sys.path = list(
filter(
# zulip-py3-venv was an historical virtualenv symlink
lambda p: "/zulip-py3-venv/" not in p and "/.venv/" not in p,
sys.path,
)
)
activate_this = os.path.join(venv, "bin", "activate_this.py")
activate_locals = dict(__file__=activate_this)
with open(activate_this) as f:

View File

@@ -1,13 +1,8 @@
import logging
import os
import shutil
import subprocess
from scripts.lib.hash_reqs import expand_reqs, python_version
from scripts.lib.zulip_tools import ENDC, WARNING, os_families, run, run_as_root
from scripts.lib.zulip_tools import os_families
ZULIP_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
VENV_CACHE_PATH = "/srv/zulip-venv-cache"
VENV_DEPENDENCIES = [
"build-essential",
@@ -72,262 +67,3 @@ def get_venv_dependencies(vendor: str, os_version: str) -> list[str]:
return FEDORA_VENV_DEPENDENCIES
else:
raise AssertionError("Invalid vendor")
def install_venv_deps(pip: str, requirements_file: str) -> None:
    """Run two hash-checked ``pip install`` passes with the given pip.

    The first pass force-reinstalls whatever ``requirements/pip.txt``
    lists; the second installs ``requirements_file`` without resolving
    dependencies.

    Args:
        pip: Path of the pip executable to invoke.
        requirements_file: Requirements file for the second pass.
    """
    bootstrap_cmd = [
        pip,
        "install",
        "--force-reinstall",
        "--require-hashes",
        "-r",
        os.path.join(ZULIP_PATH, "requirements", "pip.txt"),
    ]
    run(bootstrap_cmd)

    install_cmd = [
        pip,
        "install",
        "--use-deprecated=legacy-resolver",  # https://github.com/pypa/pip/issues/5780
        "--no-deps",
        "--require-hashes",
        "-r",
        requirements_file,
    ]
    run(install_cmd)
def get_index_filename(venv_path: str) -> str:
    """Return the path of the ``package_index`` file inside venv_path."""
    index_basename = "package_index"
    return os.path.join(venv_path, index_basename)
def get_package_names(requirements_file: str) -> list[str]:
    """Return the sorted, lowercased package names from requirements_file.

    Entries are obtained from ``expand_reqs``. For an entry starting with
    ``git+https://`` that contains ``#egg=``, the text after ``#egg=`` is
    used. Every entry is then cut at the version operators and stripped;
    entries that end up empty are dropped.

    Raises:
        Exception: If a Git entry does not split into exactly two parts
            around ``#egg=``.
    """
    version_operators = ("~=", "==", "!=", "<", ">")
    names = []
    for requirement in expand_reqs(requirements_file):
        if requirement.startswith("git+https://") and "#egg=" in requirement:
            pieces = requirement.split("#egg=")
            if len(pieces) != 2:
                raise Exception(f"Unexpected duplicate #egg in package {requirement}")
            # For Git requirements the name is whatever follows "#egg=".
            requirement = pieces[1]
        # Cut at each operator in turn, in list order, keeping the text
        # that precedes its first occurrence.
        for op in version_operators:
            requirement = requirement.partition(op)[0]
        name = requirement.strip().lower()
        if name:
            names.append(name)
    names.sort()
    return names
def create_requirements_index_file(venv_path: str, requirements_file: str) -> str:
    """
    Write the package_index file for the virtual environment at
    venv_path and return its path.

    The file holds the names produced by get_package_names for
    requirements_file, one per line, followed by a final newline.
    get_venv_packages reads this file back.
    """
    index_path = get_index_filename(venv_path)
    listing = "\n".join(get_package_names(requirements_file)) + "\n"
    with open(index_path, "w") as index_file:
        index_file.write(listing)
    return index_path
def get_venv_packages(venv_path: str) -> set[str]:
    """
    Return the set of non-blank, whitespace-stripped lines of the
    package index file belonging to the virtual environment at venv_path.
    """
    with open(get_index_filename(venv_path)) as index_file:
        stripped_entries = (entry.strip() for entry in index_file.read().split("\n"))
        return {entry for entry in stripped_entries if entry}
def try_to_copy_venv(venv_path: str, new_packages: set[str]) -> bool:
    """
    Try to seed venv_path by cloning a virtual environment from the cache.

    A cached environment is a candidate when all of the following hold:
    1. It is not venv_path itself and it has a package index file.
    2. Its bin/python3 exists and its `-VV` output equals python_version().
    3. Every package name in its index is also in new_packages.

    Among the candidates, the one sharing the most package names with
    new_packages is cloned with its own bin/virtualenv-clone.

    Returns True if a clone was made, False if the cache directory is
    missing, no candidate qualifies, or the clone command fails.
    """
    if not os.path.exists(VENV_CACHE_PATH):
        return False

    wanted_python = python_version()
    venv_basename = os.path.basename(venv_path)

    candidates: list[tuple[int, str, set[str]]] = []
    for cache_entry in os.listdir(VENV_CACHE_PATH):
        candidate_path = os.path.join(VENV_CACHE_PATH, cache_entry, venv_basename)
        if candidate_path == venv_path:
            continue
        if not os.path.exists(get_index_filename(candidate_path)):
            continue

        # The candidate must run the same Python we intend to use.
        candidate_python = os.path.join(candidate_path, "bin", "python3")
        if not os.path.exists(candidate_python):
            continue
        candidate_version = subprocess.check_output([candidate_python, "-VV"], text=True)
        if wanted_python != candidate_version:
            continue

        # Skip environments holding any package we do not want in the
        # new virtualenv.
        installed = get_venv_packages(candidate_path)
        if installed - new_packages:
            continue
        shared = new_packages & installed
        candidates.append((len(shared), candidate_path, shared))

    if not candidates:
        return False

    # After sorting, the last tuple is the candidate with the largest overlap.
    candidates.sort()
    _, source_venv_path, copied_packages = candidates[-1]
    print(f"Copying packages from {source_venv_path}")
    clone_cmd = [f"{source_venv_path}/bin/virtualenv-clone", source_venv_path, venv_path]
    try:
        # TODO: This fallback can probably go in a few months, once
        # virtualenv-clone can be assumed present in every recent
        # virtualenv.
        run_as_root(clone_cmd)
    except subprocess.CalledProcessError:
        # virtualenv-clone is missing or failed. Reporting False is
        # safe: the caller just builds a new venv.
        logging.warning("Error cloning virtualenv %s", source_venv_path)
        return False

    # The clone carries over the source's success stamp. If the later
    # `pip install` phase failed, that would leave a half-provisioned
    # virtualenv marked as complete, so drop the stamp now. The proper
    # fix is https://github.com/edwardgeorge/virtualenv-clone/pull/38,
    # but this rm is almost as good.
    run_as_root(["rm", "-f", os.path.join(venv_path, "success-stamp")])
    run_as_root(["chown", "-R", f"{os.getuid()}:{os.getgid()}", venv_path])

    target_log = get_logfile_name(venv_path)
    copy_parent_log(get_logfile_name(source_venv_path), target_log)
    create_log_entry(
        target_log, source_venv_path, copied_packages, new_packages - copied_packages
    )
    return True
def get_logfile_name(venv_path: str) -> str:
    """Return venv_path with ``/setup-venv.log`` appended."""
    return "/".join((venv_path, "setup-venv.log"))
def create_log_entry(
    target_log: str,
    parent: str,
    copied_packages: set[str],
    new_packages: set[str],
) -> None:
    """Append one entry to the log file at target_log.

    The entry starts with the directory containing target_log. If
    copied_packages is non-empty, a "Copied from <parent>:" section with
    the sorted names follows. A "New packages:" section with the sorted
    new_packages is always written, followed by a blank line.
    """

    def bullet_list(names: set[str]) -> str:
        return "\n".join(f"- {name}" for name in sorted(names))

    sections = [f"{os.path.dirname(target_log)}\n"]
    if copied_packages:
        sections += [f"Copied from {parent}:\n", bullet_list(copied_packages), "\n"]
    sections += ["New packages:\n", bullet_list(new_packages), "\n\n"]

    with open(target_log, "a") as log_file:
        log_file.write("".join(sections))
def copy_parent_log(source_log: str, target_log: str) -> None:
    """Copy source_log over target_log; do nothing if source_log is absent."""
    if not os.path.exists(source_log):
        return
    shutil.copyfile(source_log, target_log)
def do_patch_activate_script(venv_path: str) -> None:
    """
    Rewrite <venv_path>/bin/activate so that every line beginning with
    VIRTUAL_ENV= assigns venv_path (in double quotes) instead.

    All other lines of the script are written back unchanged.
    """
    activate_path = os.path.join(venv_path, "bin", "activate")
    # venv_path is the value VIRTUAL_ENV should have once patched.
    assignment = f'VIRTUAL_ENV="{venv_path}"\n'
    with open(activate_path) as script:
        patched = [
            assignment if line.startswith("VIRTUAL_ENV=") else line for line in script
        ]
    with open(activate_path, "w") as script:
        script.writelines(patched)
def generate_hash(requirements_file: str) -> str:
    """Run scripts/lib/hash_reqs.py on requirements_file and return the
    first whitespace-separated token of its output."""
    hasher = os.path.join(ZULIP_PATH, "scripts", "lib", "hash_reqs.py")
    return subprocess.check_output([hasher, requirements_file], text=True).split()[0]
def setup_virtualenv(
    target_venv_path: str | None,
    requirements_file: str,
    patch_activate_script: bool = False,
) -> str:
    """Ensure a cached virtualenv exists for requirements_file.

    The cache location is VENV_CACHE_PATH/<hash>/<name>, where <hash>
    comes from generate_hash and <name> is the basename of
    target_venv_path, or "venv" when target_venv_path is None. The venv
    is built only if its success-stamp file is missing.

    Args:
        target_venv_path: If not None, this path is made a symlink to the
            cached venv.
        requirements_file: Requirements file the venv is built from.
        patch_activate_script: If true and target_venv_path is given, the
            activate script is patched via do_patch_activate_script.

    Returns:
        The path of the cached virtualenv.
    """
    sha1sum = generate_hash(requirements_file)
    venv_dirname = "venv" if target_venv_path is None else os.path.basename(target_venv_path)
    cached_venv_path = os.path.join(VENV_CACHE_PATH, sha1sum, venv_dirname)

    # A missing stamp means the cached venv was never fully provisioned.
    success_stamp = os.path.join(cached_venv_path, "success-stamp")
    if not os.path.exists(success_stamp):
        do_setup_virtualenv(cached_venv_path, requirements_file)
        # Create (or truncate) the empty stamp file.
        with open(success_stamp, "w"):
            pass

    print(f"Using cached Python venv from {cached_venv_path}")
    if target_venv_path is not None:
        run_as_root(["ln", "-nsf", cached_venv_path, target_venv_path])
        if patch_activate_script:
            do_patch_activate_script(target_venv_path)
    return cached_venv_path
def do_setup_virtualenv(venv_path: str, requirements_file: str) -> None:
    """Build the virtualenv at venv_path from requirements_file.

    Any existing venv_path is removed first. The venv is then either
    cloned from the cache via try_to_copy_venv or created from scratch,
    after which the package index is written and the requirements are
    installed. The install is retried once if it fails with
    CalledProcessError.
    """
    wanted_packages = set(get_package_names(requirements_file))

    run_as_root(["rm", "-rf", venv_path])
    cloned = try_to_copy_venv(venv_path, wanted_packages)
    if not cloned:
        # Nothing suitable to clone: create a brand-new virtualenv.
        owner = f"{os.getuid()}:{os.getgid()}"
        run_as_root(["mkdir", "-p", venv_path])
        run_as_root(["virtualenv", "-p", "python3", "--no-download", venv_path])
        run_as_root(["chown", "-R", owner, venv_path])
        create_log_entry(get_logfile_name(venv_path), "", set(), wanted_packages)

    create_requirements_index_file(venv_path, requirements_file)

    venv_pip = os.path.join(venv_path, "bin", "pip")
    try:
        install_venv_deps(venv_pip, requirements_file)
    except subprocess.CalledProcessError:
        try:
            # The failure may have been a network problem, so try once more.
            print(WARNING + "`pip install` failed; retrying..." + ENDC)
            install_venv_deps(venv_pip, requirements_file)
        except BaseException as e:
            # Re-raise without chaining to the first failure.
            raise e from None

    run_as_root(["chmod", "-R", "a+rX", venv_path])