diff --git a/.github/workflows/production-suite.yml b/.github/workflows/production-suite.yml
index 939a0974ab..d966c133dd 100644
--- a/.github/workflows/production-suite.yml
+++ b/.github/workflows/production-suite.yml
@@ -30,6 +30,8 @@ defaults:
 
 jobs:
   production_build:
+    # This job builds a release tarball from the current commit, which
+    # will be used for all of the following install/upgrade tests.
     name: Bionic production build
     runs-on: ubuntu-latest
 
@@ -106,6 +108,9 @@ jobs:
         run: tools/ci/send-failure-message
 
   production_install:
+    # This job installs the server release tarball built above on a
+    # range of platforms, and does some basic health checks on the
+    # resulting installed Zulip server.
     strategy:
       fail-fast: false
       matrix:
@@ -208,3 +213,61 @@ jobs:
         env:
           ZULIP_BOT_KEY: ${{ secrets.ZULIP_BOT_KEY }}
         run: /tmp/send-failure-message
+
+  production_upgrade:
+    # The production upgrade job starts with a container with a
+    # previous Zulip release installed, and attempts to upgrade it to
+    # the release tarball built for the current commit being tested.
+    #
+    # This is intended to catch bugs that result in the upgrade
+    # process failing.
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          # Base images are built using `tools/ci/Dockerfile.prod.template`.
+          # The comments at the top explain how to build and upload these images.
+          - docker_image: zulip/ci:buster-3.4
+            name: 3.4 Version Upgrade
+            is_focal: true
+            os: buster
+
+    name: ${{ matrix.name }}
+    container: ${{ matrix.docker_image }}
+    runs-on: ubuntu-latest
+    needs: production_build
+
+    steps:
+      - name: Download built production tarball
+        uses: actions/download-artifact@v2
+        with:
+          name: production-tarball
+          path: /tmp
+
+      - name: Add required permissions and setup
+        run: |
+          # This is the GitHub Actions specific cache directory that
+          # the current github user must be able to access for the
+          # cache action to work. It is owned by root currently.
+          sudo chmod -R 0777 /__w/_temp/
+
+          # Since actions/download-artifact@v2 loses all the permissions
+          # of the tarball uploaded by the upload artifact, fix those.
+          chmod +x /tmp/production-upgrade
+          chmod +x /tmp/production-verify
+          chmod +x /tmp/send-failure-message
+
+      - name: Upgrade production
+        run: sudo /tmp/production-upgrade
+
+      # TODO: We should be running production-verify here, but it
+      # doesn't pass yet.
+      #
+      # - name: Verify install
+      #   run: sudo /tmp/production-verify
+
+      - name: Report status
+        if: failure()
+        env:
+          ZULIP_BOT_KEY: ${{ secrets.ZULIP_BOT_KEY }}
+        run: /tmp/send-failure-message
diff --git a/tools/ci/production-build b/tools/ci/production-build
index 6fd95fc2e4..0cf41ee9c7 100755
--- a/tools/ci/production-build
+++ b/tools/ci/production-build
@@ -37,6 +37,7 @@ cp -a \
     tools/ci/success-http-headers.template.debian.txt \
     tools/ci/production-install \
     tools/ci/production-verify \
+    tools/ci/production-upgrade \
     tools/ci/production-upgrade-pg \
     tools/ci/production-extract-tarball \
     tools/ci/send-failure-message \
diff --git a/tools/ci/production-upgrade b/tools/ci/production-upgrade
new file mode 100644
index 0000000000..f23e271e3f
--- /dev/null
+++ b/tools/ci/production-upgrade
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+# Given a Zulip production environment that had been installed with a
+# previous version of Zulip, upgrade it to the commit being tested.
+# This takes as input the tarball generated by production-build.
+set -e
+set -x
+
+# Structurally, this script should just call upgrade-zulip. However,
+# because of a set of issues that result in the previously installed
+# GitHub Actions Docker containers not actually working on boot, we
+# need to do some preparatory steps. It is a goal to delete these
+# steps.
+
+# Reinstall rabbitmq-server and supervisor (purge, then install again).
+#
+# * For rabbitmq-server, we likely need to do this to work around the
+#   hostname changing on reboot causing RabbitMQ to not boot.
+# * For supervisor, we don't understand why it doesn't start properly.
+sudo apt-get -y purge rabbitmq-server supervisor
+sudo apt-get -y install rabbitmq-server supervisor
+
+# Start the postgresql service.
+sudo service postgresql start
+
+# Start rabbitmq-server, retrying once if the first attempt fails.
+if ! sudo service rabbitmq-server start; then
+    echo
+    echo "Starting rabbitmq-server failed. Trying again:"
+    sudo service rabbitmq-server start
+fi
+
+# Apply puppet (still on the previous release the container was
+# installed with). This should leave us with a working copy of Zulip
+# running a previous release.
+sudo /home/zulip/deployments/current/scripts/zulip-puppet-apply -f
+
+# Stop the nginx service started by the above command, retrying once.
+#
+# This is a workaround for an unexpected `Unable to stop
+# Service[nginx]` error in the puppet apply step of upgrade otherwise.
+if ! sudo service nginx stop; then
+    echo
+    echo "Stopping nginx failed. Trying again:"
+    sudo service nginx stop
+fi
+
+# Zulip releases before 2.1.8/3.5/4.4 have a bug in their
+# `upgrade-zulip` scripts, resulting in them exiting with status 0
+# unconditionally. We work around that by running
+# scripts/lib/upgrade-zulip instead.
+UPGRADE_SCRIPT=/home/zulip/deployments/current/scripts/lib/upgrade-zulip
+
+# Execute the upgrade.
+sudo "$UPGRADE_SCRIPT" /tmp/zulip-server-test.tar.gz