From 8339c216377e66f57abea6863a8d368fb9a78160 Mon Sep 17 00:00:00 2001
From: Tim Abbott
Date: Tue, 28 May 2019 15:50:12 -0700
Subject: [PATCH] test-backend: Fix db issues with running two copies in
 parallel.

Sometimes it's useful to run two copies of test-backend at the same
time. The problem with doing so is that we need to make sure no two
threads are using the same test database ID.

Previously, this worked only if at most one of those copies was
running in the single-threaded mode, because we used a random database
ID for the single-threaded code path, but the same IDs counting from 0
for the parallel code path.

Fix this, mostly, by generating a random start for the range of IDs
used by the process, and then counting off database IDs starting from
there (both in the parallel and non-parallel modes). There's still a
very low-probability race; see the TODO.

Additionally, there appear to be some other races with running two
copies of test-backend at the same time that are not related to the
database. See https://github.com/zulip/zulip/issues/12426 for a
follow-up issue that's sorta created by this.
---
 zerver/lib/test_runner.py | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/zerver/lib/test_runner.py b/zerver/lib/test_runner.py
index 3d8df61e82..eba8433440 100644
--- a/zerver/lib/test_runner.py
+++ b/zerver/lib/test_runner.py
@@ -31,6 +31,13 @@ import unittest
 
 from multiprocessing.sharedctypes import Synchronized
 
+# We need to pick an ID for this test-backend invocation, and store it
+# in this global so it can be used in init_worker; this is used to
+# ensure the database IDs we select are unique for each `test-backend`
+# run. This probably should use a locking mechanism rather than the
+# below hack, which fails 1/10000000 of the time.
+random_id_range_start = random.randint(1, 10000000) * 100
+
 _worker_id = 0  # Used to identify the worker process.
 
 ReturnT = TypeVar('ReturnT')  # Constrain return type to match
@@ -218,11 +225,9 @@ def _replacement_destroy_test_db(self: DatabaseCreation,
                        % (self.connection.ops.quote_name(test_database_name),))
 DatabaseCreation._destroy_test_db = _replacement_destroy_test_db
 
-def destroy_test_databases(database_id: Optional[int]=None) -> None:
-    """
-    When database_id is None, the name of the databases is picked up
-    by the database settings.
-    """
+def destroy_test_databases(worker_id: int) -> None:
+    """Modified from the Django original to derive database_id from the worker_id."""
+    database_id = random_id_range_start + worker_id
     for alias in connections:
         connection = connections[alias]
         try:
@@ -231,7 +236,8 @@ def destroy_test_databases(database_id: Optional[int]=None) -> None:
             # DB doesn't exist. No need to do anything.
             pass
 
-def create_test_databases(database_id: int) -> None:
+def create_test_databases(worker_id: int) -> None:
+    database_id = random_id_range_start + worker_id
     for alias in connections:
         connection = connections[alias]
         connection.creation.clone_test_db(
@@ -373,7 +379,6 @@ class Runner(DiscoverRunner):
         # in `zerver.tests.test_templates`.
         self.shallow_tested_templates = set()  # type: Set[str]
         template_rendered.connect(self.on_template_rendered)
-        self.database_id = random.randint(1, 10000)
 
     def get_resultclass(self) -> Type[TestResult]:
         return TextTestResult
@@ -404,7 +409,7 @@ class Runner(DiscoverRunner):
         # In parallel mode (parallel > 1), destroy_test_databases will
         # destroy settings.BACKEND_DATABASE_TEMPLATE; we don't want that.
         # So run this only in serial mode.
-        destroy_test_databases()
+        destroy_test_databases(_worker_id)
         return super().teardown_test_environment(*args, **kwargs)
 
     def test_imports(self, test_labels: List[str], suite: unittest.TestSuite) -> None:
@@ -462,8 +467,10 @@ class Runner(DiscoverRunner):
         # because it will be called for both serial and parallel modes.
         # However, at this point we know in which mode we would be running
         # since that decision has already been made in build_suite().
-        destroy_test_databases(self.database_id)
-        create_test_databases(self.database_id)
+        #
+        # We pass a _worker_id, which in this code path is always 0.
+        destroy_test_databases(_worker_id)
+        create_test_databases(_worker_id)
 
         # We have to do the next line to avoid flaky scenarios where we
         # run a single test and getting an SA connection causes data from
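
Editor's note: the following is a minimal standalone sketch of the ID-selection
scheme the commit message describes, not the Zulip code itself. The helpers
database_id_for_worker and test_database_name, and the zulip_test_template_<id>
naming, are assumptions for illustration only; the actual change simply computes
random_id_range_start + worker_id inside destroy_test_databases and
create_test_databases and passes that number to Django's clone_test_db /
destroy_test_db machinery as a suffix.

import random

# One random block of 100 IDs per test-backend invocation, chosen once at
# import time. Two concurrent runs only collide if they draw the same block
# (roughly a 1-in-10,000,000 chance, hence the TODO about a locking
# mechanism), assuming each run uses at most 100 workers.
random_id_range_start = random.randint(1, 10000000) * 100

def database_id_for_worker(worker_id: int) -> int:
    # Hypothetical helper: serial mode uses worker_id 0, parallel mode uses
    # 0..N-1, so database IDs within a single run never collide.
    return random_id_range_start + worker_id

def test_database_name(worker_id: int) -> str:
    # Illustrative only: Django appends a numeric suffix to the configured
    # test database name when cloning it for a worker.
    return "zulip_test_template_%d" % (database_id_for_worker(worker_id),)

if __name__ == "__main__":
    # e.g. ['zulip_test_template_482700', 'zulip_test_template_482701', ...]
    print([test_database_name(worker_id) for worker_id in range(4)])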