Files
zulip/zerver/management/commands/fetch_tor_exit_nodes.py
Alex Vandiver 49ad188449 rate_limit: Add a flag to lump all TOR exit node IPs together.
TOR users are legitimate users of the system; however, that system can
also be used for abuse -- specifically, by evading IP-based
rate-limiting.

For the purposes of IP-based rate-limiting, add a
RATE_LIMIT_TOR_TOGETHER flag, defaulting to false, which lumps all
requests from TOR exit nodes into the same bucket.  This may allow a
TOR user to deny other TOR users access to the find-my-account and
new-realm endpoints, but this is a low cost for cutting off a
significant potential abuse vector.

If enabled, the list of TOR exit nodes is fetched from their public
endpoint once per hour, via a cron job, and cached on disk.  Django
processes load this data from disk, and cache it in memcached.
Requests are spared from the burden of checking disk on failure via a
circuitbreaker, which trips if there are two failures in a row, and
only begins trying again after 10 minutes.
2021-11-16 11:42:00 -08:00

75 lines
2.5 KiB
Python

import os
from argparse import ArgumentParser
from typing import Any, Set
import orjson
from django.conf import settings
from requests.packages.urllib3.util.retry import Retry
from zerver.lib.management import ZulipBaseCommand
from zerver.lib.outgoing_http import OutgoingSession
class TorDataSession(OutgoingSession):
    """Outgoing HTTP session (role ``tor_data``) that retries failed requests.

    The session is built with a ``Retry`` policy whose total attempt count
    is supplied by the caller and which re-tries on a fixed set of HTTP
    status codes.
    """

    def __init__(self, max_retries: int) -> None:
        """Create the session.

        Args:
            max_retries: Passed as ``total`` to the ``Retry`` policy.
        """
        # NOTE: this is assigned on the Retry class itself, not on an
        # instance, so it is visible to every Retry object in the process.
        Retry.BACKOFF_MAX = 64

        # Response codes that should trigger another attempt.
        retryable_statuses = {
            429,  # The formal rate-limiting response code
            500,  # Server error
            502,  # Bad gateway
            503,  # Service unavailable
        }
        retry_policy = Retry(
            total=max_retries,
            backoff_factor=2.0,
            status_forcelist=retryable_statuses,
        )

        super().__init__(role="tor_data", timeout=3, max_retries=retry_policy)
class Command(ZulipBaseCommand):
    help = """Fetch the list of TOR exit nodes, and write the list of IP addresses
to a file for access from Django for rate-limiting purposes.
Does nothing unless RATE_LIMIT_TOR_TOGETHER is enabled.
"""

    def add_arguments(self, parser: ArgumentParser) -> None:
        """Register the ``--max-retries`` command-line option (int, default 10)."""
        parser.add_argument(
            "--max-retries",
            type=int,
            default=10,
            help="Number of times to retry fetching data from TOR",
        )

    def handle(self, *args: Any, **options: Any) -> None:
        """Download the TOR exit-node list and store the addresses as JSON.

        Returns immediately when ``settings.RATE_LIMIT_TOR_TOGETHER`` is
        falsy.  Otherwise the exit-address listing is fetched, every
        ``ExitAddress`` line is reduced to its address field, and the
        de-duplicated addresses are written as a JSON array to
        ``settings.TOR_EXIT_NODE_FILE_PATH``.

        Args:
            *args: Unused positional arguments.
            **options: Parsed command options; ``options["max_retries"]``
                is forwarded to ``TorDataSession``.

        Raises:
            Whatever ``response.raise_for_status()`` raises for an error
            response, plus any error from writing or renaming the file.
        """
        if not settings.RATE_LIMIT_TOR_TOGETHER:
            return

        # None when the environment variable is unset.
        certificates = os.environ.get("CUSTOM_CA_CERTIFICATES")
        session = TorDataSession(max_retries=options["max_retries"])
        response = session.get(
            "https://check.torproject.org/exit-addresses",
            verify=certificates,
        )
        response.raise_for_status()

        # Format:
        #     ExitNode 4273E6D162ED2717A1CF4207A254004CD3F5307B
        #     Published 2021-11-02 11:01:07
        #     LastStatus 2021-11-02 23:00:00
        #     ExitAddress 176.10.99.200 2021-11-02 23:17:02
        # Only the second whitespace-separated field of the ExitAddress
        # lines is kept; the set collapses repeated addresses.
        exit_nodes: Set[str] = {
            line.split()[1]
            for line in response.text.splitlines()
            if line.startswith("ExitAddress ")
        }

        # Write to a tmpfile to ensure we can't read a partially-written file
        tmp_path = settings.TOR_EXIT_NODE_FILE_PATH + ".tmp"
        with open(tmp_path, "wb") as f:
            f.write(orjson.dumps(list(exit_nodes)))

        # Do an atomic rename into place
        os.rename(tmp_path, settings.TOR_EXIT_NODE_FILE_PATH)