From 4e22a79e6adcc7070e0ff4a724f7d3b489b63c54 Mon Sep 17 00:00:00 2001 From: Mateusz Mandera Date: Tue, 19 Nov 2024 23:16:01 +0100 Subject: [PATCH] zilencer: Add flow for a server to reclaim its registration. If the server controls the registration's hostname, it can reclaim its registration credentials. This is useful, because self-hosted admins frequently lose the credentials when moving their Zulip server to a different machine / deployment method. The flow is the following: 1. The host sends a POST request to /api/v1/remotes/server/register/takeover. 2. The bouncer responds with a signed token. 3. The host prepares to serve this token at /api/v1/zulip-services/verify and sends a POST to /remotes/server/register/verify_challenge endpoint of the bouncer. 4. Upon receiving the POST request, the bouncer GETS https://{hostname}/api/v1/zulip-services/verify, verifies the secret and responds to the original POST with the registration credentials. 5. The host can now save these credentials to it zulip-secrets.conf file and thus regains its push notifications registration. Includes a global rate limit on the usage of the /verify_challenge endpoint, as it causes us to make outgoing requests. --- zerver/lib/exceptions.py | 1 + zerver/lib/rate_limiter.py | 20 ++ zerver/lib/remote_server.py | 17 + zerver/management/commands/register_server.py | 89 ++++- zerver/models/realm_audit_logs.py | 1 + zerver/tests/test_push_notifications.py | 328 ++++++++++++++++++ zerver/views/push_notifications.py | 36 +- zilencer/auth.py | 29 ++ zilencer/urls.py | 7 + zilencer/views.py | 165 ++++++++- zproject/default_settings.py | 12 + zproject/urls.py | 8 + 12 files changed, 702 insertions(+), 11 deletions(-) diff --git a/zerver/lib/exceptions.py b/zerver/lib/exceptions.py index 25e7ccbf80..53e9168836 100644 --- a/zerver/lib/exceptions.py +++ b/zerver/lib/exceptions.py @@ -56,6 +56,7 @@ class ErrorCode(Enum): SYSTEM_GROUP_REQUIRED = auto() CANNOT_DEACTIVATE_GROUP_IN_USE = auto() CANNOT_ADMINISTER_CHANNEL = auto() + REMOTE_SERVER_VERIFICATION_SECRET_NOT_PREPARED = auto() class JsonableError(Exception): diff --git a/zerver/lib/rate_limiter.py b/zerver/lib/rate_limiter.py index 0ff3665c17..a0f0ce2584 100644 --- a/zerver/lib/rate_limiter.py +++ b/zerver/lib/rate_limiter.py @@ -157,6 +157,20 @@ class RateLimitedIPAddr(RateLimitedObject): return rules[self.domain] +class RateLimitedEndpoint(RateLimitedObject): + def __init__(self, endpoint_name: str) -> None: + self.endpoint_name = endpoint_name + super().__init__() + + @override + def key(self) -> str: + return f"{type(self).__name__}:{self.endpoint_name}" + + @override + def rules(self) -> list[tuple[int, int]]: + return settings.ABSOLUTE_USAGE_LIMITS_BY_ENDPOINT[self.endpoint_name] + + class RateLimiterBackend(ABC): @classmethod @abstractmethod @@ -603,6 +617,12 @@ def rate_limit_request_by_ip(request: HttpRequest, domain: str) -> None: RateLimitedIPAddr(ip_addr, domain=domain).rate_limit_request(request) +def rate_limit_endpoint_absolute(endpoint_name: str) -> None: + ratelimited, secs_to_freedom = RateLimitedEndpoint(endpoint_name).rate_limit() + if ratelimited: + raise RateLimitedError(secs_to_freedom) + + def should_rate_limit(request: HttpRequest) -> bool: if not settings.RATE_LIMITING: return False diff --git a/zerver/lib/remote_server.py b/zerver/lib/remote_server.py index 95d674b735..524b1f4d8c 100644 --- a/zerver/lib/remote_server.py +++ b/zerver/lib/remote_server.py @@ -1,4 +1,5 @@ import logging +import secrets from collections.abc import Mapping from typing import Any from urllib.parse import urljoin @@ -489,3 +490,19 @@ def maybe_enqueue_audit_log_upload(realm: Realm) -> None: if uses_notification_bouncer(): event = {"type": "push_bouncer_update_for_realm", "realm_id": realm.id} queue_event_on_commit("deferred_work", event) + + +SELF_HOSTING_REGISTRATION_TAKEOVER_CHALLENGE_TOKEN_REDIS_KEY = ( + "self_hosting_domain_takeover_challenge_verify" +) + + +def prepare_for_registration_takeover_challenge(verification_secret: str) -> str: + access_token = secrets.token_urlsafe(32) + data_to_store = {"verification_secret": verification_secret, "access_token": access_token} + redis_client.set( + redis_utils.REDIS_KEY_PREFIX + SELF_HOSTING_REGISTRATION_TAKEOVER_CHALLENGE_TOKEN_REDIS_KEY, + orjson.dumps(data_to_store), + ex=10, + ) + return access_token diff --git a/zerver/management/commands/register_server.py b/zerver/management/commands/register_server.py index 03adb5a616..12ade08434 100644 --- a/zerver/management/commands/register_server.py +++ b/zerver/management/commands/register_server.py @@ -13,6 +13,7 @@ from typing_extensions import override from zerver.lib.management import ZulipBaseCommand, check_config from zerver.lib.remote_server import ( PushBouncerSession, + prepare_for_registration_takeover_challenge, send_json_to_push_bouncer, send_server_data_to_push_bouncer, ) @@ -39,6 +40,11 @@ class Command(ZulipBaseCommand): action="store_true", help="Automatically rotate your server's zulip_org_key", ) + action.add_argument( + "--registration-takeover", + action="store_true", + help="Overwrite pre-existing registration for the hostname", + ) action.add_argument( "--deactivate", action="store_true", @@ -72,10 +78,11 @@ class Command(ZulipBaseCommand): print("Mobile Push Notification Service registration successfully deactivated!") return - request = { + hostname = settings.EXTERNAL_HOST + request: dict[str, object] = { "zulip_org_id": settings.ZULIP_ORG_ID, "zulip_org_key": settings.ZULIP_ORG_KEY, - "hostname": settings.EXTERNAL_HOST, + "hostname": hostname, "contact_email": settings.ZULIP_ADMINISTRATOR, } if options["rotate_key"]: @@ -107,6 +114,17 @@ class Command(ZulipBaseCommand): # enough about what happened. return + if options["registration_takeover"]: + org_id, org_key = self.do_registration_takeover_flow(hostname) + # We still want to proceed with a regular request to the registration endpoint, + # as it'll update the registration with new information such as the contact email. + request["zulip_org_id"] = org_id + request["zulip_org_key"] = org_key + settings.ZULIP_ORG_ID = org_id + settings.ZULIP_ORG_KEY = org_key + print() + print("Proceeding to update the registration with current metadata...") + response = self._request_push_notification_bouncer_url( "/api/v1/remotes/server/register", request ) @@ -121,6 +139,8 @@ class Command(ZulipBaseCommand): else: if options["rotate_key"]: print(f"Success! Updating {SECRETS_FILENAME} with the new key...") + new_org_key = request["new_org_key"] + assert isinstance(new_org_key, str) subprocess.check_call( [ "crudini", @@ -129,17 +149,71 @@ class Command(ZulipBaseCommand): SECRETS_FILENAME, "secrets", "zulip_org_key", - request["new_org_key"], + new_org_key, ] ) print("Mobile Push Notification Service registration successfully updated!") + if options["registration_takeover"]: + print() + print( + "Make sure to restart the server next by running /home/zulip/deployments/current/scripts/restart-server " + "so that the new credentials are reloaded." + ) + + def do_registration_takeover_flow(self, hostname: str) -> tuple[str, str]: + params = {"hostname": hostname} + response = self._request_push_notification_bouncer_url( + "/api/v1/remotes/server/register/takeover", params + ) + verification_secret = response.json()["verification_secret"] + + print( + "Received a verification secret from the service. Preparing to serve it at the verification URL." + ) + token_for_push_bouncer = prepare_for_registration_takeover_challenge(verification_secret) + + print("Sending ACK to the service and awaiting completion of verification...") + response = self._request_push_notification_bouncer_url( + "/api/v1/remotes/server/register/verify_challenge", + dict(hostname=params["hostname"], access_token=token_for_push_bouncer), + ) + + org_id = response.json()["zulip_org_id"] + org_key = response.json()["zulip_org_key"] + # Update the secrets file. + print("Success! Updating secrets file with received credentials.") + subprocess.check_call( + [ + "crudini", + "--inplace", + "--set", + SECRETS_FILENAME, + "secrets", + "zulip_org_id", + org_id, + ] + ) + subprocess.check_call( + [ + "crudini", + "--inplace", + "--set", + SECRETS_FILENAME, + "secrets", + "zulip_org_key", + org_key, + ] + ) + print("Mobile Push Notification Service registration successfully transferred.") + return org_id, org_key + def _request_push_notification_bouncer_url(self, url: str, params: dict[str, Any]) -> Response: assert settings.ZULIP_SERVICES_URL is not None - registration_url = settings.ZULIP_SERVICES_URL + url + request_url = settings.ZULIP_SERVICES_URL + url session = PushBouncerSession() try: - response = session.post(registration_url, data=params) + response = session.post(request_url, data=params) except requests.RequestException: raise CommandError( "Network error connecting to push notifications service " @@ -154,6 +228,9 @@ class Command(ZulipBaseCommand): except Exception: raise e - raise CommandError("Error: " + content_dict["msg"]) + error_message = content_dict["msg"] + raise CommandError( + f'Error received from the push notification service: "{error_message}"' + ) return response diff --git a/zerver/models/realm_audit_logs.py b/zerver/models/realm_audit_logs.py index a9487ea197..d83501f159 100644 --- a/zerver/models/realm_audit_logs.py +++ b/zerver/models/realm_audit_logs.py @@ -124,6 +124,7 @@ class AuditLogEventType(IntEnum): REMOTE_SERVER_BILLING_MODALITY_CHANGED = 10211 REMOTE_SERVER_SPONSORSHIP_PENDING_STATUS_CHANGED = 10213 REMOTE_SERVER_CREATED = 10215 + REMOTE_SERVER_REGISTRATION_TRANSFERRED = 10216 # This value is for RemoteRealmAuditLog entries tracking changes to the # RemoteRealm model resulting from modified realm information sent to us diff --git a/zerver/tests/test_push_notifications.py b/zerver/tests/test_push_notifications.py index 4dccd7ba26..d6e2ad97be 100644 --- a/zerver/tests/test_push_notifications.py +++ b/zerver/tests/test_push_notifications.py @@ -11,6 +11,7 @@ from unittest import mock, skipUnless import aioapns import firebase_admin.messaging as firebase_messaging import orjson +import requests import responses import time_machine from django.conf import settings @@ -74,12 +75,14 @@ from zerver.lib.remote_server import ( PushNotificationBouncerServerError, build_analytics_data, get_realms_info_for_push_bouncer, + prepare_for_registration_takeover_challenge, record_push_notifications_recently_working, redis_client, send_server_data_to_push_bouncer, send_to_push_bouncer, ) from zerver.lib.response import json_response_from_error +from zerver.lib.send_email import FromAddress from zerver.lib.test_classes import BouncerTestCase, ZulipTestCase from zerver.lib.test_helpers import ( activate_push_notification_service, @@ -106,6 +109,10 @@ from zerver.models.realm_audit_logs import AuditLogEventType from zerver.models.realms import get_realm from zerver.models.scheduled_jobs import NotificationTriggers from zerver.models.streams import get_stream +from zilencer.auth import ( + REMOTE_SERVER_TAKEOVER_TOKEN_VALIDITY_SECONDS, + generate_registration_takeover_verification_secret, +) from zilencer.lib.remote_counts import MissingDataError from zilencer.models import RemoteZulipServerAuditLog from zilencer.views import DevicesToCleanUpDict @@ -5397,6 +5404,327 @@ class PushBouncerSignupTest(ZulipTestCase): self.assert_json_success(result) +@skipUnless(settings.ZILENCER_ENABLED, "requires zilencer") +class RegistrationTakeoverFlowTest(ZulipTestCase): + @override + def setUp(self) -> None: + super().setUp() + + self.zulip_org_id = str(uuid.uuid4()) + self.zulip_org_key = get_random_string(64) + self.hostname = "example.com" + request = dict( + zulip_org_id=self.zulip_org_id, + zulip_org_key=self.zulip_org_key, + hostname=self.hostname, + contact_email="server-admin@zulip.com", + ) + result = self.client_post("/api/v1/remotes/server/register", request) + self.assert_json_success(result) + + @responses.activate + def test_flow_end_to_end(self) -> None: + server = RemoteZulipServer.objects.get(uuid=self.zulip_org_id) + + result = self.client_post( + "/api/v1/remotes/server/register/takeover", {"hostname": self.hostname} + ) + self.assert_json_success(result) + data = result.json() + verification_secret = data["verification_secret"] + + access_token = prepare_for_registration_takeover_challenge(verification_secret) + # First we query the host's endpoint for serving the verification_secret. + result = self.client_post(f"/api/v1/zulip-services/verify/{access_token}/") + self.assert_json_success(result) + data = result.json() + served_verification_secret = data["verification_secret"] + self.assertEqual(served_verification_secret, verification_secret) + + # Now we return to testing the push bouncer and we send it the request that the hosts's + # admin will once the host is ready to serve the verification_secret. + responses.add( + responses.GET, + f"https://example.com/api/v1/zulip-services/verify/{access_token}/", + json={"verification_secret": verification_secret}, + status=200, + ) + with self.assertLogs("zilencer.views", level="INFO") as mock_log: + result = self.client_post( + "/api/v1/remotes/server/register/verify_challenge", + {"hostname": self.hostname, "access_token": access_token}, + ) + self.assert_json_success(result) + new_uuid = result.json()["zulip_org_id"] + new_key = result.json()["zulip_org_key"] + # The uuid of the registration is preserved and delivered in this final response, + # but the secret key is rotated. + self.assertEqual(new_uuid, self.zulip_org_id) + self.assertNotEqual(new_key, self.zulip_org_key) + self.assertEqual( + mock_log.output, + ["INFO:zilencer.views:verify_registration_takeover:host:example.com|success"], + ) + + # Verify the registration got updated accordingly. + server.refresh_from_db() + self.assertEqual(str(server.uuid), new_uuid) + self.assertEqual(server.api_key, new_key) + + audit_log = RemoteZulipServerAuditLog.objects.filter(server=server).latest("id") + self.assertEqual( + audit_log.event_type, AuditLogEventType.REMOTE_SERVER_REGISTRATION_TRANSFERRED + ) + + @override_settings( + RATE_LIMITING=True, + ABSOLUTE_USAGE_LIMITS_BY_ENDPOINT={ + "verify_registration_takeover_challenge_ack_endpoint": [(10, 2)] + }, + ) + @responses.activate + def test_rate_limiting(self) -> None: + responses.get( + "https://example.com/api/v1/zulip-services/verify/sometoken/", + json={"verification_secret": "foo"}, + status=200, + ) + + result = self.client_post( + "/api/v1/remotes/server/register/verify_challenge", + {"hostname": self.hostname, "access_token": "sometoken"}, + ) + self.assert_json_error(result, "The verification secret is malformed") + result = self.client_post( + "/api/v1/remotes/server/register/verify_challenge", + {"hostname": self.hostname, "access_token": "sometoken"}, + ) + self.assert_json_error(result, "The verification secret is malformed") + + # Now the rate limit is hit. + with self.assertLogs("zilencer.views", level="WARNING") as mock_log: + result = self.client_post( + "/api/v1/remotes/server/register/verify_challenge", + {"hostname": self.hostname, "access_token": "sometoken"}, + ) + self.assert_json_error( + result, + f"The global limits on recent usage of this endpoint have been reached. Please try again later or reach out to {FromAddress.SUPPORT} for assistance.", + status_code=429, + ) + self.assertEqual( + mock_log.output, + [ + "WARNING:zilencer.views:Rate limit exceeded for verify_registration_takeover_challenge_ack_endpoint" + ], + ) + + @responses.activate + def test_ack_endpoint_errors(self) -> None: + time_now = now() + + result = self.client_post( + "/api/v1/remotes/server/register/verify_challenge", + {"hostname": "unregistered.example.com", "access_token": "sometoken"}, + ) + self.assert_json_error(result, "Registration not found for this hostname") + + responses.get( + "https://example.com/api/v1/zulip-services/verify/sometoken/", + json={"verification_secret": "foo"}, + status=200, + ) + + result = self.client_post( + "/api/v1/remotes/server/register/verify_challenge", + {"hostname": self.hostname, "access_token": "sometoken"}, + ) + self.assert_json_error(result, "The verification secret is malformed") + + with time_machine.travel(time_now, tick=False): + verification_secret = generate_registration_takeover_verification_secret(self.hostname) + responses.get( + "https://example.com/api/v1/zulip-services/verify/sometoken/", + json={"verification_secret": verification_secret}, + status=200, + ) + with time_machine.travel( + time_now + timedelta(seconds=REMOTE_SERVER_TAKEOVER_TOKEN_VALIDITY_SECONDS + 1), + tick=False, + ): + result = self.client_post( + "/api/v1/remotes/server/register/verify_challenge", + {"hostname": self.hostname, "access_token": "sometoken"}, + ) + self.assert_json_error(result, "The verification secret has expired") + + with ( + time_machine.travel(time_now, tick=False), + mock.patch("zilencer.auth.REMOTE_SERVER_TAKEOVER_TOKEN_SALT", "foo"), + ): + verification_secret = generate_registration_takeover_verification_secret(self.hostname) + responses.get( + "https://example.com/api/v1/zulip-services/verify/sometoken/", + json={"verification_secret": verification_secret}, + status=200, + ) + result = self.client_post( + "/api/v1/remotes/server/register/verify_challenge", + {"hostname": self.hostname, "access_token": "sometoken"}, + ) + self.assert_json_error(result, "The verification secret is invalid") + + # Make sure a valid verification secret for one hostname does not work for another. + with time_machine.travel(time_now, tick=False): + verification_secret = generate_registration_takeover_verification_secret( + "different.example.com" + ) + responses.get( + "https://example.com/api/v1/zulip-services/verify/sometoken/", + json={"verification_secret": verification_secret}, + status=200, + ) + result = self.client_post( + "/api/v1/remotes/server/register/verify_challenge", + {"hostname": self.hostname, "access_token": "sometoken"}, + ) + self.assert_json_error(result, "The verification secret is for a different hostname") + + @responses.activate + def test_outgoing_verification_request_errors(self) -> None: + access_token = "sometoken" + base_url = f"https://{self.hostname}/api/v1/zulip-services/verify/{access_token}/" + + responses.add( + method=responses.GET, + url=base_url, + json={"code": "REMOTE_SERVER_VERIFICATION_SECRET_NOT_PREPARED"}, + status=400, + ) + with self.assertLogs("zilencer.views", level="INFO") as mock_log: + result = self.client_post( + "/api/v1/remotes/server/register/verify_challenge", + {"hostname": self.hostname, "access_token": access_token}, + ) + self.assert_json_error(result, "The host reported it has no verification secret.") + self.assertEqual( + mock_log.output, + [ + "INFO:zilencer.views:verify_registration_takeover:host:example.com|secret_not_prepared" + ], + ) + + # HttpError: + responses.add( + method=responses.GET, + url=base_url, + status=403, + ) + with self.assertLogs("zilencer.views", level="INFO") as mock_log: + result = self.client_post( + "/api/v1/remotes/server/register/verify_challenge", + {"hostname": self.hostname, "access_token": access_token}, + ) + self.assert_json_error(result, "Error response received from the host: 403") + self.assertIn( + "verify_registration_takeover:host:example.com|exception:", mock_log.output[0] + ) + + # SSLError: + responses.add( + method=responses.GET, + url=base_url, + body=requests.exceptions.SSLError("certificate verification failed"), + ) + with self.assertLogs("zilencer.views", level="INFO") as mock_log: + result = self.client_post( + "/api/v1/remotes/server/register/verify_challenge", + {"hostname": self.hostname, "access_token": access_token}, + ) + self.assert_json_error(result, "SSL error occurred while communicating with the host.") + self.assertIn( + "verify_registration_takeover:host:example.com|exception:", mock_log.output[0] + ) + + # ConnectionError: + responses.add( + method=responses.GET, + url=base_url, + body=requests.exceptions.ConnectionError("Fake connection error"), + ) + with self.assertLogs("zilencer.views", level="INFO") as mock_log: + result = self.client_post( + "/api/v1/remotes/server/register/verify_challenge", + {"hostname": self.hostname, "access_token": access_token}, + ) + self.assert_json_error( + result, "Connection error occurred while communicating with the host." + ) + self.assertIn( + "verify_registration_takeover:host:example.com|exception:", mock_log.output[0] + ) + + # Timeout: + responses.add( + method=responses.GET, + url=base_url, + body=requests.exceptions.Timeout("The request timed out"), + ) + with self.assertLogs("zilencer.views", level="INFO") as mock_log: + result = self.client_post( + "/api/v1/remotes/server/register/verify_challenge", + {"hostname": self.hostname, "access_token": access_token}, + ) + self.assert_json_error(result, "The request timed out while communicating with the host.") + self.assertIn( + "verify_registration_takeover:host:example.com|exception:", mock_log.output[0] + ) + + # Generic RequestException: + responses.add( + method=responses.GET, + url=base_url, + body=requests.exceptions.RequestException("Something else went wrong"), + ) + with self.assertLogs("zilencer.views", level="INFO") as mock_log: + result = self.client_post( + "/api/v1/remotes/server/register/verify_challenge", + {"hostname": self.hostname, "access_token": access_token}, + ) + self.assert_json_error(result, "An error occurred while communicating with the host.") + self.assertIn( + "verify_registration_takeover:host:example.com|exception:", mock_log.output[0] + ) + + def test_initiate_flow_for_unregistered_domain(self) -> None: + result = self.client_post( + "/api/v1/remotes/server/register/takeover", + {"hostname": "unregistered.example.com"}, + ) + self.assert_json_error(result, "unregistered.example.com not yet registered") + + def test_serve_verification_secret_endpoint(self) -> None: + result = self.client_get( + "/api/v1/zulip-services/verify/sometoken/", + ) + self.assert_json_error(result, "Verification secret not prepared") + + valid_access_token = prepare_for_registration_takeover_challenge(verification_secret="foo") + result = self.client_get( + f"/api/v1/zulip-services/verify/{valid_access_token}/", + ) + self.assert_json_success(result) + self.assertEqual(result.json()["verification_secret"], "foo") + + # Trying to access the verification secret with the wrong access_token should fail + # in a way indistinguishable from the case where the host is not prepared to serve + # a verification secret at all. + result = self.client_get( + "/api/v1/zulip-services/verify/wrongtoken/", + ) + self.assert_json_error(result, "Verification secret not prepared") + + class TestUserPushIdentityCompat(ZulipTestCase): def test_filter_q(self) -> None: user_identity_id = UserPushIdentityCompat(user_id=1) diff --git a/zerver/views/push_notifications.py b/zerver/views/push_notifications.py index c344ec5dd6..a8e2b4b82e 100644 --- a/zerver/views/push_notifications.py +++ b/zerver/views/push_notifications.py @@ -1,3 +1,4 @@ +import orjson from django.conf import settings from django.http import HttpRequest, HttpResponse, HttpResponseRedirect from django.shortcuts import render @@ -5,7 +6,9 @@ from django.urls import reverse from django.utils.translation import gettext as _ from zerver.decorator import human_users_only, zulip_login_required +from zerver.lib import redis_utils from zerver.lib.exceptions import ( + ErrorCode, JsonableError, MissingRemoteRealmError, OrganizationOwnerRequiredError, @@ -21,16 +24,19 @@ from zerver.lib.push_notifications import ( uses_notification_bouncer, ) from zerver.lib.remote_server import ( + SELF_HOSTING_REGISTRATION_TAKEOVER_CHALLENGE_TOKEN_REDIS_KEY, UserDataForRemoteBilling, get_realms_info_for_push_bouncer, send_server_data_to_push_bouncer, send_to_push_bouncer, ) from zerver.lib.response import json_success -from zerver.lib.typed_endpoint import ApnsAppId, typed_endpoint +from zerver.lib.typed_endpoint import ApnsAppId, typed_endpoint, typed_endpoint_without_parameters from zerver.models import PushDeviceToken, UserProfile from zerver.views.errors import config_error +redis_client = redis_utils.get_redis_client() + def validate_token(token_str: str, kind: int) -> None: if token_str == "" or len(token_str) > 4096: @@ -231,3 +237,31 @@ def self_hosting_auth_not_configured(request: HttpRequest) -> HttpResponse: go_back_to_url="/", go_back_to_url_name="the app", ) + + +class VerificationSecretNotPreparedError(JsonableError): + code = ErrorCode.REMOTE_SERVER_VERIFICATION_SECRET_NOT_PREPARED + + def __init__(self) -> None: + super().__init__(_("Verification secret not prepared")) + + +@typed_endpoint_without_parameters +def self_hosting_registration_takeover_challenge_verify( + request: HttpRequest, access_token: str +) -> HttpResponse: + json_data = redis_client.get( + redis_utils.REDIS_KEY_PREFIX + SELF_HOSTING_REGISTRATION_TAKEOVER_CHALLENGE_TOKEN_REDIS_KEY + ) + if json_data is None: + raise VerificationSecretNotPreparedError + + data = orjson.loads(json_data) + if data["access_token"] != access_token: + # Without knowing the access_token, the client gets the same error + # as if we're not serving the verification secret at all. + raise VerificationSecretNotPreparedError + + verification_secret = data["verification_secret"] + + return json_success(request, data={"verification_secret": verification_secret}) diff --git a/zilencer/auth.py b/zilencer/auth.py index b11635ffd2..3dc75c2441 100644 --- a/zilencer/auth.py +++ b/zilencer/auth.py @@ -1,3 +1,5 @@ +import base64 +import binascii import logging from collections.abc import Callable from functools import wraps @@ -5,6 +7,7 @@ from typing import Any, Concatenate import sentry_sdk from django.conf import settings +from django.core.signing import BadSignature, SignatureExpired, TimestampSigner from django.http import HttpRequest, HttpResponse from django.urls import path from django.urls.resolvers import URLPattern @@ -37,6 +40,32 @@ logger = logging.getLogger(__name__) ParamT = ParamSpec("ParamT") +REMOTE_SERVER_TAKEOVER_TOKEN_SALT = "remote_server_takeover" +REMOTE_SERVER_TAKEOVER_TOKEN_VALIDITY_SECONDS = 10 + + +def generate_registration_takeover_verification_secret(hostname: str) -> str: + signer = TimestampSigner(salt=REMOTE_SERVER_TAKEOVER_TOKEN_SALT) + secret = base64.b16encode(signer.sign(hostname).encode()).decode() + return secret + + +def validate_registration_takeover_verification_secret(secret: str, hostname: str) -> None: + signer = TimestampSigner(salt=REMOTE_SERVER_TAKEOVER_TOKEN_SALT) + try: + signed_data = base64.b16decode(secret).decode() + hostname_from_secret = signer.unsign( + signed_data, max_age=REMOTE_SERVER_TAKEOVER_TOKEN_VALIDITY_SECONDS + ) + except SignatureExpired: + raise JsonableError(_("The verification secret has expired")) + except BadSignature: + raise JsonableError(_("The verification secret is invalid")) + except binascii.Error: + raise JsonableError(_("The verification secret is malformed")) + if hostname_from_secret != hostname: + raise JsonableError(_("The verification secret is for a different hostname")) + class InvalidZulipServerError(JsonableError): code = ErrorCode.INVALID_ZULIP_SERVER diff --git a/zilencer/urls.py b/zilencer/urls.py index d2dfa72eb8..4fab2d99f5 100644 --- a/zilencer/urls.py +++ b/zilencer/urls.py @@ -13,8 +13,10 @@ from zilencer.views import ( remote_server_notify_push, remote_server_post_analytics, remote_server_send_test_notification, + take_over_remote_server_registration, unregister_all_remote_push_devices, unregister_remote_push_device, + verify_registration_takeover_challenge_ack_endpoint, ) i18n_urlpatterns: Any = [] @@ -28,6 +30,11 @@ push_bouncer_patterns = [ remote_server_path("remotes/push/test_notification", POST=remote_server_send_test_notification), # Push signup doesn't use the REST API, since there's no auth. path("remotes/server/register", register_remote_server), + path("remotes/server/register/takeover", take_over_remote_server_registration), + path( + "remotes/server/register/verify_challenge", + verify_registration_takeover_challenge_ack_endpoint, + ), remote_server_path("remotes/server/deactivate", POST=deactivate_remote_server), # For receiving table data used in analytics and billing remote_server_path("remotes/server/analytics", POST=remote_server_post_analytics), diff --git a/zilencer/views.py b/zilencer/views.py index c78346595d..0189002939 100644 --- a/zilencer/views.py +++ b/zilencer/views.py @@ -3,10 +3,11 @@ from collections import Counter from datetime import datetime, timedelta, timezone from email.headerregistry import Address from typing import Annotated, Any, TypedDict, TypeVar -from urllib.parse import urlsplit +from urllib.parse import urljoin, urlsplit from uuid import UUID import orjson +import requests.exceptions from django.conf import settings from django.core.exceptions import ValidationError from django.core.validators import URLValidator, validate_email @@ -14,7 +15,7 @@ from django.db import IntegrityError, transaction from django.db.models import Model from django.db.models.constants import OnConflict from django.http import HttpRequest, HttpResponse -from django.utils.crypto import constant_time_compare +from django.utils.crypto import constant_time_compare, get_random_string from django.utils.timezone import now as timezone_now from django.utils.translation import gettext as _ from django.utils.translation import gettext as err_ @@ -41,9 +42,11 @@ from zerver.lib.email_validation import validate_is_not_disposable from zerver.lib.exceptions import ( ErrorCode, JsonableError, + RateLimitedError, RemoteRealmServerMismatchError, RemoteServerDeactivatedError, ) +from zerver.lib.outgoing_http import OutgoingSession from zerver.lib.push_notifications import ( InvalidRemotePushDeviceTokenError, UserPushIdentityCompat, @@ -52,6 +55,7 @@ from zerver.lib.push_notifications import ( send_test_push_notification_directly_to_devices, ) from zerver.lib.queue import queue_event_on_commit +from zerver.lib.rate_limiter import rate_limit_endpoint_absolute from zerver.lib.remote_server import ( InstallationCountDataForAnalytics, RealmAuditLogDataForAnalytics, @@ -74,7 +78,11 @@ from zerver.lib.types import RemoteRealmDictValue from zerver.models.realm_audit_logs import AuditLogEventType from zerver.models.realms import DisposableEmailError from zerver.views.push_notifications import validate_token -from zilencer.auth import InvalidZulipServerKeyError +from zilencer.auth import ( + InvalidZulipServerKeyError, + generate_registration_takeover_verification_secret, + validate_registration_takeover_verification_secret, +) from zilencer.lib.remote_counts import MissingDataError from zilencer.models import ( RemoteInstallationCount, @@ -132,7 +140,11 @@ def validate_hostname_or_raise_error(hostname: str) -> None: actually know how to make requests to the server. """ try: - # TODO: Ideally we'd not abuse the URL validator this way + # We perform basic validation in two steps: + # 1. urlsplit doesn't do any proper validation, but parses the string + # and ensures that there are no extra components (e.g., path, query, fragment). + # 2. Once we know that the string is a clean netloc, we pass that do Django's + # URLValidator for validation. parsed = urlsplit(f"http://{hostname}") if parsed.path or parsed.query or parsed.fragment: @@ -147,6 +159,24 @@ def validate_hostname_or_raise_error(hostname: str) -> None: raise JsonableError(_("{hostname} is not a valid hostname").format(hostname=hostname)) +@csrf_exempt +@require_post +@typed_endpoint +def take_over_remote_server_registration(request: HttpRequest, *, hostname: str) -> HttpResponse: + validate_hostname_or_raise_error(hostname) + + if not RemoteZulipServer.objects.filter(hostname=hostname).exists(): + raise JsonableError(_("{hostname} not yet registered").format(hostname=hostname)) + + verification_secret = generate_registration_takeover_verification_secret(hostname) + return json_success( + request, + data={ + "verification_secret": verification_secret, + }, + ) + + @csrf_exempt @require_post @typed_endpoint @@ -261,6 +291,133 @@ def register_remote_server( return json_success(request, data={"created": created}) +class RegistrationTakeOverVerificationSession(OutgoingSession): + def __init__(self) -> None: + # The generous timeout and retries here are likely to be unnecessary; a functional Zulip server should + # respond instantly. + super().__init__(role="verify_registration_takeover_challenge", timeout=5, max_retries=3) + + +class EndpointUsageRateLimitError(JsonableError): + code = ErrorCode.RATE_LIMIT_HIT + http_status_code = 429 + + +@csrf_exempt +@typed_endpoint +def verify_registration_takeover_challenge_ack_endpoint( + request: HttpRequest, + *, + hostname: str, + access_token: str, +) -> HttpResponse: + """ + The host should POST to this endpoint to announce it is ready to serve the received + secret at {hostname}/zulip-services/verify/{access_token}. + The access_token is randomly generated by the host in order to prevent 3rd parties + from accessing the verification secret served at that URL. + + If we successfully verify the secret, we will send the registration credentials + to the host, completing the whole flow. + """ + + try: + # This endpoint is at risk of being used to spam another server with our requests, + # or to freeze up our Django processes by making them wait for timeouts on the + # requests triggered here. + # Since this is an extremely low-traffic endpoint, we just put an absolute limit on + # how many times it can be called in a given time period. There's little value for an + # attacker to fill up the bucket here, and issues can be handled adequately by + # manual intervention. + if settings.RATE_LIMITING: + rate_limit_endpoint_absolute("verify_registration_takeover_challenge_ack_endpoint") + except RateLimitedError: + # This rate limit being hit means we've either set the limits too low for legitimate use, + # or the endpoint is being spammed. Ideally, we want this endpoint to always be operational + # so this deserves logging a warning. + logger.warning( + "Rate limit exceeded for verify_registration_takeover_challenge_ack_endpoint" + ) + raise EndpointUsageRateLimitError( + _( + "The global limits on recent usage of this endpoint have been reached." + " Please try again later or reach out to {support_email} for assistance." + ).format(support_email=FromAddress.SUPPORT) + ) + + try: + remote_server = RemoteZulipServer.objects.get(hostname=hostname) + except RemoteZulipServer.DoesNotExist: + raise JsonableError(_("Registration not found for this hostname")) + + session = RegistrationTakeOverVerificationSession() + url = urljoin(f"https://{hostname}", f"/api/v1/zulip-services/verify/{access_token}/") + + exception_and_error_message: tuple[Exception, str] | None = None + try: + response = session.get(url) + response.raise_for_status() + except requests.exceptions.HTTPError as e: + if check_takeover_challenge_response_secret_not_prepared(e.response): + logger.info("verify_registration_takeover:host:%s|secret_not_prepared", hostname) + raise JsonableError(_("The host reported it has no verification secret.")) + + error_message = _("Error response received from the host: {status_code}").format( + status_code=response.status_code + ) + exception_and_error_message = (e, error_message) + except requests.exceptions.SSLError as e: + error_message = "SSL error occurred while communicating with the host." + exception_and_error_message = (e, error_message) + except requests.exceptions.ConnectionError as e: + error_message = "Connection error occurred while communicating with the host." + exception_and_error_message = (e, error_message) + except requests.exceptions.Timeout as e: + error_message = "The request timed out while communicating with the host." + exception_and_error_message = (e, error_message) + except requests.exceptions.RequestException as e: + error_message = "An error occurred while communicating with the host." + exception_and_error_message = (e, error_message) + + if exception_and_error_message is not None: + exception, error_message = exception_and_error_message + logger.info("verify_registration_takeover:host:%s|exception:%s", hostname, exception) + raise JsonableError(error_message) + + data = response.json() + verification_secret = data["verification_secret"] + validate_registration_takeover_verification_secret(verification_secret, hostname) + + logger.info("verify_registration_takeover:host:%s|success", hostname) + new_secret_key = get_random_string(RemoteZulipServer.API_KEY_LENGTH) + with transaction.atomic(durable=True): + remote_server.api_key = new_secret_key + remote_server.save(update_fields=["api_key"]) + + RemoteZulipServerAuditLog.objects.create( + event_type=AuditLogEventType.REMOTE_SERVER_REGISTRATION_TRANSFERRED, + server=remote_server, + event_time=timezone_now(), + ) + + return json_success( + request, + data={"zulip_org_id": str(remote_server.uuid), "zulip_org_key": new_secret_key}, + ) + + +def check_takeover_challenge_response_secret_not_prepared(response: requests.Response) -> bool: + secret_not_prepared = False + try: + secret_not_prepared = ( + response.status_code == 400 + and response.json()["code"] == "REMOTE_SERVER_VERIFICATION_SECRET_NOT_PREPARED" + ) + except Exception: # nocoverage + return False + return secret_not_prepared + + @typed_endpoint def register_remote_push_device( request: HttpRequest, diff --git a/zproject/default_settings.py b/zproject/default_settings.py index 09fe87ab23..17c2e2de27 100644 --- a/zproject/default_settings.py +++ b/zproject/default_settings.py @@ -337,6 +337,18 @@ DEFAULT_RATE_LIMITING_RULES = { # DEFAULT_RATE_LIMITING_RULES. RATE_LIMITING_RULES: dict[str, list[tuple[int, int]]] = {} +# Rate limits for endpoints which have absolute limits on how much +# they can be used in a given time period. +# These will be extremely rare, and most likely for zilencer endpoints +# only, so we don't need a nice overriding system for them like we do +# for RATE_LIMITING_RULES. +ABSOLUTE_USAGE_LIMITS_BY_ENDPOINT = { + "verify_registration_takeover_challenge_ack_endpoint": [ + # 30 requests per day + (86400, 30), + ], +} + # Two factor authentication is not yet implementation-complete TWO_FACTOR_AUTHENTICATION_ENABLED = False diff --git a/zproject/urls.py b/zproject/urls.py index 6e84e2e1db..5074acea5e 100644 --- a/zproject/urls.py +++ b/zproject/urls.py @@ -102,6 +102,7 @@ from zerver.views.push_notifications import ( self_hosting_auth_json_endpoint, self_hosting_auth_not_configured, self_hosting_auth_redirect_endpoint, + self_hosting_registration_takeover_challenge_verify, send_test_push_notification_api, ) from zerver.views.reactions import add_reaction, remove_reaction @@ -868,6 +869,13 @@ urls += [ ), ] +urls += [ + path( + "api/v1/zulip-services/verify//", + self_hosting_registration_takeover_challenge_verify, + ), +] + if not settings.CORPORATE_ENABLED: # nocoverage # This conditional behavior cannot be tested directly, since # urls.py is not readily reloaded in Django tests. See the block