push_notifications: Set a timeout on FCM requests.

In steady-state, requests to FCM take about a second; however, in
cases where the remote FCM server is unstable, the request has been
observed to block for more than a minute.

As noted in the previous commit, pushes must complete within 30s;
fail fast, and let the retries and exponential backoff handle errors.

The worst-case total time taken with timeouts and errors for an FCM
notification is 19.5s.  Unfortunately, `aioapns` does not appear to
have any timeouts, and thus this commit cannot guarantee a total of
fewer than 30s.
This commit is contained in:
Alex Vandiver
2022-03-08 09:39:55 -08:00
committed by Tim Abbott
parent f531f3a27f
commit 3264361f63

View File

@@ -22,6 +22,7 @@ from zerver.decorator import statsd_increment
from zerver.lib.avatar import absolute_avatar_url
from zerver.lib.exceptions import JsonableError
from zerver.lib.message import access_message, bulk_access_messages_expect_usermessage, huddle_users
from zerver.lib.outgoing_http import OutgoingSession
from zerver.lib.remote_server import send_json_to_push_bouncer, send_to_push_bouncer
from zerver.lib.timestamp import datetime_to_timestamp
from zerver.models import (
@@ -216,6 +217,12 @@ def send_apple_push_notification(
#
class FCMSession(OutgoingSession):
def __init__(self) -> None:
# We don't set up retries, since the gcm package does that for us.
super().__init__(role="fcm", timeout=5)
def make_gcm_client() -> gcm.GCM: # nocoverage
# From GCM upstream's doc for migrating to FCM:
#
@@ -342,11 +349,17 @@ def send_android_push_notification(
# See https://firebase.google.com/docs/cloud-messaging/http-server-ref .
# Two kwargs `retries` and `session` get eaten by `json_request`;
# the rest pass through to the GCM server.
#
# One initial request plus 2 retries, with 5-second timeouts,
# and expected 1 + 2 seconds (the gcm module jitters its
# backoff by ±50%, so worst case * 1.5) between them, totals
# 18s expected, up to 19.5s worst case.
res = gcm_client.json_request(
registration_ids=reg_ids,
priority=priority,
data=data,
retries=2,
session=FCMSession(),
)
except OSError:
logger.warning("Error while pushing to GCM", exc_info=True)