Files
zulip/zerver/views/sentry.py
Alex Vandiver cdc3413a4f sentry: Add a circuit-breaker around attempts to report to Sentry.
The goal is to reduce load on Sentry if the service is timing out, and
to reduce uwsgi load from long requests.  This circuit-breaker is
per-Django-process, so may require more than 2 failures overall before
it trips, and may also "partially" trip for some (but not all)
workers.  Since all of this is best-effort, this is fine.

Because this is only for load reduction, we only circuit-breaker on
timeouts, and not unexpected HTTP response codes or the like.

See also #26229, which would move all browser-submitted Sentry
reporting into a single process, which would allow circuit-breaking to
be more effective.
2023-10-11 09:44:18 -07:00

114 lines
4.5 KiB
Python

import logging
import urllib
from contextlib import suppress
import orjson
from circuitbreaker import CircuitBreakerError, circuit
from django.conf import settings
from django.http import HttpRequest, HttpResponse
from django.utils.translation import gettext as _
from django.views.decorators.csrf import csrf_exempt
from requests.exceptions import ProxyError, RequestException, Timeout
from sentry_sdk.integrations.logging import ignore_logger
from zerver.lib.exceptions import JsonableError
from zerver.lib.outgoing_http import OutgoingSession
from zerver.lib.validator import check_url, to_wild_value
# In order to not overload Sentry if it's having a bad day, we tell
# Sentry to ignore exceptions that we have when talking to Sentry.
logger = logging.getLogger(__name__)
ignore_logger(logger.name)
class SentryTunnelSession(OutgoingSession):
def __init__(self) -> None:
super().__init__(role="sentry_tunnel", timeout=1)
@csrf_exempt
def sentry_tunnel(
request: HttpRequest,
) -> HttpResponse:
try:
envelope_header_line, envelope_items = request.body.split(b"\n", 1)
envelope_header = to_wild_value("envelope_header", envelope_header_line.decode("utf-8"))
dsn = urllib.parse.urlparse(envelope_header["dsn"].tame(check_url))
except Exception:
raise JsonableError(_("Invalid request format"))
if dsn.geturl() != settings.SENTRY_FRONTEND_DSN:
raise JsonableError(_("Invalid DSN"))
assert dsn.hostname
project_id = dsn.path.strip("/")
url = dsn._replace(netloc=dsn.hostname, path=f"/api/{project_id}/envelope/").geturl()
# Adjust the payload to explicitly contain the IP address of the
# user we see. If left blank, Sentry will assume the IP it
# received the request from, which is Zulip's, which can make
# debugging more complicated.
updated_body = request.body
# If we fail to update the body for any reason, leave it as-is; it
# is better to misreport the IP than to drop the report entirely.
with suppress(Exception):
# This parses the Sentry ingestion format, known as an
# Envelope. See https://develop.sentry.dev/sdk/envelopes/ for
# spec.
parts = [envelope_header_line, b"\n"]
while envelope_items != b"":
item_header_line, rest = envelope_items.split(b"\n", 1)
parts.append(item_header_line)
parts.append(b"\n")
item_header = orjson.loads(item_header_line.decode("utf-8"))
length = item_header.get("length")
if length is None:
item_body, envelope_items = [*rest.split(b"\n", 1), b""][:2]
else:
item_body, envelope_items = rest[0:length], rest[length:]
if item_header.get("type") in ("transaction", "event"):
# Event schema:
# https://develop.sentry.dev/sdk/event-payloads/#core-interfaces
# https://develop.sentry.dev/sdk/event-payloads/user/
#
# Transaction schema:
# https://develop.sentry.dev/sdk/event-payloads/transaction/#anatomy
# Note that "Transactions are Events enriched with Span data."
payload_data = orjson.loads(item_body)
if "user" in payload_data:
payload_data["user"]["ip_address"] = request.META.get("REMOTE_ADDR")
item_body = orjson.dumps(payload_data)
parts.append(item_body)
if length is None:
parts.append(b"\n")
updated_body = b"".join(parts)
try:
sentry_request(url, updated_body)
except CircuitBreakerError:
logger.warning("Dropped a client exception due to circuit-breaking")
except RequestException as e:
# This logger has been configured, above, to not report to Sentry
logger.exception(e)
return HttpResponse(status=200)
# Circuit-break and temporarily stop trying to report to
# Sentry if it keeps timing out. We include ProxyError in
# here because we are likely making our requests through
# Smokescreen as a CONNECT proxy, so failures from Smokescreen
# failing to connect at the TCP level will report as
# ProxyErrors.
@circuit(
failure_threshold=2,
recovery_timeout=30,
name="Sentry tunnel",
expected_exception=(ProxyError, Timeout),
)
def sentry_request(url: str, data: bytes) -> None:
SentryTunnelSession().post(
url=url,
data=data,
headers={"Content-Type": "application/x-sentry-envelope"},
).raise_for_status()