From fc2aac6baaedc541f06558b9a215651f95b91e03 Mon Sep 17 00:00:00 2001 From: Ryan Crisanti Date: Thu, 29 Feb 2024 13:10:12 -0500 Subject: [PATCH] integration: Update topic & content of grafana webhook. Sending to a topic based on the number of firing alerts makes no sense, and leads to conversations and alerts scattered randomly across topics based on how on fire the alerting is. Send a separate message for each alert in the Grafana webhook payload, with the alert's name as its topic; if no alert name can be found, fall back to the alert's fingerprint. Also include all alert values in the body of the message, along with links to the alert generator, silence, and image, if available. Co-authored-by: Alex Vandiver --- zerver/lib/integrations.py | 2 +- .../fixtures/alert_no_alertname_v11.json | 51 +++++++ .../grafana/fixtures/alert_values_v11.json | 52 +++++++ zerver/webhooks/grafana/tests.py | 129 ++++++++++++++++-- zerver/webhooks/grafana/view.py | 97 +++++++++---- 5 files changed, 285 insertions(+), 46 deletions(-) create mode 100644 zerver/webhooks/grafana/fixtures/alert_no_alertname_v11.json create mode 100644 zerver/webhooks/grafana/fixtures/alert_values_v11.json diff --git a/zerver/lib/integrations.py b/zerver/lib/integrations.py index 2acf711171..85c9f9466b 100644 --- a/zerver/lib/integrations.py +++ b/zerver/lib/integrations.py @@ -758,7 +758,7 @@ DOC_SCREENSHOT_CONFIG: Dict[str, List[BaseScreenshotConfig]] = { "gocd": [ScreenshotConfig("pipeline.json")], "gogs": [ScreenshotConfig("pull_request__opened.json")], "gosquared": [ScreenshotConfig("traffic_spike.json", image_name="000.png")], - "grafana": [ScreenshotConfig("alert_v7.json")], + "grafana": [ScreenshotConfig("alert_values_v11.json")], "greenhouse": [ScreenshotConfig("candidate_stage_change.json", image_name="000.png")], "groove": [ScreenshotConfig("ticket_started.json")], "harbor": [ScreenshotConfig("scanning_completed.json")], diff --git a/zerver/webhooks/grafana/fixtures/alert_no_alertname_v11.json 
b/zerver/webhooks/grafana/fixtures/alert_no_alertname_v11.json new file mode 100644 index 0000000000..d641a80cdf --- /dev/null +++ b/zerver/webhooks/grafana/fixtures/alert_no_alertname_v11.json @@ -0,0 +1,51 @@ +{ + "receiver": "Debug webhook", + "status": "firing", + "alerts": [ + { + "status": "firing", + "labels": { + "debug": "true", + "grafana_folder": "device" + }, + "annotations": { + "summary": "High memory usage" + }, + "startsAt": "2024-03-01T02:09:00Z", + "endsAt": "0001-01-01T00:00:00Z", + "generatorURL": "https://play.grafana.org/alerting/grafana/dd2f0260-3cfc-4c65-a4c4-f3f632c551f4/view?orgId=1", + "fingerprint": "e6349a25f5ef0e9e", + "silenceURL": "https://play.grafana.org/alerting/silence/new?alertmanager=grafana\u0026matcher=alertname%3DMemory+%28copy%29\u0026matcher=debug%3Dtrue\u0026matcher=grafana_folder%3Ddevice\u0026orgId=1", + "dashboardURL": "https://play.grafana.org/d/ece9fb32-7f71-4be9-bd94-2f23608ae5b9?orgId=1", + "panelURL": "https://play.grafana.org/d/ece9fb32-7f71-4be9-bd94-2f23608ae5b9?orgId=1\u0026viewPanel=2", + "values": { + "A": 2473545728, + "B": 0, + "C": 1, + "minute": 9 + }, + "valueString": "[ var='A' labels={instance=node_exporter:9100, job=node} value=2.473545728e+09 ], [ var='B' labels={instance=node_exporter:9100, job=node} value=0 ], [ var='C' labels={} value=1 ], [ var='minute' labels={} value=9 ]", + "imageURL": "https://grafana.com/assets/img/blog/mixed_styles.png" + } + ], + "groupLabels": { + "alertname": "Memory (copy)", + "grafana_folder": "device" + }, + "commonLabels": { + "alertname": "Memory (copy)", + "debug": "true", + "grafana_folder": "device" + }, + "commonAnnotations": { + "summary": "High memory usage" + }, + "externalURL": "https://play.grafana.org/", + "version": "1", + "groupKey": "{}/{debug=\"true\"}:{alertname=\"Memory (copy)\", grafana_folder=\"device\"}", + "truncatedAlerts": 0, + "orgId": 1, + "title": "[FIRING:1] Memory (copy) device (true)", + "state": "alerting", + "message": 
"**Firing**\n\nValue: A=2.473545728e+09, B=0, C=1, minute=9\nLabels:\n - alertname = Memory (copy)\n - debug = true\n - grafana_folder = device\nAnnotations:\n - summary = High memory usage\nSource: https://play.grafana.org/alerting/grafana/dd2f0260-3cfc-4c65-a4c4-f3f632c551f4/view?orgId=1\nSilence: https://play.grafana.org/alerting/silence/new?alertmanager=grafana\u0026matcher=alertname%3DMemory+%28copy%29\u0026matcher=debug%3Dtrue\u0026matcher=grafana_folder%3Ddevice\u0026orgId=1\nDashboard: https://play.grafana.org/d/ece9fb32-7f71-4be9-bd94-2f23608ae5b9?orgId=1\nPanel: https://play.grafana.org/d/ece9fb32-7f71-4be9-bd94-2f23608ae5b9?orgId=1\u0026viewPanel=2\n" +} diff --git a/zerver/webhooks/grafana/fixtures/alert_values_v11.json b/zerver/webhooks/grafana/fixtures/alert_values_v11.json new file mode 100644 index 0000000000..6d78dfe959 --- /dev/null +++ b/zerver/webhooks/grafana/fixtures/alert_values_v11.json @@ -0,0 +1,52 @@ +{ + "receiver": "Debug webhook", + "status": "firing", + "alerts": [ + { + "status": "firing", + "labels": { + "alertname": "Memory (copy)", + "debug": "true", + "grafana_folder": "device" + }, + "annotations": { + "summary": "High memory usage" + }, + "startsAt": "2024-03-01T02:09:00Z", + "endsAt": "0001-01-01T00:00:00Z", + "generatorURL": "https://play.grafana.org/alerting/grafana/dd2f0260-3cfc-4c65-a4c4-f3f632c551f4/view?orgId=1", + "fingerprint": "e6349a25f5ef0e9e", + "silenceURL": "https://play.grafana.org/alerting/silence/new?alertmanager=grafana\u0026matcher=alertname%3DMemory+%28copy%29\u0026matcher=debug%3Dtrue\u0026matcher=grafana_folder%3Ddevice\u0026orgId=1", + "dashboardURL": "https://play.grafana.org/d/ece9fb32-7f71-4be9-bd94-2f23608ae5b9?orgId=1", + "panelURL": "https://play.grafana.org/d/ece9fb32-7f71-4be9-bd94-2f23608ae5b9?orgId=1\u0026viewPanel=2", + "values": { + "A": 2473545728, + "B": 0, + "C": 1, + "minute": 9 + }, + "valueString": "[ var='A' labels={instance=node_exporter:9100, job=node} value=2.473545728e+09 ], [ 
var='B' labels={instance=node_exporter:9100, job=node} value=0 ], [ var='C' labels={} value=1 ], [ var='minute' labels={} value=9 ]", + "imageURL": "https://grafana.com/assets/img/blog/mixed_styles.png" + } + ], + "groupLabels": { + "alertname": "Memory (copy)", + "grafana_folder": "device" + }, + "commonLabels": { + "alertname": "Memory (copy)", + "debug": "true", + "grafana_folder": "device" + }, + "commonAnnotations": { + "summary": "High memory usage" + }, + "externalURL": "https://play.grafana.org/", + "version": "1", + "groupKey": "{}/{debug=\"true\"}:{alertname=\"Memory (copy)\", grafana_folder=\"device\"}", + "truncatedAlerts": 0, + "orgId": 1, + "title": "[FIRING:1] Memory (copy) device (true)", + "state": "alerting", + "message": "**Firing**\n\nValue: A=2.473545728e+09, B=0, C=1, minute=9\nLabels:\n - alertname = Memory (copy)\n - debug = true\n - grafana_folder = device\nAnnotations:\n - summary = High memory usage\nSource: https://play.grafana.org/alerting/grafana/dd2f0260-3cfc-4c65-a4c4-f3f632c551f4/view?orgId=1\nSilence: https://play.grafana.org/alerting/silence/new?alertmanager=grafana\u0026matcher=alertname%3DMemory+%28copy%29\u0026matcher=debug%3Dtrue\u0026matcher=grafana_folder%3Ddevice\u0026orgId=1\nDashboard: https://play.grafana.org/d/ece9fb32-7f71-4be9-bd94-2f23608ae5b9?orgId=1\nPanel: https://play.grafana.org/d/ece9fb32-7f71-4be9-bd94-2f23608ae5b9?orgId=1\u0026viewPanel=2\n" +} diff --git a/zerver/webhooks/grafana/tests.py b/zerver/webhooks/grafana/tests.py index e1d7c17ed8..2d6fe42b5b 100644 --- a/zerver/webhooks/grafana/tests.py +++ b/zerver/webhooks/grafana/tests.py @@ -128,14 +128,11 @@ Someone is testing the alert notification within grafana. ) def test_alert_v8(self) -> None: - expected_topic_name = "[RESOLVED:1]" + expected_topic_name = "[TestAlert]" expected_message = """ :checkbox: **RESOLVED** -Webhook test message. - ---- -**Alert 1**: TestAlert. +**TestAlert** This alert was fired at . 
@@ -145,10 +142,13 @@ Labels: - alertname: TestAlert - instance: Grafana +Values: +[ metric='foo' labels={instance=bar} value=10 ] + Annotations: - summary: Notification test -1 alert(s) truncated. +[Silence](https://zuliptestingwh2.grafana.net/alerting/silence/new?alertmanager=grafana&matcher=alertname%3DTestAlert&matcher=instance%3DGrafana) """.strip() self.check_webhook( @@ -159,44 +159,143 @@ Annotations: ) def test_alert_multiple_v8(self) -> None: - expected_topic_name = "[FIRING:2]" - expected_message = """ + expected_topic_name_1 = "[High memory usage]" + expected_topic_name_2 = "[High CPU usage]" + expected_message_1 = """ :alert: **FIRING** -Webhook test message. - ---- -**Alert 1**: High memory usage. +**High memory usage** This alert was fired at . + Labels: - alertname: High memory usage - team: blue - zone: us-1 +Values: +[ metric='' labels={} value=14151.331895396988 ] + Annotations: - description: The system has high memory usage - runbook_url: https://myrunbook.com/runbook/1234 - summary: This alert was triggered for zone us-1 +[Generator](https://play.grafana.org/alerting/1afz29v7z/edit) +[Silence](https://play.grafana.org/alerting/silence/new?alertmanager=grafana&matchers=alertname%3DT2%2Cteam%3Dblue%2Czone%3Dus-1) +""".strip() + expected_message_2 = """ +:alert: **FIRING** ---- -**Alert 2**: High CPU usage. +**High CPU usage** This alert was fired at . 
+ Labels: - alertname: High CPU usage - team: blue - zone: eu-1 +Values: +[ metric='' labels={} value=47043.702386305304 ] + Annotations: - description: The system has high CPU usage - runbook_url: https://myrunbook.com/runbook/1234 - summary: This alert was triggered for zone eu-1 + +[Generator](https://play.grafana.org/alerting/d1rdpdv7k/edit) +[Silence](https://play.grafana.org/alerting/silence/new?alertmanager=grafana&matchers=alertname%3DT1%2Cteam%3Dblue%2Czone%3Deu-1) +""".strip() + + self.subscribe(self.test_user, self.CHANNEL_NAME) + payload = self.get_body("alert_multiple_v8") + + msg = self.send_webhook_payload( + self.test_user, + self.url, + payload, + content_type="application/json", + ) + + msg = self.get_second_to_last_message() + self.assert_channel_message( + message=msg, + channel_name=self.CHANNEL_NAME, + topic_name=expected_topic_name_1, + content=expected_message_1, + ) + + msg = self.get_last_message() + self.assert_channel_message( + message=msg, + channel_name=self.CHANNEL_NAME, + topic_name=expected_topic_name_2, + content=expected_message_2, + ) + + def test_alert_values_v11(self) -> None: + expected_topic_name = "[Memory (copy)]" # alertname + expected_message = """ +:alert: **FIRING** + +**Memory (copy)** + +This alert was fired at . 
+ +Labels: +- alertname: Memory (copy) +- debug: true +- grafana_folder: device + +Values: +- A: 2473545728 +- B: 0 +- C: 1 +- minute: 9 + +Annotations: +- summary: High memory usage + +[Generator](https://play.grafana.org/alerting/grafana/dd2f0260-3cfc-4c65-a4c4-f3f632c551f4/view?orgId=1) +[Silence](https://play.grafana.org/alerting/silence/new?alertmanager=grafana\u0026matcher=alertname%3DMemory+%28copy%29\u0026matcher=debug%3Dtrue\u0026matcher=grafana_folder%3Ddevice\u0026orgId=1) +[Image](https://grafana.com/assets/img/blog/mixed_styles.png) """.strip() self.check_webhook( - "alert_multiple_v8", + "alert_values_v11", + expected_topic_name, + expected_message, + content_type="application/x-www-form-urlencoded", + ) + + def test_alert_no_alertname_v11(self) -> None: + expected_topic_name = "[e6349a25f5ef0e9e]" # fingerprint + expected_message = """ +:alert: **FIRING** + +This alert was fired at . + +Labels: +- debug: true +- grafana_folder: device + +Values: +- A: 2473545728 +- B: 0 +- C: 1 +- minute: 9 + +Annotations: +- summary: High memory usage + +[Generator](https://play.grafana.org/alerting/grafana/dd2f0260-3cfc-4c65-a4c4-f3f632c551f4/view?orgId=1) +[Silence](https://play.grafana.org/alerting/silence/new?alertmanager=grafana\u0026matcher=alertname%3DMemory+%28copy%29\u0026matcher=debug%3Dtrue\u0026matcher=grafana_folder%3Ddevice\u0026orgId=1) +[Image](https://grafana.com/assets/img/blog/mixed_styles.png) +""".strip() + + self.check_webhook( + "alert_no_alertname_v11", expected_topic_name, expected_message, content_type="application/x-www-form-urlencoded", diff --git a/zerver/webhooks/grafana/view.py b/zerver/webhooks/grafana/view.py index bfcc3113bd..317a949908 100644 --- a/zerver/webhooks/grafana/view.py +++ b/zerver/webhooks/grafana/view.py @@ -5,6 +5,7 @@ from zerver.lib.response import json_success from zerver.lib.typed_endpoint import JsonBodyPayload, typed_endpoint from zerver.lib.validator import ( WildValue, + check_anything, check_float, check_int, 
check_none_or, @@ -21,20 +22,23 @@ ALERT_STATUS_TEMPLATE = "{alert_icon} **{alert_state}**\n\n" OLD_MESSAGE_TEMPLATE = "{alert_status}[{rule_name}]({rule_url})\n\n{alert_message}{eval_matches}" -NEW_TOPIC_TEMPLATE = "[{alert_status}:{alert_count}]" +NEW_TOPIC_TEMPLATE = "[{alertname}]" -ALERT_HEADER_TEMPLATE = """\n--- -**Alert {count}**""" +START_TIME_TEMPLATE = "This alert was fired at <time:{start_time}>." -START_TIME_TEMPLATE = "\n\nThis alert was fired at <time:{start_time}>.\n" +END_TIME_TEMPLATE = "\n\nThis alert was resolved at <time:{end_time}>." -END_TIME_TEMPLATE = "\nThis alert was resolved at <time:{end_time}>.\n\n" +MESSAGE_LABELS_TEMPLATE = "\n\nLabels:\n{label_information}\n" -MESSAGE_LABELS_TEMPLATE = "Labels:\n{label_information}\n" +MESSAGE_VALUES_TEMPLATE = "Values:\n{value_information}\n" -MESSAGE_ANNOTATIONS_TEMPLATE = "Annotations:\n{annotation_information}\n" +MESSAGE_ANNOTATIONS_TEMPLATE = "Annotations:\n{annotation_information}" -TRUNCATED_ALERTS_TEMPLATE = "{count} alert(s) truncated.\n" +MESSAGE_GENERATOR_TEMPLATE = "\n[Generator]({generator_url})" + +MESSAGE_SILENCE_TEMPLATE = "\n[Silence]({silence_url})" + +MESSAGE_IMAGE_TEMPLATE = "\n[Image]({image_url})" LEGACY_EVENT_TYPES = ["ok", "pending", "alerting", "paused"] @@ -53,24 +57,31 @@ def api_grafana_webhook( ) -> HttpResponse: # Grafana alerting system. 
if "alerts" in payload: - status = payload["status"].tame(check_string_in(["firing", "resolved"])) - alert_count = len(payload["alerts"]) - - topic_name = NEW_TOPIC_TEMPLATE.format(alert_status=status.upper(), alert_count=alert_count) - - if status == "firing": - body = ALERT_STATUS_TEMPLATE.format(alert_icon=":alert:", alert_state=status.upper()) - else: - body = ALERT_STATUS_TEMPLATE.format(alert_icon=":checkbox:", alert_state=status.upper()) - - if payload["message"]: - body += payload["message"].tame(check_string) + "\n" - - for index, alert in enumerate(payload["alerts"], 1): - body += ALERT_HEADER_TEMPLATE.format(count=index) + # Grafana 8.0 and above alerting; works for: + # - https://grafana.com/docs/grafana/v8.0/alerting/unified-alerting/message-templating/template-data/ + # - https://grafana.com/docs/grafana/v9.0/alerting/contact-points/notifiers/webhook-notifier/ + # - https://grafana.com/docs/grafana/v10.0/alerting/alerting-rules/manage-contact-points/webhook-notifier/ + # - https://grafana.com/docs/grafana/v11.0/alerting/configure-notifications/manage-contact-points/integrations/webhook-notifier/ + for alert in payload["alerts"]: + status = alert["status"].tame(check_string_in(["firing", "resolved"])) + if status == "firing": + body = ALERT_STATUS_TEMPLATE.format( + alert_icon=":alert:", alert_state=status.upper() + ) + else: + body = ALERT_STATUS_TEMPLATE.format( + alert_icon=":checkbox:", alert_state=status.upper() + ) if "alertname" in alert["labels"] and alert["labels"]["alertname"]: - body += ": " + alert["labels"]["alertname"].tame(check_string) + "." 
+ alertname = alert["labels"]["alertname"].tame(check_string) + topic_name = NEW_TOPIC_TEMPLATE.format(alertname=alertname) + body += "**" + alertname + "**\n\n" + else: + # if no alertname, fallback to the alert fingerprint + topic_name = NEW_TOPIC_TEMPLATE.format( + alertname=alert["fingerprint"].tame(check_string) + ) body += START_TIME_TEMPLATE.format(start_time=alert["startsAt"].tame(check_string)) @@ -84,6 +95,19 @@ def api_grafana_webhook( label_information += "- " + key + ": " + value.tame(check_string) + "\n" body += MESSAGE_LABELS_TEMPLATE.format(label_information=label_information) + if alert.get("values"): + value_information = "" + for key, value in alert["values"].items(): + value_information += "- " + key + ": " + str(value.tame(check_anything)) + "\n" + body += MESSAGE_VALUES_TEMPLATE.format(value_information=value_information) + elif alert.get("valueString"): + body += ( + MESSAGE_VALUES_TEMPLATE.format( + value_information=alert["valueString"].tame(check_string) + ) + + "\n" + ) + if alert["annotations"]: annotation_information = "" for key, value in alert["annotations"].items(): @@ -92,17 +116,30 @@ def api_grafana_webhook( annotation_information=annotation_information ) - if payload["truncatedAlerts"]: - body += TRUNCATED_ALERTS_TEMPLATE.format( - count=payload["truncatedAlerts"].tame(check_int) - ) + if alert["generatorURL"]: + body += MESSAGE_GENERATOR_TEMPLATE.format( + generator_url=alert["generatorURL"].tame(check_string) + ) - check_send_webhook_message(request, user_profile, topic_name, body, status) + if alert["silenceURL"]: + body += MESSAGE_SILENCE_TEMPLATE.format( + silence_url=alert["silenceURL"].tame(check_string) + ) + + if alert.get("imageURL"): + body += MESSAGE_IMAGE_TEMPLATE.format( + image_url=alert["imageURL"].tame(check_string) + ) + + body += "\n" + + check_send_webhook_message(request, user_profile, topic_name, body, status) return json_success(request) - # Legacy Grafana alerts. 
else: + # Grafana 7.0 alerts: + # https://grafana.com/docs/grafana/v7.0/alerting/notifications/#webhook topic_name = OLD_TOPIC_TEMPLATE.format(alert_title=payload["title"].tame(check_string)) eval_matches_text = ""