From 4ae3d6e0a357c72d36a7c76aed0a3e66768298b1 Mon Sep 17 00:00:00 2001 From: tess1o Date: Tue, 10 Dec 2024 12:56:53 +0200 Subject: [PATCH] Added documentation how to configure alert manager --- README.md | 3 + docker-compose/alertmanager-compose.yml | 11 ++++ docker-compose/alertmanager/alertmanager.yml | 43 ++++++++++++ .../alertmanager/templates/telegram.tmpl | 52 +++++++++++++++ docker-compose/prometheus/alerts/ecoflow.yml | 65 +++++++++++++++++++ docker-compose/prometheus/prometheus.yml | 12 ++++ docs/alertmanager.md | 38 +++++++++++ 7 files changed, 224 insertions(+) create mode 100644 docker-compose/alertmanager-compose.yml create mode 100644 docker-compose/alertmanager/alertmanager.yml create mode 100644 docker-compose/alertmanager/templates/telegram.tmpl create mode 100644 docker-compose/prometheus/alerts/ecoflow.yml create mode 100644 docs/alertmanager.md diff --git a/README.md b/README.md index b9bc8b1..4ac55ea 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,9 @@ flexibility. 
See documentation here: [Redis](docs/redis.md) +## How to add AlertManager +See documentation here: [alertmanager.md](docs/alertmanager.md) + ## Compare to other exporters This implementation is inspired by https://github.com/berezhinskiy/ecoflow_exporter, and it's fully diff --git a/docker-compose/alertmanager-compose.yml b/docker-compose/alertmanager-compose.yml new file mode 100644 index 0000000..446f2da --- /dev/null +++ b/docker-compose/alertmanager-compose.yml @@ -0,0 +1,11 @@ +services: + alertmanager: + image: prom/alertmanager + container_name: alertmanager + ports: + - "9093:9093" + command: + - '--config.file=/etc/alertmanager/alertmanager.yml' + restart: unless-stopped + volumes: + - ./alertmanager:/etc/alertmanager \ No newline at end of file diff --git a/docker-compose/alertmanager/alertmanager.yml b/docker-compose/alertmanager/alertmanager.yml new file mode 100644 index 0000000..24d81f2 --- /dev/null +++ b/docker-compose/alertmanager/alertmanager.yml @@ -0,0 +1,43 @@ +route: + # When a new group of alerts is created by an incoming alert, wait at + # least 'group_wait' to send the initial notification. + # This ensures that multiple alerts for the same group that start + # firing shortly after one another are batched together on the first + # notification. + group_wait: 10s + + # When the first notification was sent, wait 'group_interval' to send a batch + # of new alerts that started firing for that group. + group_interval: 30s + + # If an alert has successfully been sent, wait 'repeat_interval' to + # resend it. + repeat_interval: 12h + + group_by: + - alertname + - alertstate + - device + + receiver: telegram + + # All the above attributes are inherited by all child routes and can + # be overwritten on each. 
+ routes: + - receiver: telegram + group_wait: 5s + match_re: + severity: critical|warning + continue: true + +templates: + - /etc/alertmanager/templates/*.tmpl + +receivers: + - name: telegram + telegram_configs: + - bot_token: YOUR_BOT_TOKEN_HERE + chat_id: YOUR_CHAT_ID_HERE + api_url: https://api.telegram.org + message: '{{ template "telegram.template" . }}' + parse_mode: MarkdownV2 diff --git a/docker-compose/alertmanager/templates/telegram.tmpl b/docker-compose/alertmanager/templates/telegram.tmpl new file mode 100644 index 0000000..ce88bd8 --- /dev/null +++ b/docker-compose/alertmanager/templates/telegram.tmpl @@ -0,0 +1,52 @@ +{{/* Emoji of the alert */}} +{{- define "__telegram_emoji" -}} + {{- if gt (len .Alerts.Firing) 0 -}} + {{- if eq .CommonLabels.severity "critical" -}} + ‼️ + {{- else if eq .CommonLabels.severity "warning" -}} + ⚠️ + {{- else if eq .CommonLabels.severity "info" -}} + ℹ️ + {{- else -}} + 🤷🏻‍♂️ + {{- end -}} + {{- end }} + {{- if gt (len .Alerts.Resolved) 0 -}} + ✅ + {{- end -}} +{{- end -}} + +{{/* Status of the alert */}} +{{- define "__telegram_status" -}} + {{- if gt (len .Alerts.Firing) 0 -}} + \[FIRING:{{ (len .Alerts.Firing) }}\] + {{- end }} + {{- if gt (len .Alerts.Resolved) 0 -}} + \[RESOLVED\] + {{- end -}} +{{- end -}} + +{{/* Title of the alert */}} +{{- define "__telegram_title" -}} + {{- if (index .Alerts 0).Annotations.summary -}} + {{ (index .Alerts 0).Annotations.summary }} + {{- end -}} +{{- end -}} + +{{/* The text to display in the alert */}} +{{- define "telegram.template" -}} + {{ template "__telegram_emoji" . }} {{ template "__telegram_status" . }} {{ template "__telegram_title" . 
}} + {{- "\n" -}} + {{- "\n" -}} + {{- range .Alerts -}} + {{- if .Annotations.description -}} + *Description*: {{ .Annotations.description }} + {{- "\n" -}} + {{- end }} + {{- if .Annotations.message -}} + *Message*: {{ .Annotations.message }} + {{- "\n" -}} + {{- "\n" -}} + {{- end }} + {{- end -}} +{{- end -}} \ No newline at end of file diff --git a/docker-compose/prometheus/alerts/ecoflow.yml b/docker-compose/prometheus/alerts/ecoflow.yml new file mode 100644 index 0000000..eeed34b --- /dev/null +++ b/docker-compose/prometheus/alerts/ecoflow.yml @@ -0,0 +1,65 @@ +groups: + - name: EcoFlow + rules: + - alert: EcoflowTempTooHigh + expr: ecoflow_inv_out_temp > 60 + for: 1m + labels: + severity: critical + annotations: + summary: Inverter temperature is too high + description: "Inverter temperature {{ $labels.device }} is too high: current value is {{ $value }}" + + - alert: EcoFlowOffline + expr: ecoflow_online == 0 + for: 1m + labels: + severity: critical + annotations: + summary: EcoFlow is offline + description: "Device {{ $labels.device }} has disappeared from the network" + + - alert: EcoFlowPowerOutage + expr: ecoflow_inv_ac_in_vol == 0 + for: 0m + labels: + severity: warning + annotations: + summary: EcoFlow detects power outage + description: "Device {{ $labels.device }} detects power outage" + + - alert: EcoFlowLowRemainingTime + expr: ecoflow_bms_ems_status_dsg_remain_time < 10 + for: 0m + labels: + severity: critical + annotations: + summary: EcoFlow will discharge soon + description: "Device {{ $labels.device }} will discharge in {{ $value }} min" + + - alert: EcoFlowHalfBattery + expr: ecoflow_bms_bms_status_f32_show_soc < 50 + for: 0m + labels: + severity: warning + annotations: + summary: EcoFlow half battery + description: "Device {{ $labels.device }} battery level less than 50%" + + - alert: EcoFlowLowBattery + expr: ecoflow_bms_bms_status_f32_show_soc < 10 + for: 0m + labels: + severity: critical + annotations: + summary: EcoFlow low battery + 
description: "Device {{ $labels.device }} battery level less than 10%" + + - alert: EcoFlowHighLoad + expr: ecoflow_inv_output_watts > 1700 and ecoflow_inv_ac_in_vol == 0 + for: 0m + labels: + severity: warning + annotations: + summary: EcoFlow high load + description: "Device {{ $labels.device }} under high load: {{ $value }}W" \ No newline at end of file diff --git a/docker-compose/prometheus/prometheus.yml b/docker-compose/prometheus/prometheus.yml index 30881be..a42fada 100755 --- a/docker-compose/prometheus/prometheus.yml +++ b/docker-compose/prometheus/prometheus.yml @@ -3,6 +3,18 @@ global: scrape_timeout: 10s evaluation_interval: 30s # Evaluate rules every 30 seconds. The default is every 1 minute. +# add alertmanager. Remove this if you don't need alerts +alerting: + alertmanagers: + - scheme: http + static_configs: + - targets: + - alertmanager:9093 + +# can be removed if alertmanager is not used +rule_files: + - 'alerts/*.yml' + scrape_configs: - job_name: prometheus static_configs: diff --git a/docs/alertmanager.md b/docs/alertmanager.md new file mode 100644 index 0000000..79aab8d --- /dev/null +++ b/docs/alertmanager.md @@ -0,0 +1,38 @@ +## How to add Alertmanager to get alerts + +You can add Alertmanager and configure Prometheus to send alert notifications depending on metric values. For instance, when a +device goes offline, the battery level drops below a threshold, or the temperature is too high. + +### Pre-requisites: + +Step #1 is mandatory; all other steps have good enough default settings. You can adjust them if required. + +1. Update `bot_token` and `chat_id` in [alertmanager.yml](../docker-compose/alertmanager/alertmanager.yml) +2. Make sure that `alertmanagers` is added to + `prometheus.yml`: [prometheus.yml](../docker-compose/prometheus/prometheus.yml) +3. Configure alert rules in Prometheus. See an example here: [ecoflow.yml](../docker-compose/prometheus/alerts/ecoflow.yml) +4. 
Adjust notification template if needed: [telegram.tmpl](../docker-compose/alertmanager/templates/telegram.tmpl) + +### Installation + +* Run `docker compose -f prometheus-compose.yml stop` to stop Prometheus (can be skipped if Prometheus is not running) +* Run `docker compose -f alertmanager-compose.yml up -d` to start Alertmanager +* Run `docker compose -f prometheus-compose.yml up -d` to start Prometheus + +As a result you should get notifications in Telegram: + +``` +‼️ [FIRING:1] Inverter temperature is too high + +Description: Inverter temperature Delta 2 Max is too high +``` + +``` +✅ [RESOLVED] Inverter temperature is too high + +Description: Inverter temperature Delta 2 Max is too high +``` + +To find `chat_id` you can use this approach: https://stackoverflow.com/questions/32423837/telegram-bot-how-to-get-a-group-chat-id + +Almost all configs were taken from this repository: https://github.com/berezhinskiy/ecoflow_exporter \ No newline at end of file