Added documentation how to configure alert manager

This commit is contained in:
tess1o
2024-12-10 12:56:53 +02:00
parent 0f421c4662
commit 4ae3d6e0a3
7 changed files with 224 additions and 0 deletions

View File

@@ -54,6 +54,9 @@ flexibility.
See documentation here: [Redis](docs/redis.md)
## How to add AlertManager
See documentation here: [alertmanager.md](docs/alertmanager.md)
## Compare to other exporters
This implementation is inspired by https://github.com/berezhinskiy/ecoflow_exporter, and it's fully

View File

@@ -0,0 +1,11 @@
# Compose stack that runs a single Alertmanager container.
services:
  alertmanager:
    image: prom/alertmanager
    container_name: alertmanager
    restart: unless-stopped
    # Read the configuration from the directory mounted under "volumes".
    command:
      - "--config.file=/etc/alertmanager/alertmanager.yml"
    # Publish container port 9093 on the same host port.
    ports:
      - "9093:9093"
    # ./alertmanager on the host is exposed as /etc/alertmanager inside
    # the container.
    volumes:
      - ./alertmanager:/etc/alertmanager

View File

@@ -0,0 +1,43 @@
route:
  # When a new group of alerts is created by an incoming alert, wait at
  # least 'group_wait' before sending the initial notification.
  # This ensures that multiple alerts for the same group that start
  # firing shortly after one another are batched together in the first
  # notification.
  group_wait: 10s
  # After the first notification has been sent, wait 'group_interval' before
  # sending a batch of new alerts that started firing for that group.
  group_interval: 30s
  # If a notification has already been sent successfully, wait
  # 'repeat_interval' before sending it again.
  repeat_interval: 12h
  group_by:
    - alertname
    - alertstate
    - device
  # Receiver used for alerts that do not match any child route.
  receiver: telegram
  # All the above attributes are inherited by all child routes and can be
  # overwritten on each.
  routes:
    - receiver: telegram
      group_wait: 5s
      # The regex is fully anchored, so each alternative must be spelled
      # exactly like the 'severity' label set in the alert rules.
      match_re:
        severity: critical|warning
      continue: true
# Notification template files; the "telegram.template" referenced by the
# receiver below is expected to be defined in one of them.
templates:
  - /etc/alertmanager/templates/*.tmpl
receivers:
  - name: telegram
    telegram_configs:
      # Replace both placeholders with your own bot token and chat ID.
      - bot_token: YOUR_BOT_TOKEN_HERE
        chat_id: YOUR_CHAT_ID_HERE
        api_url: https://api.telegram.org
        parse_mode: MarkdownV2
        message: '{{ template "telegram.template" . }}'

View File

@@ -0,0 +1,52 @@
{{/*
  Emoji of the alert.
  A group with firing alerts gets an emoji chosen from the common "severity"
  label; a group with resolved alerts gets a check mark.
*/}}
{{- define "__telegram_emoji" -}}
    {{- if gt (len .Alerts.Firing) 0 -}}
        {{- if eq .CommonLabels.severity "critical" -}}
            ‼️
        {{- else if eq .CommonLabels.severity "warning" -}}
            ⚠️
        {{- else if eq .CommonLabels.severity "info" -}}
            ℹ️
        {{- else -}}
            🤷🏻‍♂️
        {{- end -}}
    {{- end }}
    {{- if gt (len .Alerts.Resolved) 0 -}}
        ✅
    {{- end -}}
{{- end -}}
{{/* Status of the alert: escaped "[FIRING:<count>]" and/or "[RESOLVED]" */}}
{{- define "__telegram_status" -}}
    {{- if .Alerts.Firing -}}
        \[FIRING:{{ len .Alerts.Firing }}\]
    {{- end }}
    {{- if .Alerts.Resolved -}}
        \[RESOLVED\]
    {{- end -}}
{{- end -}}
{{/* Title of the alert: the "summary" annotation of the first alert, if set */}}
{{- define "__telegram_title" -}}
    {{- with (index .Alerts 0).Annotations.summary -}}
        {{ . }}
    {{- end -}}
{{- end -}}
{{/*
  The text to display in the alert: a header line (emoji, status, title),
  a blank line, then the "description" and "message" annotations of every
  alert in the group.
  NOTE(review): annotation values are inserted as-is; if the receiver uses
  parse_mode MarkdownV2, reserved characters in them may need escaping.
  Confirm against the Telegram formatting rules.
*/}}
{{- define "telegram.template" -}}
    {{ template "__telegram_emoji" . }} {{ template "__telegram_status" . }} {{ template "__telegram_title" . }}
    {{- "\n\n" -}}
    {{- range .Alerts -}}
        {{- with .Annotations.description -}}
            *Description*: {{ . }}
            {{- "\n" -}}
        {{- end }}
        {{- with .Annotations.message -}}
            *Message*: {{ . }}
            {{- "\n\n" -}}
        {{- end }}
    {{- end -}}
{{- end -}}

View File

@@ -0,0 +1,65 @@
# Example alert rules for EcoFlow devices. Every rule sets a 'severity' label
# and uses the 'device' label of the metric in its description.
groups:
  - name: EcoFlow
    rules:
      # Inverter temperature has been above 60 for one minute.
      - alert: EcoflowTempTooHigh
        expr: ecoflow_inv_out_temp > 60
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: Inverter temperature is too high
          description: "Inverter temperature {{ $labels.device }} is too high: current value is {{ $value }}"

      # Device has reported itself offline for one minute.
      - alert: EcoFlowOffline
        expr: ecoflow_online == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: EcoFlow is offline
          description: "Device {{ $labels.device }} has disappeared from the network"

      # AC input voltage is zero; fires immediately.
      - alert: EcoFlowPowerOutage
        expr: ecoflow_inv_ac_in_vol == 0
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: EcoFlow detects power outage
          description: "Device {{ $labels.device }} detects power outage"

      # Remaining discharge time is below 10; fires immediately.
      - alert: EcoFlowLowRemainingTime
        expr: ecoflow_bms_ems_status_dsg_remain_time < 10
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: EcoFlow will discharge soon
          description: "Device {{ $labels.device }} will discharge in {{ $value }} min"

      # State of charge is below 50; fires immediately.
      - alert: EcoFlowHalfBattery
        expr: ecoflow_bms_bms_status_f32_show_soc < 50
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: EcoFlow half battery
          description: "Device {{ $labels.device }} battery level less than 50%"

      # State of charge is below 10; fires immediately.
      - alert: EcoFlowLowBattery
        expr: ecoflow_bms_bms_status_f32_show_soc < 10
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: EcoFlow low battery
          description: "Device {{ $labels.device }} battery level less than 10%"

      # Output above 1700 while AC input voltage is zero; fires immediately.
      - alert: EcoFlowHighLoad
        expr: ecoflow_inv_output_watts > 1700 and ecoflow_inv_ac_in_vol == 0
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: EcoFlow high load
          description: "Device {{ $labels.device }} under high load: {{ $value }}W"

View File

@@ -3,6 +3,18 @@ global:
scrape_timeout: 10s
evaluation_interval: 30s # Evaluate rules every 30 seconds. The default is every 1 minute.
# Send alerts to Alertmanager. Remove this section if you don't need alerts.
alerting:
  alertmanagers:
    - scheme: http
      static_configs:
        - targets:
            - "alertmanager:9093"
# Alert rule files, matched by glob. Can be removed if Alertmanager is not used.
rule_files:
  - "alerts/*.yml"
scrape_configs:
- job_name: prometheus
static_configs:

38
docs/alertmanager.md Normal file
View File

@@ -0,0 +1,38 @@
## How to add Alertmanager to get alerts
You can add Alertmanager and configure Prometheus to send alert notifications based on metric values, for instance when a
device goes offline, the battery level drops below a threshold, or the temperature is too high.
### Pre-requisites:
Step #1 is mandatory; all other steps have sensible default settings that you can adjust if required.
1. Update `bot_token` and `chat_id` in [alertmanager.yml](../docker-compose/alertmanager/alertmanager.yml)
2. Make sure that `alertmanagers` is added to
`prometheus.yml`: [prometheus.yml](../docker-compose/prometheus/prometheus.yml)
3. Configure alert rules in Prometheus. See example here: [ecoflow.yml](../docker-compose/prometheus/alerts/ecoflow.yml)
4. Adjust notification template if needed: [telegram.tmpl](../docker-compose/alertmanager/templates/telegram.tmpl)
### Installation
* Run `docker compose -f prometheus-compose.yml stop` to stop Prometheus (can be skipped if Prometheus is not running)
* Run `docker compose -f alertmanager-compose.yml up -d` to start Alertmanager
* Run `docker compose -f prometheus-compose.yml up -d` to start Prometheus
As a result you should get notifications in Telegram:
```
‼️ [FIRING:1] Inverter temperature is too high
Description: Inverter temperature Delta 2 Max is too high
```
```
✅ [RESOLVED] Inverter temperature is too high
Description: Inverter temperature Delta 2 Max is too high
```
To find `chat_id` you can use this approach: https://stackoverflow.com/questions/32423837/telegram-bot-how-to-get-a-group-chat-id
Almost all configs were taken from this repository: https://github.com/berezhinskiy/ecoflow_exporter