# This file contains the default list of alerts for the vmalert service.
# The alerts below are only recommendations and may require some updates
# and threshold calibration according to every specific setup.
groups:
  # Alerts group for vmalert assumes that Grafana dashboard
  # https://grafana.com/grafana/dashboards/14950/ is installed.
  # Please update the `dashboard` annotation according to your setup.
  - name: vmalert
    interval: 30s
    rules:
      # Fires while the last configuration reload is not reported as successful
      # (the gauge differs from 1). No `for` delay is configured.
      - alert: ConfigurationReloadFailure
        expr: vmalert_config_last_reload_successful != 1
        labels:
          severity: warning
        annotations:
          summary: "Configuration reload failed for vmalert instance {{ $labels.instance }}"
          description: >-
            Configuration hot-reload failed for vmalert on instance {{ $labels.instance }}.
            Check vmalert's logs for detailed error message.

      # Fires when the alerting-rule error counter keeps growing for 5 minutes.
      # Errors are summed with `alertname` and `id` removed, so the remaining
      # labels (e.g. `instance`, `group`) are available to the annotations.
      - alert: AlertingRulesError
        expr: sum(increase(vmalert_alerting_rules_errors_total[5m])) without(alertname, id) > 0
        for: 5m
        labels:
          severity: warning
        annotations:
          dashboard: "http://localhost:3000/d/LzldHAVnz?viewPanel=13&var-instance={{ $labels.instance }}&var-group={{ $labels.group }}"
          summary: "Alerting rules are failing for vmalert instance {{ $labels.instance }}"
          description: >-
            Alerting rules execution is failing for group "{{ $labels.group }}".
            Check vmalert's logs for detailed error message.

      # Fires when the recording-rule error counter keeps growing for 5 minutes.
      # Errors are summed with `recording` and `id` removed, so the remaining
      # labels (e.g. `instance`, `group`) are available to the annotations.
      - alert: RecordingRulesError
        expr: sum(increase(vmalert_recording_rules_errors_total[5m])) without(recording, id) > 0
        for: 5m
        labels:
          severity: warning
        annotations:
          dashboard: "http://localhost:3000/d/LzldHAVnz?viewPanel=30&var-instance={{ $labels.instance }}&var-group={{ $labels.group }}"
          summary: "Recording rules are failing for vmalert instance {{ $labels.instance }}"
          description: >-
            Recording rules execution is failing for group "{{ $labels.group }}".
            Check vmalert's logs for detailed error message.

      # Fires when a recording rule has produced no samples for 30 minutes.
      # Only `id` is aggregated away: the `recording` label must survive the
      # aggregation because the summary and description below interpolate
      # `$labels.recording`.
      - alert: RecordingRulesNoData
        expr: sum(vmalert_recording_rules_last_evaluation_samples) without(id) < 1
        for: 30m
        labels:
          severity: info
        annotations:
          dashboard: "http://localhost:3000/d/LzldHAVnz?viewPanel=33&var-group={{ $labels.group }}"
          summary: "Recording rule {{ $labels.recording }} ({{ $labels.group }}) produces no data"
          description: >-
            Recording rule "{{ $labels.recording }}" from group "{{ $labels.group }}"
            produces 0 samples over the last 30min. It might be caused by a misconfiguration
            or incorrect query expression.

      # Fires when the missed-iterations counter keeps growing for 15 minutes.
      - alert: TooManyMissedIterations
        expr: increase(vmalert_iteration_missed_total[5m]) > 0
        for: 15m
        labels:
          severity: warning
        annotations:
          summary: "vmalert instance {{ $labels.instance }} is missing rules evaluations"
          description: >-
            vmalert instance {{ $labels.instance }} is missing rules evaluations for group "{{ $labels.group }}".
            The group evaluation time takes longer than the configured evaluation interval. This may result in missed
            alerting notifications or recording rules samples. Try increasing evaluation interval or concurrency of
            group "{{ $labels.group }}". See https://docs.victoriametrics.com/vmalert/#groups.
            If rule expressions are taking longer than expected, please see https://docs.victoriametrics.com/Troubleshooting.html#slow-queries.

      # Fires when the remote-write error counter keeps growing for 15 minutes.
      - alert: RemoteWriteErrors
        expr: increase(vmalert_remotewrite_errors_total[5m]) > 0
        for: 15m
        labels:
          severity: warning
        annotations:
          summary: "vmalert instance {{ $labels.instance }} is failing to push metrics to remote write URL"
          description: >-
            vmalert instance {{ $labels.instance }} is failing to push metrics generated via alerting
            or recording rules to the configured remote write URL. Check vmalert's logs for detailed error message.

      # Fires when the alert-send error counter keeps growing for 15 minutes.
      # The `addr` label identifies the notification target in the description.
      - alert: AlertmanagerErrors
        expr: increase(vmalert_alerts_send_errors_total[5m]) > 0
        for: 15m
        labels:
          severity: warning
        annotations:
          summary: "vmalert instance {{ $labels.instance }} is failing to send notifications to Alertmanager"
          description: >-
            vmalert instance {{ $labels.instance }} is failing to send alert notifications to "{{ $labels.addr }}".
            Check vmalert's logs for detailed error message.